diff --git a/src/Numerics/OneDimGridFactory.cpp b/src/Numerics/OneDimGridFactory.cpp
index 16a17ec9b30..bc90a9f5053 100644
--- a/src/Numerics/OneDimGridFactory.cpp
+++ b/src/Numerics/OneDimGridFactory.cpp
@@ -13,19 +13,21 @@
 
 
 #include "OneDimGridFactory.h"
+#include "Configuration.h"
 #include "OhmmsData/AttributeSet.h"
 #include "Message/UniformCommunicateError.h"
 
 namespace qmcplusplus
 {
-std::unique_ptr<OneDimGridFactory::GridType> OneDimGridFactory::createGrid(xmlNodePtr cur)
+template <typename T>
+std::unique_ptr<typename OneDimGridFactory<T>::GridType> OneDimGridFactory<T>::createGrid(xmlNodePtr cur)
 {
   std::unique_ptr<GridType> agrid;
   RealType ri     = 1e-5;
   RealType rf     = 100.0;
   RealType ascale = -1.0e0;
   RealType astep  = 1.25e-2;
-  IndexType npts  = 1001;
+  QMCTraits::IndexType npts  = 1001;
   std::string gridType("log");
   std::string gridID("invalid");
   OhmmsAttributeSet radAttrib;
@@ -74,4 +76,7 @@ std::unique_ptr<OneDimGridFactory::GridType> OneDimGridFactory::createGrid(xmlNo
   }
   return agrid;
 }
+
+template struct OneDimGridFactory<double>;
+template struct OneDimGridFactory<float>;
 } // namespace qmcplusplus
diff --git a/src/Numerics/OneDimGridFactory.h b/src/Numerics/OneDimGridFactory.h
index 6365db25aa6..d27b1fb9041 100644
--- a/src/Numerics/OneDimGridFactory.h
+++ b/src/Numerics/OneDimGridFactory.h
@@ -14,15 +14,17 @@
 
 #ifndef QMCPLUSPLUS_ONEDIMGRIDFACTORY_H
 #define QMCPLUSPLUS_ONEDIMGRIDFACTORY_H
-#include "Configuration.h"
 #include "Numerics/OneDimGridFunctor.h"
+#include "Numerics/LibxmlNumericIO.h"
 
 namespace qmcplusplus
 {
 /** Factory class using Singleton pattern
  */
-struct OneDimGridFactory : public QMCTraits
+template <typename T>
+struct OneDimGridFactory
 {
+  using RealType = T;
   ///typedef of the one-dimensional grid
   using GridType = OneDimGridBase<RealType>;
 
diff --git a/src/Numerics/SoaCartesianTensor.h b/src/Numerics/SoaCartesianTensor.h
index 21fa7f52bf1..540ab826b0d 100644
--- a/src/Numerics/SoaCartesianTensor.h
+++ b/src/Numerics/SoaCartesianTensor.h
@@ -37,7 +37,7 @@ namespace qmcplusplus
 template<class T>
 struct SoaCartesianTensor
 {
-  using value_type = T;
+  using ValueType = T;
   using ggg_type   = TinyVector<Tensor<T, 3>, 3>;
 
   ///maximum angular momentum
diff --git a/src/Numerics/SoaSphericalTensor.h b/src/Numerics/SoaSphericalTensor.h
index 56c638b42e6..c5e4f3e1ae1 100644
--- a/src/Numerics/SoaSphericalTensor.h
+++ b/src/Numerics/SoaSphericalTensor.h
@@ -37,6 +37,8 @@ namespace qmcplusplus
 template<typename T>
 struct SoaSphericalTensor
 {
+  using ValueType = T;
+
   ///maximum angular momentum for the center
   int Lmax;
   /// Normalization factors
diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 9d6eace715f..f364d373493 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -85,7 +85,9 @@ if(OHMMS_DIM MATCHES 3)
       LCAO/LCAOrbitalBuilderT.cpp
       LCAO/MultiQuinticSpline1D.cpp
       LCAO/AOBasisBuilder.cpp
-      LCAO/SoaLocalizedBasisSet.cpp)
+      LCAO/AOBasisBuilderT.cpp
+      LCAO/SoaLocalizedBasisSet.cpp
+      LCAO/SoaLocalizedBasisSetT.cpp)
   if(QMC_COMPLEX)
     set(FERMION_SRCS ${FERMION_SRCS} LCAO/LCAOSpinorBuilder.cpp LCAO/LCAOSpinorBuilder.cpp)
   else(QMC_COMPLEX)
diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp
new file mode 100644
index 00000000000..022d6db4a50
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.cpp
@@ -0,0 +1,923 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of
+// Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of
+//                    Illinois at Urbana-Champaign Jaron T. Krogel,
+//                    krogeljt@ornl.gov, Oak Ridge National Laboratory Mark A.
+//                    Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore
+//                    National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois
+// at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "AOBasisBuilderT.h"
+
+#include "MultiFunctorAdapter.h"
+#include "MultiQuinticSpline1D.h"
+#include "Numerics/SoaCartesianTensor.h"
+#include "Numerics/SoaSphericalTensor.h"
+#include "OhmmsData/AttributeSet.h"
+#include "RadialOrbitalSetBuilder.h"
+#include "SoaAtomicBasisSetT.h"
+#include "Utilities/ProgressReportEngine.h"
+
+namespace qmcplusplus
+{
+template <typename COT>
+AOBasisBuilderT<COT>::AOBasisBuilderT(
+    const std::string& eName, Communicate* comm) :
+    MPIObjectBase(comm),
+    addsignforM(false),
+    expandlm(GAUSSIAN_EXPAND),
+    Morder("gaussian"),
+    sph("default"),
+    basisType("Numerical"),
+    elementType(eName),
+    Normalized("yes")
+{
+    // mmorales: for "Cartesian Gaussian", m is an integer that maps
+    //           the component to Gamess notation, see
+    //           Numerics/CartesianTensor.h
+    nlms_id["n"] = q_n;
+    nlms_id["l"] = q_l;
+    nlms_id["m"] = q_m;
+    nlms_id["s"] = q_s;
+}
+
+template <class COT>
+bool
+AOBasisBuilderT<COT>::put(xmlNodePtr cur)
+{
+    ReportEngine PRE("AtomicBasisBuilder", "put(xmlNodePtr)");
+    // Register valid attributes attributes
+    OhmmsAttributeSet aAttrib;
+    aAttrib.add(basisType, "type");
+    aAttrib.add(sph, "angular");
+    aAttrib.add(addsignforM, "expM");
+    aAttrib.add(Morder, "expandYlm");
+    aAttrib.add(Normalized, "normalized");
+    aAttrib.put(cur);
+    PRE.echo(cur);
+    if (sph == "spherical")
+        addsignforM = 1; // include (-1)^m
+
+    if (Morder == "gaussian")
+        expandlm = GAUSSIAN_EXPAND;
+    else if (Morder == "natural")
+        expandlm = NATURAL_EXPAND;
+    else if (Morder == "no")
+        expandlm = DONOT_EXPAND;
+    else if (Morder == "pyscf") {
+        expandlm = MOD_NATURAL_EXPAND;
+        addsignforM = 1;
+        if (sph != "spherical") {
+            myComm->barrier_and_abort(
+                " Error: expandYlm='pyscf' only compatible with "
+                "angular='spherical'. Aborting.\n");
+        }
+    }
+
+    if (sph == "cartesian" || Morder == "Gamess") {
+        expandlm = CARTESIAN_EXPAND;
+        addsignforM = 0;
+    }
+
+    if (Morder == "Dirac") {
+        expandlm = DIRAC_CARTESIAN_EXPAND;
+        addsignforM = 0;
+        if (sph != "cartesian")
+            myComm->barrier_and_abort(
+                " Error: expandYlm='Dirac' only compatible with "
+                "angular='cartesian'. Aborting\n");
+    }
+
+    // Numerical basis is a special case
+    if (basisType == "Numerical")
+        myComm->barrier_and_abort(
+            "Purely numerical atomic orbitals are not supported any longer.");
+
+    return true;
+}
+
+template <class COT>
+bool
+AOBasisBuilderT<COT>::putH5(hdf_archive& hin)
+{
+    ReportEngine PRE("AtomicBasisBuilder", "putH5(hin)");
+    std::string CenterID, basisName;
+
+    if (myComm->rank() == 0) {
+        hin.read(sph, "angular");
+        hin.read(CenterID, "elementType");
+        hin.read(Normalized, "normalized");
+        hin.read(Morder, "expandYlm");
+        hin.read(basisName, "name");
+    }
+
+    myComm->bcast(sph);
+    myComm->bcast(Morder);
+    myComm->bcast(CenterID);
+    myComm->bcast(Normalized);
+    myComm->bcast(basisName);
+    myComm->bcast(basisType);
+    myComm->bcast(addsignforM);
+
+    if (sph == "spherical")
+        addsignforM = 1; // include (-1)^m
+
+    if (Morder == "gaussian")
+        expandlm = GAUSSIAN_EXPAND;
+    else if (Morder == "natural")
+        expandlm = NATURAL_EXPAND;
+    else if (Morder == "no")
+        expandlm = DONOT_EXPAND;
+    else if (Morder == "pyscf") {
+        expandlm = MOD_NATURAL_EXPAND;
+        addsignforM = 1;
+        if (sph != "spherical") {
+            myComm->barrier_and_abort(
+                " Error: expandYlm='pyscf' only compatible with "
+                "angular='spherical'. Aborting.\n");
+        }
+    }
+
+    if (sph == "cartesian" || Morder == "Gamess") {
+        expandlm = CARTESIAN_EXPAND;
+        addsignforM = 0;
+    }
+
+    if (Morder == "Dirac") {
+        expandlm = DIRAC_CARTESIAN_EXPAND;
+        addsignforM = 0;
+        if (sph != "cartesian")
+            myComm->barrier_and_abort(
+                " Error: expandYlm='Dirac' only compatible with "
+                "angular='cartesian'. Aborting\n");
+    }
+    app_log() << R"(<input node="atomicBasisSet" name=")" << basisName
+              << "\" expandYlm=\"" << Morder << "\" angular=\"" << sph
+              << "\" elementType=\"" << CenterID << "\" normalized=\""
+              << Normalized << "\" type=\"" << basisType << "\" expM=\""
+              << addsignforM << "\" />" << std::endl;
+
+    return true;
+}
+
+template <typename COT>
+std::unique_ptr<COT>
+AOBasisBuilderT<COT>::createAOSet(xmlNodePtr cur)
+{
+    ReportEngine PRE("AtomicBasisBuilder", "createAOSet(xmlNodePtr)");
+    app_log() << "  AO BasisSet for " << elementType << "\n";
+
+    if (expandlm != CARTESIAN_EXPAND) {
+        if (addsignforM)
+            app_log() << "   Spherical Harmonics contain (-1)^m factor"
+                      << std::endl;
+        else
+            app_log() << "   Spherical Harmonics  DO NOT contain (-1)^m factor"
+                      << std::endl;
+    }
+
+    switch (expandlm) {
+    case (GAUSSIAN_EXPAND):
+        app_log() << "   Angular momentum m expanded according to Gaussian"
+                  << std::endl;
+        break;
+    case (NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l"
+                  << std::endl;
+        break;
+    case (MOD_NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l, with the "
+                     "exception of L=1 (1,-1,0)"
+                  << std::endl;
+        break;
+    case (CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions x^lx "
+                     "y^ly z^lz according to Gamess"
+                  << std::endl;
+        break;
+    case (DIRAC_CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions in "
+                     "DIRAC ordering"
+                  << std::endl;
+        break;
+    default:
+        app_log() << "   Angular momentum m is explicitly given." << std::endl;
+    }
+
+    QuantumNumberType nlms;
+    std::string rnl;
+    int Lmax(0); // maxmimum angular momentum of this center
+    int num(0); // the number of localized basis functions of this center
+    // process the basic property: maximun angular momentum, the number of basis
+    // functions to be added
+    std::vector<xmlNodePtr> radGroup;
+    xmlNodePtr cur1 = cur->xmlChildrenNode;
+    xmlNodePtr gptr = 0;
+    while (cur1 != NULL) {
+        std::string cname1((const char*)(cur1->name));
+        if (cname1 == "basisGroup") {
+            radGroup.push_back(cur1);
+            const int l = std::stoi(getXMLAttributeValue(cur1, "l"));
+            Lmax = std::max(Lmax, l);
+            // expect that only Rnl is given
+            if (expandlm == CARTESIAN_EXPAND ||
+                expandlm == DIRAC_CARTESIAN_EXPAND)
+                num += (l + 1) * (l + 2) / 2;
+            else if (expandlm)
+                num += 2 * l + 1;
+            else
+                num++;
+        }
+        else if (cname1 == "grid") {
+            gptr = cur1;
+        }
+        cur1 = cur1->next;
+    }
+
+    // create a new set of atomic orbitals sharing a center with (Lmax, num)
+    // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm)
+    auto aos = std::make_unique<COT>(Lmax, addsignforM);
+    aos->LM.resize(num);
+    aos->NL.resize(num);
+
+    // Now, add distinct Radial Orbitals and (l,m) channels
+    RadialOrbitalSetBuilder<COT> radFuncBuilder(myComm, *aos);
+    radFuncBuilder.Normalized = (Normalized == "yes");
+    radFuncBuilder.addGrid(
+        gptr, basisType); // assign a radial grid for the new center
+    std::vector<xmlNodePtr>::iterator it(radGroup.begin());
+    std::vector<xmlNodePtr>::iterator it_end(radGroup.end());
+    std::vector<int> all_nl;
+    while (it != it_end) {
+        cur1 = (*it);
+        xmlAttrPtr att = cur1->properties;
+        while (att != NULL) {
+            std::string aname((const char*)(att->name));
+            if (aname == "rid" || aname == "id")
+            // accept id/rid
+            {
+                rnl = (const char*)(att->children->content);
+            }
+            else {
+                std::map<std::string, int>::iterator iit = nlms_id.find(aname);
+                if (iit != nlms_id.end())
+                // valid for n,l,m,s
+                {
+                    nlms[(*iit).second] =
+                        atoi((const char*)(att->children->content));
+                }
+            }
+            att = att->next;
+        }
+        // add Ylm channels
+        app_log() << "   R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " "
+                  << nlms[2] << " " << nlms[3] << std::endl;
+        std::map<std::string, int>::iterator rnl_it = RnlID.find(rnl);
+        if (rnl_it == RnlID.end()) {
+            int nl = aos->RnlID.size();
+            if (radFuncBuilder.addRadialOrbital(cur1, basisType, nlms))
+                RnlID[rnl] = nl;
+            all_nl.push_back(nl);
+        }
+        else {
+            all_nl.push_back((*rnl_it).second);
+        }
+        ++it;
+    }
+
+    if (expandYlm(aos.get(), all_nl, expandlm) != num)
+        myComm->barrier_and_abort(
+            "expandYlm doesn't match the number of basis.");
+    radFuncBuilder.finalize();
+    // aos->Rmax can be set small
+    // aos->setRmax(0);
+    aos->setBasisSetSize(-1);
+    app_log() << "   Maximum Angular Momentum  = " << aos->Ylm.lmax()
+              << std::endl
+              << "   Number of Radial functors = " << aos->RnlID.size()
+              << std::endl
+              << "   Basis size                = " << aos->getBasisSetSize()
+              << "\n\n";
+    return aos;
+}
+
+template <typename COT>
+std::unique_ptr<COT>
+AOBasisBuilderT<COT>::createAOSetH5(hdf_archive& hin)
+{
+    ReportEngine PRE("AOBasisBuilderT:", "createAOSetH5(std::string)");
+    app_log() << "  AO BasisSet for " << elementType << "\n";
+
+    if (expandlm != CARTESIAN_EXPAND) {
+        if (addsignforM)
+            app_log() << "   Spherical Harmonics contain (-1)^m factor"
+                      << std::endl;
+        else
+            app_log() << "   Spherical Harmonics  DO NOT contain (-1)^m factor"
+                      << std::endl;
+    }
+
+    switch (expandlm) {
+    case (GAUSSIAN_EXPAND):
+        app_log() << "   Angular momentum m expanded according to Gaussian"
+                  << std::endl;
+        break;
+    case (NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l"
+                  << std::endl;
+        break;
+    case (MOD_NATURAL_EXPAND):
+        app_log() << "   Angular momentum m expanded as -l, ... ,l, with the "
+                     "exception of L=1 (1,-1,0)"
+                  << std::endl;
+        break;
+    case (CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions x^lx "
+                     "y^ly z^lz according to Gamess"
+                  << std::endl;
+        break;
+    case (DIRAC_CARTESIAN_EXPAND):
+        app_log() << "   Angular momentum expanded in cartesian functions in "
+                     "DIRAC ordering"
+                  << std::endl;
+        break;
+    default:
+        app_log() << "   Angular momentum m is explicitly given." << std::endl;
+    }
+
+    QuantumNumberType nlms;
+    std::string rnl;
+    int Lmax(0); // maxmimum angular momentum of this center
+    int num(0); // the number of localized basis functions of this center
+
+    int numbasisgroups(0);
+    if (myComm->rank() == 0) {
+        if (!hin.readEntry(numbasisgroups, "NbBasisGroups"))
+            PRE.error(
+                "Could not read NbBasisGroups in H5; Probably Corrupt H5 file",
+                true);
+    }
+    myComm->bcast(numbasisgroups);
+
+    for (int i = 0; i < numbasisgroups; i++) {
+        std::string basisGroupID = "basisGroup" + std::to_string(i);
+        int l(0);
+        if (myComm->rank() == 0) {
+            hin.push(basisGroupID);
+            hin.read(l, "l");
+            hin.pop();
+        }
+        myComm->bcast(l);
+
+        Lmax = std::max(Lmax, l);
+        // expect that only Rnl is given
+        if (expandlm == CARTESIAN_EXPAND || expandlm == DIRAC_CARTESIAN_EXPAND)
+            num += (l + 1) * (l + 2) / 2;
+        else if (expandlm)
+            num += 2 * l + 1;
+        else
+            num++;
+    }
+
+    // create a new set of atomic orbitals sharing a center with (Lmax, num)
+    // if(addsignforM) the basis function has (-1)^m sqrt(2)Re(Ylm)
+    auto aos = std::make_unique<COT>(Lmax, addsignforM);
+    aos->LM.resize(num);
+    aos->NL.resize(num);
+
+    // Now, add distinct Radial Orbitals and (l,m) channels
+    RadialOrbitalSetBuilder<COT> radFuncBuilder(myComm, *aos);
+    radFuncBuilder.Normalized = (Normalized == "yes");
+    radFuncBuilder.addGridH5(hin); // assign a radial grid for the new center
+    std::vector<int> all_nl;
+    for (int i = 0; i < numbasisgroups; i++) {
+        std::string basisGroupID = "basisGroup" + std::to_string(i);
+        if (myComm->rank() == 0) {
+            hin.push(basisGroupID);
+            hin.read(rnl, "rid");
+            hin.read(nlms[0], "n");
+            hin.read(nlms[1], "l");
+        }
+        myComm->bcast(rnl);
+        myComm->bcast(nlms[0]);
+        myComm->bcast(nlms[1]);
+
+        // add Ylm channels
+        app_log() << "   R(n,l,m,s) " << nlms[0] << " " << nlms[1] << " "
+                  << nlms[2] << " " << nlms[3] << std::endl;
+        std::map<std::string, int>::iterator rnl_it = RnlID.find(rnl);
+        if (rnl_it == RnlID.end()) {
+            int nl = aos->RnlID.size();
+            if (radFuncBuilder.addRadialOrbitalH5(hin, basisType, nlms))
+                RnlID[rnl] = nl;
+            all_nl.push_back(nl);
+        }
+        else {
+            all_nl.push_back((*rnl_it).second);
+        }
+
+        if (myComm->rank() == 0)
+            hin.pop();
+    }
+
+    if (expandYlm(aos.get(), all_nl, expandlm) != num)
+        myComm->barrier_and_abort(
+            "expandYlm doesn't match the number of basis.");
+    radFuncBuilder.finalize();
+    // aos->Rmax can be set small
+    // aos->setRmax(0);
+    aos->setBasisSetSize(-1);
+    app_log() << "   Maximum Angular Momentum  = " << aos->Ylm.lmax()
+              << std::endl
+              << "   Number of Radial functors = " << aos->RnlID.size()
+              << std::endl
+              << "   Basis size                = " << aos->getBasisSetSize()
+              << "\n\n";
+    return aos;
+}
+
+template <typename COT>
+int
+AOBasisBuilderT<COT>::expandYlm(
+    COT* aos, std::vector<int>& all_nl, int expandlm)
+{
+    int num = 0;
+    if (expandlm == GAUSSIAN_EXPAND) {
+        app_log() << "Expanding Ylm according to Gaussian98" << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "Adding " << 2 * l + 1
+                      << " spherical orbitals for l= " << l << std::endl;
+            switch (l) {
+            case (0):
+                aos->LM[num] = aos->Ylm.index(0, 0);
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (1): // px(1),py(-1),pz(0)
+                aos->LM[num] = aos->Ylm.index(1, 1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, -1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, 0);
+                aos->NL[num] = nl;
+                num++;
+                break;
+            default: // 0,1,-1,2,-2,...,l,-l
+                aos->LM[num] = aos->Ylm.index(l, 0);
+                aos->NL[num] = nl;
+                num++;
+                for (int tm = 1; tm <= l; tm++) {
+                    aos->LM[num] = aos->Ylm.index(l, tm);
+                    aos->NL[num] = nl;
+                    num++;
+                    aos->LM[num] = aos->Ylm.index(l, -tm);
+                    aos->NL[num] = nl;
+                    num++;
+                }
+                break;
+            }
+        }
+    }
+    else if (expandlm == MOD_NATURAL_EXPAND) {
+        app_log()
+            << "Expanding Ylm as L=1 as (1,-1,0) and L>1 as -l,-l+1,...,l-1,l"
+            << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "   Adding " << 2 * l + 1 << " spherical orbitals"
+                      << std::endl;
+            if (l == 1) {
+                // px(1),py(-1),pz(0)
+                aos->LM[num] = aos->Ylm.index(1, 1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, -1);
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = aos->Ylm.index(1, 0);
+                aos->NL[num] = nl;
+                num++;
+            }
+            else {
+                for (int tm = -l; tm <= l; tm++, num++) {
+                    aos->LM[num] = aos->Ylm.index(l, tm);
+                    aos->NL[num] = nl;
+                }
+            }
+        }
+    }
+    else if (expandlm == NATURAL_EXPAND) {
+        app_log() << "Expanding Ylm as -l,-l+1,...,l-1,l" << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "   Adding " << 2 * l + 1 << " spherical orbitals"
+                      << std::endl;
+            for (int tm = -l; tm <= l; tm++, num++) {
+                aos->LM[num] = aos->Ylm.index(l, tm);
+                aos->NL[num] = nl;
+            }
+        }
+    }
+    else if (expandlm == CARTESIAN_EXPAND) {
+        app_log() << "Expanding Ylm (angular function) according to Gamess "
+                     "using cartesian gaussians"
+                  << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "Adding " << (l + 1) * (l + 2) / 2
+                      << " cartesian gaussian orbitals for l= " << l
+                      << std::endl;
+            int nbefore = 0;
+            for (int i = 0; i < l; i++)
+                nbefore += (i + 1) * (i + 2) / 2;
+            for (int i = 0; i < (l + 1) * (l + 2) / 2; i++) {
+                aos->LM[num] = nbefore + i;
+                aos->NL[num] = nl;
+                num++;
+            }
+        }
+    }
+    else if (expandlm == DIRAC_CARTESIAN_EXPAND) {
+        app_log() << "Expanding Ylm (angular function) according to DIRAC "
+                     "using cartesian gaussians"
+                  << std::endl;
+        for (int nl = 0; nl < aos->RnlID.size(); nl++) {
+            int l = aos->RnlID[nl][q_l];
+            app_log() << "Adding " << (l + 1) * (l + 2) / 2
+                      << " cartesian gaussian orbitals for l= " << l
+                      << std::endl;
+            int nbefore = 0;
+            for (int i = 0; i < l; i++)
+                nbefore += (i + 1) * (i + 2) / 2;
+            switch (l) {
+            case (0):
+                aos->LM[num] = nbefore + 0;
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (1):
+                aos->LM[num] = nbefore + 0;
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1;
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2;
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (2):
+                aos->LM[num] = nbefore + 0; // xx
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // xy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // xz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // yy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // yz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // zz
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (3):
+                aos->LM[num] = nbefore + 0; // xxx
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // xxy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // xxz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // xyy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // xyz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // xzz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // yyy
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // yyz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // yzz
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // zzz
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (4):
+                aos->LM[num] = nbefore + 0; // 400
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // 310
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // 301
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // 220
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 12; // 211
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 10; // 202
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // 130
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 13; // 121
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 14; // 112
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // 103
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // 040
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // 031
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 11; // 022
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // 013
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // 004
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (5):
+                aos->LM[num] = nbefore + 0; // 500
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // 410
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // 401
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // 320
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 15; // 311
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 10; // 302
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 11; // 230
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 18; // 221
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 19; // 212
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 13; // 203
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // 140
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 16; // 131
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 20; // 122
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 17; // 113
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // 104
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // 050
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // 041
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 12; // 032
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 14; // 023
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // 014
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // 005
+                aos->NL[num] = nl;
+                num++;
+                break;
+            case (6):
+                aos->LM[num] = nbefore + 0; // 600
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 3; // 510
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 4; // 501
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 9; // 420
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 15; // 411
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 10; // 402
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 18; // 330
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 21; // 321
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 22; // 312
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 19; // 303
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 11; // 240
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 23; // 231
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 27; // 222
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 25; // 213
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 13; // 204
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 5; // 150
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 16; // 141
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 24; // 132
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 26; // 123
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 17; // 114
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 7; // 105
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 1; // 060
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 6; // 051
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 12; // 042
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 20; // 033
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 14; // 024
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 8; // 015
+                aos->NL[num] = nl;
+                num++;
+                aos->LM[num] = nbefore + 2; // 006
+                aos->NL[num] = nl;
+                num++;
+                break;
+            default:
+                myComm->barrier_and_abort(
+                    "Cartesian Tensor only defined up to Lmax=6. Aborting\n");
+                break;
+            }
+        }
+    }
+    else {
+        for (int ind = 0; ind < all_nl.size(); ind++) {
+            int nl = all_nl[ind];
+            int l = aos->RnlID[nl][q_l];
+            int m = aos->RnlID[nl][q_m];
+            // assign the index for real Spherical Harmonic with (l,m)
+            aos->LM[num] = aos->Ylm.index(l, m);
+            // assign the index for radial orbital with (n,l)
+            aos->NL[num] = nl;
+            // increment number of basis functions
+            num++;
+        }
+    }
+    return num;
+}
+
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaCartesianTensor<double>, double>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaCartesianTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaCartesianTensor<float>, float>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaCartesianTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaSphericalTensor<double>, double>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<double>,
+    SoaSphericalTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaSphericalTensor<float>, float>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<MultiQuinticSpline1D<float>,
+    SoaSphericalTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<
+    MultiFunctorAdapter<SlaterCombo<float>>, SoaCartesianTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>>;
+
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, double>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>>;
+template class AOBasisBuilderT<SoaAtomicBasisSetT<
+    MultiFunctorAdapter<SlaterCombo<float>>, SoaSphericalTensor<float>, float>>;
+template class AOBasisBuilderT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>>;
+
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h
new file mode 100644
index 00000000000..144b2b4dc9a
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/AOBasisBuilderT.h
@@ -0,0 +1,75 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2020 QMCPACK developers.
+//
+// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
+//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
+//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
+//
+// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef QMCPLUSPLUS_ATOMICORBITALBUILDERT_H
+#define QMCPLUSPLUS_ATOMICORBITALBUILDERT_H
+
+
+#include "Message/MPIObjectBase.h"
+#include "hdf/hdf_archive.h"
+#include "QMCWaveFunctions/SPOSet.h"
+
+namespace qmcplusplus
+{
+/** atomic basisset builder
+   * @tparam COT, CenteredOrbitalType = SoaAtomicBasisSet<RF,SH>
+   *
+   * Reimplement AtomiSPOSetBuilder.h
+   */
+template<typename COT>
+class AOBasisBuilderT : public MPIObjectBase
+{
+public:
+  enum
+  {
+    DONOT_EXPAND    = 0,
+    GAUSSIAN_EXPAND = 1,
+    NATURAL_EXPAND,
+    CARTESIAN_EXPAND,
+    MOD_NATURAL_EXPAND,
+    DIRAC_CARTESIAN_EXPAND
+  };
+
+private:
+  bool addsignforM;
+  int expandlm;
+  std::string Morder;
+  std::string sph;
+  std::string basisType;
+  std::string elementType;
+  std::string Normalized;
+
+  ///map for the radial orbitals
+  std::map<std::string, int> RnlID;
+
+  ///map for (n,l,m,s) to its quantum number index
+  std::map<std::string, int> nlms_id;
+
+public:
+  AOBasisBuilderT(const std::string& eName, Communicate* comm);
+
+  bool put(xmlNodePtr cur);
+  bool putH5(hdf_archive& hin);
+
+  SPOSet* createSPOSetFromXML(xmlNodePtr cur) { return 0; }
+
+  std::unique_ptr<COT> createAOSet(xmlNodePtr cur);
+  std::unique_ptr<COT> createAOSetH5(hdf_archive& hin);
+
+  int expandYlm(COT* aos, std::vector<int>& all_nl, int expandlm = DONOT_EXPAND);
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
index 4e1e4f6bd1b..f35d56efd16 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.cpp
@@ -19,22 +19,20 @@
 
 #include "LCAOrbitalBuilderT.h"
 
-#include "AOBasisBuilder.h"
+#include "AOBasisBuilderT.h"
+#include "CPU/math.hpp"
+#include "CuspCorrectionConstructionT.h"
 #include "LCAOrbitalSetT.h"
+#include "LCAOrbitalSetWithCorrectionT.h"
+#include "Message/CommOperators.h"
 #include "MultiFunctorAdapter.h"
 #include "MultiQuinticSpline1D.h"
 #include "Numerics/SoaCartesianTensor.h"
 #include "Numerics/SoaSphericalTensor.h"
 #include "OhmmsData/AttributeSet.h"
 #include "QMCWaveFunctions/SPOSetT.h"
-#include "SoaAtomicBasisSet.h"
-#include "SoaLocalizedBasisSet.h"
-#if !defined(QMC_COMPLEX)
-#include "CuspCorrectionConstructionT.h"
-#include "LCAOrbitalSetWithCorrectionT.h"
-#endif
-#include "CPU/math.hpp"
-#include "Message/CommOperators.h"
+#include "SoaAtomicBasisSetT.h"
+#include "SoaLocalizedBasisSetT.h"
 #include "Utilities/ProgressReportEngine.h"
 #include "hdf/hdf_archive.h"
 
@@ -61,8 +59,8 @@ struct ao_traits<T, ORBT, 0, 0>
 {
     using radial_type = MultiQuinticSpline1D<T>;
     using angular_type = SoaCartesianTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for numerical-spherical AO */
@@ -71,8 +69,8 @@ struct ao_traits<T, ORBT, 0, 1>
 {
     using radial_type = MultiQuinticSpline1D<T>;
     using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for GTO-cartesian AO */
@@ -81,8 +79,8 @@ struct ao_traits<T, ORBT, 1, 0>
 {
     using radial_type = MultiFunctorAdapter<GaussianCombo<T>>;
     using angular_type = SoaCartesianTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for GTO-cartesian AO */
@@ -91,8 +89,8 @@ struct ao_traits<T, ORBT, 1, 1>
 {
     using radial_type = MultiFunctorAdapter<GaussianCombo<T>>;
     using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 /** specialization for STO-spherical AO */
@@ -101,8 +99,8 @@ struct ao_traits<T, ORBT, 2, 1>
 {
     using radial_type = MultiFunctorAdapter<SlaterCombo<T>>;
     using angular_type = SoaSphericalTensor<T>;
-    using ao_type = SoaAtomicBasisSet<radial_type, angular_type>;
-    using basis_type = SoaLocalizedBasisSet<ao_type, ORBT>;
+    using ao_type = SoaAtomicBasisSetT<radial_type, angular_type, ORBT>;
+    using basis_type = SoaLocalizedBasisSetT<ao_type, ORBT>;
 };
 
 inline bool
@@ -243,7 +241,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent)
     /** process atomicBasisSet per ion species */
     switch (radialOrbType) {
     case (0): // numerical
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiQuintic," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiQuintic," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSet<0, 1>(cur);
@@ -251,7 +249,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent)
             myBasisSet = createBasisSet<0, 0>(cur);
         break;
     case (1): // gto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiGTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiGTO," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSet<1, 1>(cur);
@@ -259,12 +257,12 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent)
             myBasisSet = createBasisSet<1, 0>(cur);
         break;
     case (2): // sto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiSTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiSTO," << ylm << ">"
                   << std::endl;
         myBasisSet = createBasisSet<2, 1>(cur);
         break;
     default:
-        PRE.error("Cannot construct SoaAtomicBasisSet<ROT,YLM>.", true);
+        PRE.error("Cannot construct SoaAtomicBasisSetT<ROT,YLM>.", true);
         break;
     }
 
@@ -312,7 +310,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromH5(xmlNodePtr parent)
     /** process atomicBasisSet per ion species */
     switch (radialOrbType) {
     case (0): // numerical
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiQuintic," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiQuintic," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSetH5<0, 1>();
@@ -320,7 +318,7 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromH5(xmlNodePtr parent)
             myBasisSet = createBasisSetH5<0, 0>();
         break;
     case (1): // gto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiGTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiGTO," << ylm << ">"
                   << std::endl;
         if (ylm)
             myBasisSet = createBasisSetH5<1, 1>();
@@ -328,12 +326,12 @@ LCAOrbitalBuilderT<T>::loadBasisSetFromH5(xmlNodePtr parent)
             myBasisSet = createBasisSetH5<1, 0>();
         break;
     case (2): // sto
-        app_log() << "  LCAO: SoaAtomicBasisSet<MultiSTO," << ylm << ">"
+        app_log() << "  LCAO: SoaAtomicBasisSetT<MultiSTO," << ylm << ">"
                   << std::endl;
         myBasisSet = createBasisSetH5<2, 1>();
         break;
     default:
-        PRE.error("Cannot construct SoaAtomicBasisSet<ROT,YLM>.", true);
+        PRE.error("Cannot construct SoaAtomicBasisSetT<ROT,YLM>.", true);
         break;
     }
     return std::unique_ptr<BasisSet_t>(myBasisSet);
@@ -374,7 +372,7 @@ LCAOrbitalBuilderT<T>::createBasisSet(xmlNodePtr cur)
             auto it = std::find(
                 ao_built_centers.begin(), ao_built_centers.end(), elementType);
             if (it == ao_built_centers.end()) {
-                AOBasisBuilder<ao_type> any(elementType, this->myComm);
+                AOBasisBuilderT<ao_type> any(elementType, this->myComm);
                 any.put(cur);
                 auto aoBasis = any.createAOSet(cur);
                 if (aoBasis) {
@@ -453,7 +451,7 @@ LCAOrbitalBuilderT<T>::createBasisSetH5()
         auto it = std::find(
             ao_built_centers.begin(), ao_built_centers.end(), elementType);
         if (it == ao_built_centers.end()) {
-            AOBasisBuilder<ao_type> any(elementType, this->myComm);
+            AOBasisBuilderT<ao_type> any(elementType, this->myComm);
             any.putH5(hin);
             auto aoBasis = any.createAOSetH5(hin);
             if (aoBasis) {
@@ -478,6 +476,174 @@ LCAOrbitalBuilderT<T>::createBasisSetH5()
     return mBasisSet;
 }
 
+template <>
+std::unique_ptr<SPOSetT<double>>
+LCAOrbitalBuilderT<double>::createWithCuspCorrection(xmlNodePtr cur,
+    const std::string& spo_name, std::string cusp_file,
+    std::unique_ptr<BasisSet_t>&& myBasisSet)
+{
+    app_summary() << "        Using cusp correction." << std::endl;
+    std::unique_ptr<SPOSetT<double>> sposet;
+    {
+        auto lcwc = std::make_unique<LCAOrbitalSetWithCorrectionT<double>>(
+            spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
+        loadMO(lcwc->lcao, cur);
+        lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
+        sposet = std::move(lcwc);
+    }
+
+    // Create a temporary particle set to use for cusp initialization.
+    // The particle coordinates left at the end are unsuitable for further
+    // computations. The coordinates get set to nuclear positions, which
+    // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h
+    // This problem only appears when the electron positions are specified
+    // in the input. The random particle placement step executes after this
+    // part of the code, overwriting the leftover positions from the cusp
+    // initialization.
+    ParticleSet tmp_targetPtcl(targetPtcl);
+
+    const int num_centers = sourcePtcl.getTotalNum();
+    auto& lcwc = dynamic_cast<LCAOrbitalSetWithCorrectionT<double>&>(*sposet);
+
+    const int orbital_set_size = lcwc.getOrbitalSetSize();
+    Matrix<CuspCorrectionParametersT<double>> info(
+        num_centers, orbital_set_size);
+
+    // set a default file name if not given
+    if (cusp_file.empty())
+        cusp_file = spo_name + ".cuspInfo.xml";
+
+    bool file_exists(
+        this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
+    this->myComm->bcast(file_exists);
+    app_log() << "  Cusp correction file " << cusp_file
+              << (file_exists ? " exits." : " doesn't exist.") << std::endl;
+
+    // validate file if it exists
+    if (file_exists) {
+        bool valid = 0;
+        if (this->myComm->rank() == 0)
+            valid = CuspCorrectionConstructionT<double>::readCuspInfo(
+                cusp_file, spo_name, orbital_set_size, info);
+        this->myComm->bcast(valid);
+        if (!valid)
+            this->myComm->barrier_and_abort(
+                "Invalid cusp correction file " + cusp_file);
+#ifdef HAVE_MPI
+        for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
+            for (int center_idx = 0; center_idx < num_centers; center_idx++)
+                CuspCorrectionConstructionT<double>::broadcastCuspInfo(
+                    info(center_idx, orb_idx), *this->myComm, 0);
+#endif
+    }
+    else {
+        CuspCorrectionConstructionT<double>::generateCuspInfo(info,
+            tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm);
+        if (this->myComm->rank() == 0)
+            CuspCorrectionConstructionT<double>::saveCusp(
+                cusp_file, info, spo_name);
+    }
+
+    CuspCorrectionConstructionT<double>::applyCuspCorrection(
+        info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name);
+
+    return sposet;
+}
+
+template <>
+std::unique_ptr<SPOSetT<float>>
+LCAOrbitalBuilderT<float>::createWithCuspCorrection(xmlNodePtr cur,
+    const std::string& spo_name, std::string cusp_file,
+    std::unique_ptr<BasisSet_t>&& myBasisSet)
+{
+    app_summary() << "        Using cusp correction." << std::endl;
+    std::unique_ptr<SPOSetT<float>> sposet;
+    {
+        auto lcwc = std::make_unique<LCAOrbitalSetWithCorrectionT<float>>(
+            spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
+        loadMO(lcwc->lcao, cur);
+        lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
+        sposet = std::move(lcwc);
+    }
+
+    // Create a temporary particle set to use for cusp initialization.
+    // The particle coordinates left at the end are unsuitable for further
+    // computations. The coordinates get set to nuclear positions, which
+    // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h
+    // This problem only appears when the electron positions are specified
+    // in the input. The random particle placement step executes after this
+    // part of the code, overwriting the leftover positions from the cusp
+    // initialization.
+    ParticleSet tmp_targetPtcl(targetPtcl);
+
+    const int num_centers = sourcePtcl.getTotalNum();
+    auto& lcwc = dynamic_cast<LCAOrbitalSetWithCorrectionT<float>&>(*sposet);
+
+    const int orbital_set_size = lcwc.getOrbitalSetSize();
+    Matrix<CuspCorrectionParametersT<float>> info(
+        num_centers, orbital_set_size);
+
+    // set a default file name if not given
+    if (cusp_file.empty())
+        cusp_file = spo_name + ".cuspInfo.xml";
+
+    bool file_exists(
+        this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
+    this->myComm->bcast(file_exists);
+    app_log() << "  Cusp correction file " << cusp_file
+              << (file_exists ? " exits." : " doesn't exist.") << std::endl;
+
+    // validate file if it exists
+    if (file_exists) {
+        bool valid = 0;
+        if (this->myComm->rank() == 0)
+            valid = CuspCorrectionConstructionT<float>::readCuspInfo(
+                cusp_file, spo_name, orbital_set_size, info);
+        this->myComm->bcast(valid);
+        if (!valid)
+            this->myComm->barrier_and_abort(
+                "Invalid cusp correction file " + cusp_file);
+#ifdef HAVE_MPI
+        for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
+            for (int center_idx = 0; center_idx < num_centers; center_idx++)
+                CuspCorrectionConstructionT<float>::broadcastCuspInfo(
+                    info(center_idx, orb_idx), *this->myComm, 0);
+#endif
+    }
+    else {
+        CuspCorrectionConstructionT<float>::generateCuspInfo(info,
+            tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm);
+        if (this->myComm->rank() == 0)
+            CuspCorrectionConstructionT<float>::saveCusp(
+                cusp_file, info, spo_name);
+    }
+
+    CuspCorrectionConstructionT<float>::applyCuspCorrection(
+        info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name);
+
+    return sposet;
+}
+
+template <>
+std::unique_ptr<SPOSetT<std::complex<double>>>
+LCAOrbitalBuilderT<std::complex<double>>::createWithCuspCorrection(
+    xmlNodePtr, const std::string&, std::string, std::unique_ptr<BasisSet_t>&&)
+{
+    this->myComm->barrier_and_abort(
+        "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
+        "supported on complex LCAO.");
+}
+
+template <>
+std::unique_ptr<SPOSetT<std::complex<float>>>
+LCAOrbitalBuilderT<std::complex<float>>::createWithCuspCorrection(
+    xmlNodePtr, const std::string&, std::string, std::unique_ptr<BasisSet_t>&&)
+{
+    this->myComm->barrier_and_abort(
+        "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
+        "supported on complex LCAO.");
+}
+
 template <typename T>
 std::unique_ptr<SPOSetT<T>>
 LCAOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
@@ -501,18 +667,8 @@ LCAOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
 
     std::unique_ptr<SPOSetT<T>> sposet;
     if (doCuspCorrection) {
-#if defined(QMC_COMPLEX)
-        this->myComm->barrier_and_abort(
-            "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not "
-            "supported on complex LCAO.");
-#else
-        app_summary() << "        Using cusp correction." << std::endl;
-        auto lcwc = std::make_unique<LCAOrbitalSetWithCorrectionT<T>>(
-            spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet));
-        loadMO(lcwc->lcao, cur);
-        lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize());
-        sposet = std::move(lcwc);
-#endif
+        createWithCuspCorrection(
+            cur, spo_name, cusp_file, std::move(myBasisSet));
     }
     else {
         auto lcos = std::make_unique<LCAOrbitalSetT<T>>(
@@ -521,65 +677,6 @@ LCAOrbitalBuilderT<T>::createSPOSetFromXML(xmlNodePtr cur)
         sposet = std::move(lcos);
     }
 
-#if !defined(QMC_COMPLEX)
-    if (doCuspCorrection) {
-        // Create a temporary particle set to use for cusp initialization.
-        // The particle coordinates left at the end are unsuitable for further
-        // computations. The coordinates get set to nuclear positions, which
-        // leads to zero e-N distance, which causes a NaN in SoaAtomicBasisSet.h
-        // This problem only appears when the electron positions are specified
-        // in the input. The random particle placement step executes after this
-        // part of the code, overwriting the leftover positions from the cusp
-        // initialization.
-        ParticleSet tmp_targetPtcl(targetPtcl);
-
-        const int num_centers = sourcePtcl.getTotalNum();
-        auto& lcwc = dynamic_cast<LCAOrbitalSetWithCorrectionT<T>&>(*sposet);
-
-        const int orbital_set_size = lcwc.getOrbitalSetSize();
-        Matrix<CuspCorrectionParametersT<T>> info(
-            num_centers, orbital_set_size);
-
-        // set a default file name if not given
-        if (cusp_file.empty())
-            cusp_file = spo_name + ".cuspInfo.xml";
-
-        bool file_exists(
-            this->myComm->rank() == 0 && std::ifstream(cusp_file).good());
-        this->myComm->bcast(file_exists);
-        app_log() << "  Cusp correction file " << cusp_file
-                  << (file_exists ? " exits." : " doesn't exist.") << std::endl;
-
-        // validate file if it exists
-        if (file_exists) {
-            bool valid = 0;
-            if (this->myComm->rank() == 0)
-                valid = CuspCorrectionConstructionT<T>::readCuspInfo(
-                    cusp_file, spo_name, orbital_set_size, info);
-            this->myComm->bcast(valid);
-            if (!valid)
-                this->myComm->barrier_and_abort(
-                    "Invalid cusp correction file " + cusp_file);
-#ifdef HAVE_MPI
-            for (int orb_idx = 0; orb_idx < orbital_set_size; orb_idx++)
-                for (int center_idx = 0; center_idx < num_centers; center_idx++)
-                    CuspCorrectionConstructionT<T>::broadcastCuspInfo(
-                        info(center_idx, orb_idx), *this->myComm, 0);
-#endif
-        }
-        else {
-            CuspCorrectionConstructionT<T>::generateCuspInfo(info,
-                tmp_targetPtcl, sourcePtcl, lcwc.lcao, spo_name, *this->myComm);
-            if (this->myComm->rank() == 0)
-                CuspCorrectionConstructionT<T>::saveCusp(
-                    cusp_file, info, spo_name);
-        }
-
-        CuspCorrectionConstructionT<T>::applyCuspCorrection(
-            info, tmp_targetPtcl, sourcePtcl, lcwc.lcao, lcwc.cusp, spo_name);
-    }
-#endif
-
     return sposet;
 }
 
diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
index a746326df7d..83386951d28 100644
--- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
+++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilderT.h
@@ -125,6 +125,10 @@ class LCAOrbitalBuilderT : public SPOSetBuilderT<T>
         Matrix<RealType>& Creal) const;
 
 private:
+    /// enable cusp correction
+    std::unique_ptr<SPOSetT<T>>
+    createWithCuspCorrection(xmlNodePtr cur, const std::string& spo_name,
+        std::string cusp_file, std::unique_ptr<BasisSet_t>&& myBasisSet);
     /// load a basis set from XML input
     std::unique_ptr<BasisSet_t>
     loadBasisSetFromXML(xmlNodePtr cur, xmlNodePtr parent);
diff --git a/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h b/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h
index 110491c006d..ee9ecde7fef 100644
--- a/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h
+++ b/src/QMCWaveFunctions/LCAO/MultiFunctorAdapter.h
@@ -21,6 +21,7 @@
 #include "hdf/hdf_archive.h"
 #include "LCAO/MultiQuinticSpline1D.h"
 #include "LCAO/SoaAtomicBasisSet.h"
+#include "LCAO/SoaAtomicBasisSetT.h"
 
 namespace qmcplusplus
 {
@@ -145,5 +146,60 @@ class RadialOrbitalSetBuilder<SoaAtomicBasisSet<MultiFunctorAdapter<FN>, SH>> :
     m_orbitals.setRmax(0); //set Rmax
   }
 };
+
+template<typename FN, typename SH, typename ORBT>
+class RadialOrbitalSetBuilder<SoaAtomicBasisSetT<MultiFunctorAdapter<FN>, SH, ORBT>> : public MPIObjectBase
+{
+public:
+  using COT             = SoaAtomicBasisSetT<MultiFunctorAdapter<FN>, SH, ORBT>;
+  using RadialOrbital_t = MultiFunctorAdapter<FN>;
+  using single_type     = typename RadialOrbital_t::single_type;
+
+  ///true, if the RadialOrbitalType is normalized
+  bool Normalized;
+  ///orbitals to build
+  COT& m_orbitals;
+
+  ///constructor
+  RadialOrbitalSetBuilder(Communicate* comm, COT& aos) : MPIObjectBase(comm), Normalized(true), m_orbitals(aos) {}
+
+  ///implement functions used by AOBasisBuilder
+  bool addGrid(xmlNodePtr cur, const std::string& rad_type) { return true; }
+  bool addGridH5(hdf_archive& hin) { return true; }
+  bool openNumericalBasisH5(xmlNodePtr cur) { return true; }
+  bool put(xmlNodePtr cur)
+  {
+    const std::string a(lowerCase(getXMLAttributeValue(cur, "normalized")));
+    if (a == "no")
+      Normalized = false;
+    return true;
+  }
+
+  bool addRadialOrbital(xmlNodePtr cur, const std::string& rad_type, const QuantumNumberType& nlms)
+  {
+    auto radorb = std::make_unique<single_type>(nlms[q_l], Normalized);
+    radorb->putBasisGroup(cur);
+
+    m_orbitals.RnlID.push_back(nlms);
+    m_orbitals.MultiRnl.Rnl.push_back(std::move(radorb));
+    return true;
+  }
+
+  bool addRadialOrbitalH5(hdf_archive& hin, const std::string& rad_type, const QuantumNumberType& nlms)
+  {
+    auto radorb = std::make_unique<single_type>(nlms[q_l], Normalized);
+    radorb->putBasisGroupH5(hin, *myComm);
+
+    m_orbitals.RnlID.push_back(nlms);
+    m_orbitals.MultiRnl.Rnl.push_back(std::move(radorb));
+
+    return true;
+  }
+
+  void finalize()
+  {
+    m_orbitals.setRmax(0); //set Rmax
+  }
+};
 } // namespace qmcplusplus
 #endif
diff --git a/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h b/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h
index 71e36230bd7..4d03b3d652c 100644
--- a/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h
+++ b/src/QMCWaveFunctions/LCAO/RadialOrbitalSetBuilder.h
@@ -166,7 +166,7 @@ bool RadialOrbitalSetBuilder<COT>::addGrid(xmlNodePtr cur, const std::string& ra
     hin.pop();
   }
   else
-    input_grid = OneDimGridFactory::createGrid(cur);
+    input_grid = OneDimGridFactory<RealType>::createGrid(cur);
 
   //set zero to use std::max
   m_rcut_safe = 0;
diff --git a/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h
new file mode 100644
index 00000000000..4015806c05b
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaAtomicBasisSetT.h
@@ -0,0 +1,752 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by:
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+/** @file SoaAtomicBasisSetT.h
+ */
+#ifndef QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSETT_H
+#define QMCPLUSPLUS_SOA_SPHERICALORBITAL_BASISSETT_H
+
+#include "CPU/math.hpp"
+#include "OptimizableObject.h"
+
+namespace qmcplusplus
+{
+/* A basis set for a center type
+ *
+ * @tparam ROT : radial function type, e.g.,NGFunctor<RealType>
+ * @tparam SH : spherical or carteisan Harmonics for (l,m) expansion
+ *
+ * \f$ \phi_{n,l,m}({\bf r})=R_{n,l}(r) Y_{l,m}(\theta) \f$
+ */
+template <typename ROT, typename SH, typename ORBT>
+struct SoaAtomicBasisSetT
+{
+    using RadialOrbital_t = ROT;
+    using RealType = typename ROT::RealType;
+    using GridType = typename ROT::GridType;
+    using ValueType = ORBT;
+
+    /// size of the basis set
+    int BasisSetSize;
+    /// Number of Cell images for the evaluation of the orbital with PBC. If No
+    /// PBC, should be 0;
+    TinyVector<int, 3> PBCImages;
+    /// Coordinates of SuperTwist
+    TinyVector<double, 3> SuperTwist;
+    /// Phase Factor array
+    std::vector<ValueType> periodic_image_phase_factors;
+    /// maximum radius of this center
+    RealType Rmax;
+    /// spherical harmonics
+    SH Ylm;
+    /// radial orbitals
+    ROT MultiRnl;
+    /// index of the corresponding real Spherical Harmonic with quantum numbers
+    /// \f$ (l,m) \f$
+    aligned_vector<int> LM;
+    /**index of the corresponding radial orbital with quantum numbers \f$ (n,l)
+     * \f$ */
+    aligned_vector<int> NL;
+    /// container for the quantum-numbers
+    std::vector<QuantumNumberType> RnlID;
+    /// temporary storage
+    VectorSoaContainer<RealType, 4> tempS;
+
+    /// the constructor
+    explicit SoaAtomicBasisSetT(int lmax, bool addsignforM = false) :
+        Ylm(lmax, addsignforM)
+    {
+    }
+
+    void
+    checkInVariables(opt_variables_type& active)
+    {
+        // for(size_t nl=0; nl<Rnl.size(); nl++)
+        //   Rnl[nl]->checkInVariables(active);
+    }
+
+    void
+    checkOutVariables(const opt_variables_type& active)
+    {
+        // for(size_t nl=0; nl<Rnl.size(); nl++)
+        //   Rnl[nl]->checkOutVariables(active);
+    }
+
+    void
+    resetParameters(const opt_variables_type& active)
+    {
+        // for(size_t nl=0; nl<Rnl.size(); nl++)
+        //   Rnl[nl]->resetParameters(active);
+    }
+
+    /** return the number of basis functions
+     */
+    inline int
+    getBasisSetSize() const
+    {
+        //=NL.size();
+        return BasisSetSize;
+    }
+
+    /** Set the number of periodic image for the evaluation of the orbitals and
+     * the phase factor. In the case of Non-PBC, PBCImages=(1,1,1),
+     * SuperTwist(0,0,0) and the PhaseFactor=1.
+     */
+    void
+    setPBCParams(const TinyVector<int, 3>& pbc_images,
+        const TinyVector<double, 3> supertwist,
+        const std::vector<ValueType>& PeriodicImagePhaseFactors)
+    {
+        PBCImages = pbc_images;
+        periodic_image_phase_factors = PeriodicImagePhaseFactors;
+        SuperTwist = supertwist;
+    }
+
+    /** implement a BasisSetBase virtual function
+     *
+     * Set Rmax and BasisSetSize
+     * @todo Should be able to overwrite Rmax to be much smaller than the
+     * maximum grid
+     */
+    inline void
+    setBasisSetSize(int n)
+    {
+        BasisSetSize = LM.size();
+        tempS.resize(std::max(Ylm.size(), RnlID.size()));
+    }
+
+    /** Set Rmax */
+    template <typename RealType>
+    inline void
+    setRmax(RealType rmax)
+    {
+        Rmax = (rmax > 0) ? rmax : MultiRnl.rmax();
+    }
+
+    /// set the current offset
+    inline void
+    setCenter(int c, int offset)
+    {
+    }
+
+    /// Sets a boolean vector for S-type orbitals.  Used for cusp correction.
+    void
+    queryOrbitalsForSType(std::vector<bool>& s_orbitals) const
+    {
+        for (int i = 0; i < BasisSetSize; i++) {
+            s_orbitals[i] = (RnlID[NL[i]][1] == 0);
+        }
+    }
+
+    /** evaluate VGL
+     */
+    template <typename LAT, typename PosType, typename VGL>
+    inline void
+    evaluateVGL(const LAT& lattice, const RealType r, const PosType& dr,
+        const size_t offset, VGL& vgl, PosType Tv)
+    {
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+        // RealType psi_new, dpsi_x_new, dpsi_y_new, dpsi_z_new,d2psi_new;
+
+#if not defined(QMC_COMPLEX)
+        const ValueType correctphase = 1;
+#else
+
+        RealType phasearg = SuperTwist[0] * Tv[0] + SuperTwist[1] * Tv[1] +
+            SuperTwist[2] * Tv[2];
+        RealType s, c;
+        qmcplusplus::sincos(-phasearg, &s, &c);
+        const ValueType correctphase(c, s);
+#endif
+
+        constexpr RealType cone(1);
+        constexpr RealType ctwo(2);
+
+        // one can assert the alignment
+        RealType* restrict phi = tempS.data(0);
+        RealType* restrict dphi = tempS.data(1);
+        RealType* restrict d2phi = tempS.data(2);
+
+        // V,Gx,Gy,Gz,L
+        auto* restrict psi = vgl.data(0) + offset;
+        const RealType* restrict ylm_v = Ylm[0]; // value
+        auto* restrict dpsi_x = vgl.data(1) + offset;
+        const RealType* restrict ylm_x = Ylm[1]; // gradX
+        auto* restrict dpsi_y = vgl.data(2) + offset;
+        const RealType* restrict ylm_y = Ylm[2]; // gradY
+        auto* restrict dpsi_z = vgl.data(3) + offset;
+        const RealType* restrict ylm_z = Ylm[3]; // gradZ
+        auto* restrict d2psi = vgl.data(4) + offset;
+        const RealType* restrict ylm_l = Ylm[4]; // lap
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+            psi[ib] = 0;
+            dpsi_x[ib] = 0;
+            dpsi_y[ib] = 0;
+            dpsi_z[ib] = 0;
+            d2psi[ib] = 0;
+        }
+        // Phase_idx (iter) needs to be initialized at -1 as it has to be
+        // incremented first to comply with the if statement (r_new >=Rmax)
+        int iter = -1;
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+
+                    dr_new[0] = dr[0] +
+                        (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) +
+                            TransZ * lattice.R(2, 0));
+                    dr_new[1] = dr[1] +
+                        (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) +
+                            TransZ * lattice.R(2, 1));
+                    dr_new[2] = dr[2] +
+                        (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) +
+                            TransZ * lattice.R(2, 2));
+
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+
+                    iter++;
+                    if (r_new >= Rmax)
+                        continue;
+
+                    // SIGN Change!!
+                    const RealType x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
+                    Ylm.evaluateVGL(x, y, z);
+
+                    MultiRnl.evaluate(r_new, phi, dphi, d2phi);
+
+                    const RealType rinv = cone / r_new;
+
+                    /// Phase for PBC containing the phase for the nearest image
+                    /// displacement and the correction due to the Distance
+                    /// table.
+                    const ValueType Phase =
+                        periodic_image_phase_factors[iter] * correctphase;
+
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+                        const int nl(NL[ib]);
+                        const int lm(LM[ib]);
+                        const RealType drnloverr = rinv * dphi[nl];
+                        const RealType ang = ylm_v[lm];
+                        const RealType gr_x = drnloverr * x;
+                        const RealType gr_y = drnloverr * y;
+                        const RealType gr_z = drnloverr * z;
+                        const RealType ang_x = ylm_x[lm];
+                        const RealType ang_y = ylm_y[lm];
+                        const RealType ang_z = ylm_z[lm];
+                        const RealType vr = phi[nl];
+
+                        psi[ib] += ang * vr * Phase;
+                        dpsi_x[ib] += (ang * gr_x + vr * ang_x) * Phase;
+                        dpsi_y[ib] += (ang * gr_y + vr * ang_y) * Phase;
+                        dpsi_z[ib] += (ang * gr_z + vr * ang_z) * Phase;
+                        d2psi[ib] += (ang * (ctwo * drnloverr + d2phi[nl]) +
+                                         ctwo *
+                                             (gr_x * ang_x + gr_y * ang_y +
+                                                 gr_z * ang_z) +
+                                         vr * ylm_l[lm]) *
+                            Phase;
+                    }
+                }
+            }
+        }
+    }
+
+    template <typename LAT, typename PosType, typename VGH>
+    inline void
+    evaluateVGH(const LAT& lattice, const RealType r, const PosType& dr,
+        const size_t offset, VGH& vgh)
+    {
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+
+        constexpr RealType cone(1);
+
+        // one can assert the alignment
+        RealType* restrict phi = tempS.data(0);
+        RealType* restrict dphi = tempS.data(1);
+        RealType* restrict d2phi = tempS.data(2);
+
+        // V,Gx,Gy,Gz,L
+        auto* restrict psi = vgh.data(0) + offset;
+        const RealType* restrict ylm_v = Ylm[0]; // value
+        auto* restrict dpsi_x = vgh.data(1) + offset;
+        const RealType* restrict ylm_x = Ylm[1]; // gradX
+        auto* restrict dpsi_y = vgh.data(2) + offset;
+        const RealType* restrict ylm_y = Ylm[2]; // gradY
+        auto* restrict dpsi_z = vgh.data(3) + offset;
+        const RealType* restrict ylm_z = Ylm[3]; // gradZ
+
+        auto* restrict dhpsi_xx = vgh.data(4) + offset;
+        const RealType* restrict ylm_xx = Ylm[4];
+        auto* restrict dhpsi_xy = vgh.data(5) + offset;
+        const RealType* restrict ylm_xy = Ylm[5];
+        auto* restrict dhpsi_xz = vgh.data(6) + offset;
+        const RealType* restrict ylm_xz = Ylm[6];
+        auto* restrict dhpsi_yy = vgh.data(7) + offset;
+        const RealType* restrict ylm_yy = Ylm[7];
+        auto* restrict dhpsi_yz = vgh.data(8) + offset;
+        const RealType* restrict ylm_yz = Ylm[8];
+        auto* restrict dhpsi_zz = vgh.data(9) + offset;
+        const RealType* restrict ylm_zz = Ylm[9];
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+            psi[ib] = 0;
+            dpsi_x[ib] = 0;
+            dpsi_y[ib] = 0;
+            dpsi_z[ib] = 0;
+            dhpsi_xx[ib] = 0;
+            dhpsi_xy[ib] = 0;
+            dhpsi_xz[ib] = 0;
+            dhpsi_yy[ib] = 0;
+            dhpsi_yz[ib] = 0;
+            dhpsi_zz[ib] = 0;
+            //      d2psi[ib]  = 0;
+        }
+
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+                    dr_new[0] = dr[0] + TransX * lattice.R(0, 0) +
+                        TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
+                    dr_new[1] = dr[1] + TransX * lattice.R(0, 1) +
+                        TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
+                    dr_new[2] = dr[2] + TransX * lattice.R(0, 2) +
+                        TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+
+                    // const size_t ib_max=NL.size();
+                    if (r_new >= Rmax)
+                        continue;
+
+                    // SIGN Change!!
+                    const RealType x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
+                    Ylm.evaluateVGH(x, y, z);
+
+                    MultiRnl.evaluate(r_new, phi, dphi, d2phi);
+
+                    const RealType rinv = cone / r_new;
+
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+                        const int nl(NL[ib]);
+                        const int lm(LM[ib]);
+                        const RealType drnloverr = rinv * dphi[nl];
+                        const RealType ang = ylm_v[lm];
+                        const RealType gr_x = drnloverr * x;
+                        const RealType gr_y = drnloverr * y;
+                        const RealType gr_z = drnloverr * z;
+
+                        // The non-strictly diagonal term in \partial_i
+                        // \partial_j R_{nl} is
+                        //  \frac{x_i x_j}{r^2}\left(\frac{\partial^2
+                        //  R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial
+                        //  R_{nl}}{\partial r}) To save recomputation, I
+                        //  evaluate everything except the x_i*x_j term once,
+                        //  and store it in gr2_tmp.  The full term is obtained
+                        //  by x_i*x_j*gr2_tmp.
+                        const RealType gr2_tmp = rinv * rinv * (d2phi[nl] - drnloverr);
+                        const RealType gr_xx = x * x * gr2_tmp + drnloverr;
+                        const RealType gr_xy = x * y * gr2_tmp;
+                        const RealType gr_xz = x * z * gr2_tmp;
+                        const RealType gr_yy = y * y * gr2_tmp + drnloverr;
+                        const RealType gr_yz = y * z * gr2_tmp;
+                        const RealType gr_zz = z * z * gr2_tmp + drnloverr;
+
+                        const RealType ang_x = ylm_x[lm];
+                        const RealType ang_y = ylm_y[lm];
+                        const RealType ang_z = ylm_z[lm];
+                        const RealType ang_xx = ylm_xx[lm];
+                        const RealType ang_xy = ylm_xy[lm];
+                        const RealType ang_xz = ylm_xz[lm];
+                        const RealType ang_yy = ylm_yy[lm];
+                        const RealType ang_yz = ylm_yz[lm];
+                        const RealType ang_zz = ylm_zz[lm];
+
+                        const RealType vr = phi[nl];
+
+                        psi[ib] += ang * vr;
+                        dpsi_x[ib] += ang * gr_x + vr * ang_x;
+                        dpsi_y[ib] += ang * gr_y + vr * ang_y;
+                        dpsi_z[ib] += ang * gr_z + vr * ang_z;
+
+                        // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j
+                        // R + R \partial_i \partial_j Y
+                        //                             + (\partial_i R)
+                        //                             (\partial_j Y) +
+                        //                             (\partial_j R)(\partial_i
+                        //                             Y)
+                        dhpsi_xx[ib] +=
+                            gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x;
+                        dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr +
+                            gr_x * ang_y + gr_y * ang_x;
+                        dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr +
+                            gr_x * ang_z + gr_z * ang_x;
+                        dhpsi_yy[ib] +=
+                            gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y;
+                        dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr +
+                            gr_y * ang_z + gr_z * ang_y;
+                        dhpsi_zz[ib] +=
+                            gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z;
+                    }
+                }
+            }
+        }
+    }
+
+    template <typename LAT, typename PosType, typename VGHGH>
+    inline void
+    evaluateVGHGH(const LAT& lattice, const RealType r, const PosType& dr,
+        const size_t offset, VGHGH& vghgh)
+    {
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+
+        constexpr RealType cone(1);
+
+        // one can assert the alignment
+        RealType* restrict phi = tempS.data(0);
+        RealType* restrict dphi = tempS.data(1);
+        RealType* restrict d2phi = tempS.data(2);
+        RealType* restrict d3phi = tempS.data(3);
+
+        // V,Gx,Gy,Gz,L
+        auto* restrict psi = vghgh.data(0) + offset;
+        const RealType* restrict ylm_v = Ylm[0]; // value
+        auto* restrict dpsi_x = vghgh.data(1) + offset;
+        const RealType* restrict ylm_x = Ylm[1]; // gradX
+        auto* restrict dpsi_y = vghgh.data(2) + offset;
+        const RealType* restrict ylm_y = Ylm[2]; // gradY
+        auto* restrict dpsi_z = vghgh.data(3) + offset;
+        const RealType* restrict ylm_z = Ylm[3]; // gradZ
+
+        auto* restrict dhpsi_xx = vghgh.data(4) + offset;
+        const RealType* restrict ylm_xx = Ylm[4];
+        auto* restrict dhpsi_xy = vghgh.data(5) + offset;
+        const RealType* restrict ylm_xy = Ylm[5];
+        auto* restrict dhpsi_xz = vghgh.data(6) + offset;
+        const RealType* restrict ylm_xz = Ylm[6];
+        auto* restrict dhpsi_yy = vghgh.data(7) + offset;
+        const RealType* restrict ylm_yy = Ylm[7];
+        auto* restrict dhpsi_yz = vghgh.data(8) + offset;
+        const RealType* restrict ylm_yz = Ylm[8];
+        auto* restrict dhpsi_zz = vghgh.data(9) + offset;
+        const RealType* restrict ylm_zz = Ylm[9];
+
+        auto* restrict dghpsi_xxx = vghgh.data(10) + offset;
+        const RealType* restrict ylm_xxx = Ylm[10];
+        auto* restrict dghpsi_xxy = vghgh.data(11) + offset;
+        const RealType* restrict ylm_xxy = Ylm[11];
+        auto* restrict dghpsi_xxz = vghgh.data(12) + offset;
+        const RealType* restrict ylm_xxz = Ylm[12];
+        auto* restrict dghpsi_xyy = vghgh.data(13) + offset;
+        const RealType* restrict ylm_xyy = Ylm[13];
+        auto* restrict dghpsi_xyz = vghgh.data(14) + offset;
+        const RealType* restrict ylm_xyz = Ylm[14];
+        auto* restrict dghpsi_xzz = vghgh.data(15) + offset;
+        const RealType* restrict ylm_xzz = Ylm[15];
+        auto* restrict dghpsi_yyy = vghgh.data(16) + offset;
+        const RealType* restrict ylm_yyy = Ylm[16];
+        auto* restrict dghpsi_yyz = vghgh.data(17) + offset;
+        const RealType* restrict ylm_yyz = Ylm[17];
+        auto* restrict dghpsi_yzz = vghgh.data(18) + offset;
+        const RealType* restrict ylm_yzz = Ylm[18];
+        auto* restrict dghpsi_zzz = vghgh.data(19) + offset;
+        const RealType* restrict ylm_zzz = Ylm[19];
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+            psi[ib] = 0;
+
+            dpsi_x[ib] = 0;
+            dpsi_y[ib] = 0;
+            dpsi_z[ib] = 0;
+
+            dhpsi_xx[ib] = 0;
+            dhpsi_xy[ib] = 0;
+            dhpsi_xz[ib] = 0;
+            dhpsi_yy[ib] = 0;
+            dhpsi_yz[ib] = 0;
+            dhpsi_zz[ib] = 0;
+
+            dghpsi_xxx[ib] = 0;
+            dghpsi_xxy[ib] = 0;
+            dghpsi_xxz[ib] = 0;
+            dghpsi_xyy[ib] = 0;
+            dghpsi_xyz[ib] = 0;
+            dghpsi_xzz[ib] = 0;
+            dghpsi_yyy[ib] = 0;
+            dghpsi_yyz[ib] = 0;
+            dghpsi_yzz[ib] = 0;
+            dghpsi_zzz[ib] = 0;
+        }
+
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+                    dr_new[0] = dr[0] + TransX * lattice.R(0, 0) +
+                        TransY * lattice.R(1, 0) + TransZ * lattice.R(2, 0);
+                    dr_new[1] = dr[1] + TransX * lattice.R(0, 1) +
+                        TransY * lattice.R(1, 1) + TransZ * lattice.R(2, 1);
+                    dr_new[2] = dr[2] + TransX * lattice.R(0, 2) +
+                        TransY * lattice.R(1, 2) + TransZ * lattice.R(2, 2);
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+
+                    // const size_t ib_max=NL.size();
+                    if (r_new >= Rmax)
+                        continue;
+
+                    // SIGN Change!!
+                    const RealType x = -dr_new[0], y = -dr_new[1], z = -dr_new[2];
+                    Ylm.evaluateVGHGH(x, y, z);
+
+                    MultiRnl.evaluate(r_new, phi, dphi, d2phi, d3phi);
+
+                    const RealType rinv = cone / r_new;
+                    const RealType xu = x * rinv, yu = y * rinv, zu = z * rinv;
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib) {
+                        const int nl(NL[ib]);
+                        const int lm(LM[ib]);
+                        const RealType drnloverr = rinv * dphi[nl];
+                        const RealType ang = ylm_v[lm];
+                        const RealType gr_x = drnloverr * x;
+                        const RealType gr_y = drnloverr * y;
+                        const RealType gr_z = drnloverr * z;
+
+                        // The non-strictly diagonal term in \partial_i
+                        // \partial_j R_{nl} is
+                        //  \frac{x_i x_j}{r^2}\left(\frac{\partial^2
+                        //  R_{nl}}{\partial r^2} - \frac{1}{r}\frac{\partial
+                        //  R_{nl}}{\partial r}) To save recomputation, I
+                        //  evaluate everything except the x_i*x_j term once,
+                        //  and store it in gr2_tmp.  The full term is obtained
+                        //  by x_i*x_j*gr2_tmp.  This is p(r) in the notes.
+                        const RealType gr2_tmp = rinv * (d2phi[nl] - drnloverr);
+
+                        const RealType gr_xx = x * xu * gr2_tmp + drnloverr;
+                        const RealType gr_xy = x * yu * gr2_tmp;
+                        const RealType gr_xz = x * zu * gr2_tmp;
+                        const RealType gr_yy = y * yu * gr2_tmp + drnloverr;
+                        const RealType gr_yz = y * zu * gr2_tmp;
+                        const RealType gr_zz = z * zu * gr2_tmp + drnloverr;
+
+                        // This is q(r) in the notes.
+                        const RealType gr3_tmp = d3phi[nl] - 3.0 * gr2_tmp;
+
+                        const RealType gr_xxx =
+                            xu * xu * xu * gr3_tmp + gr2_tmp * (3. * xu);
+                        const RealType gr_xxy = xu * xu * yu * gr3_tmp + gr2_tmp * yu;
+                        const RealType gr_xxz = xu * xu * zu * gr3_tmp + gr2_tmp * zu;
+                        const RealType gr_xyy = xu * yu * yu * gr3_tmp + gr2_tmp * xu;
+                        const RealType gr_xyz = xu * yu * zu * gr3_tmp;
+                        const RealType gr_xzz = xu * zu * zu * gr3_tmp + gr2_tmp * xu;
+                        const RealType gr_yyy =
+                            yu * yu * yu * gr3_tmp + gr2_tmp * (3. * yu);
+                        const RealType gr_yyz = yu * yu * zu * gr3_tmp + gr2_tmp * zu;
+                        const RealType gr_yzz = yu * zu * zu * gr3_tmp + gr2_tmp * yu;
+                        const RealType gr_zzz =
+                            zu * zu * zu * gr3_tmp + gr2_tmp * (3. * zu);
+
+                        // Angular derivatives up to third
+                        const RealType ang_x = ylm_x[lm];
+                        const RealType ang_y = ylm_y[lm];
+                        const RealType ang_z = ylm_z[lm];
+
+                        const RealType ang_xx = ylm_xx[lm];
+                        const RealType ang_xy = ylm_xy[lm];
+                        const RealType ang_xz = ylm_xz[lm];
+                        const RealType ang_yy = ylm_yy[lm];
+                        const RealType ang_yz = ylm_yz[lm];
+                        const RealType ang_zz = ylm_zz[lm];
+
+                        const RealType ang_xxx = ylm_xxx[lm];
+                        const RealType ang_xxy = ylm_xxy[lm];
+                        const RealType ang_xxz = ylm_xxz[lm];
+                        const RealType ang_xyy = ylm_xyy[lm];
+                        const RealType ang_xyz = ylm_xyz[lm];
+                        const RealType ang_xzz = ylm_xzz[lm];
+                        const RealType ang_yyy = ylm_yyy[lm];
+                        const RealType ang_yyz = ylm_yyz[lm];
+                        const RealType ang_yzz = ylm_yzz[lm];
+                        const RealType ang_zzz = ylm_zzz[lm];
+
+                        const RealType vr = phi[nl];
+
+                        psi[ib] += ang * vr;
+                        dpsi_x[ib] += ang * gr_x + vr * ang_x;
+                        dpsi_y[ib] += ang * gr_y + vr * ang_y;
+                        dpsi_z[ib] += ang * gr_z + vr * ang_z;
+
+                        // \partial_i \partial_j (R*Y) = Y \partial_i \partial_j
+                        // R + R \partial_i \partial_j Y
+                        //                             + (\partial_i R)
+                        //                             (\partial_j Y) +
+                        //                             (\partial_j R)(\partial_i
+                        //                             Y)
+                        dhpsi_xx[ib] +=
+                            gr_xx * ang + ang_xx * vr + 2.0 * gr_x * ang_x;
+                        dhpsi_xy[ib] += gr_xy * ang + ang_xy * vr +
+                            gr_x * ang_y + gr_y * ang_x;
+                        dhpsi_xz[ib] += gr_xz * ang + ang_xz * vr +
+                            gr_x * ang_z + gr_z * ang_x;
+                        dhpsi_yy[ib] +=
+                            gr_yy * ang + ang_yy * vr + 2.0 * gr_y * ang_y;
+                        dhpsi_yz[ib] += gr_yz * ang + ang_yz * vr +
+                            gr_y * ang_z + gr_z * ang_y;
+                        dhpsi_zz[ib] +=
+                            gr_zz * ang + ang_zz * vr + 2.0 * gr_z * ang_z;
+
+                        dghpsi_xxx[ib] += gr_xxx * ang + vr * ang_xxx +
+                            3.0 * gr_xx * ang_x + 3.0 * gr_x * ang_xx;
+                        dghpsi_xxy[ib] += gr_xxy * ang + vr * ang_xxy +
+                            gr_xx * ang_y + ang_xx * gr_y +
+                            2.0 * gr_xy * ang_x + 2.0 * ang_xy * gr_x;
+                        dghpsi_xxz[ib] += gr_xxz * ang + vr * ang_xxz +
+                            gr_xx * ang_z + ang_xx * gr_z +
+                            2.0 * gr_xz * ang_x + 2.0 * ang_xz * gr_x;
+                        dghpsi_xyy[ib] += gr_xyy * ang + vr * ang_xyy +
+                            gr_yy * ang_x + ang_yy * gr_x +
+                            2.0 * gr_xy * ang_y + 2.0 * ang_xy * gr_y;
+                        dghpsi_xyz[ib] += gr_xyz * ang + vr * ang_xyz +
+                            gr_xy * ang_z + ang_xy * gr_z + gr_yz * ang_x +
+                            ang_yz * gr_x + gr_xz * ang_y + ang_xz * gr_y;
+                        dghpsi_xzz[ib] += gr_xzz * ang + vr * ang_xzz +
+                            gr_zz * ang_x + ang_zz * gr_x +
+                            2.0 * gr_xz * ang_z + 2.0 * ang_xz * gr_z;
+                        dghpsi_yyy[ib] += gr_yyy * ang + vr * ang_yyy +
+                            3.0 * gr_yy * ang_y + 3.0 * gr_y * ang_yy;
+                        dghpsi_yyz[ib] += gr_yyz * ang + vr * ang_yyz +
+                            gr_yy * ang_z + ang_yy * gr_z +
+                            2.0 * gr_yz * ang_y + 2.0 * ang_yz * gr_y;
+                        dghpsi_yzz[ib] += gr_yzz * ang + vr * ang_yzz +
+                            gr_zz * ang_y + ang_zz * gr_y +
+                            2.0 * gr_yz * ang_z + 2.0 * ang_yz * gr_z;
+                        dghpsi_zzz[ib] += gr_zzz * ang + vr * ang_zzz +
+                            3.0 * gr_zz * ang_z + 3.0 * gr_z * ang_zz;
+                    }
+                }
+            }
+        }
+    }
+
+    /** evaluate V
+     */
+    template <typename LAT, typename PosType, typename VT>
+    inline void
+    evaluateV(const LAT& lattice, const RealType r, const PosType& dr,
+        VT* restrict psi, PosType Tv)
+    {
+        int TransX, TransY, TransZ;
+
+        PosType dr_new;
+        RealType r_new;
+
+#if not defined(QMC_COMPLEX)
+        const ValueType correctphase = 1.0;
+#else
+
+        RealType phasearg = SuperTwist[0] * Tv[0] + SuperTwist[1] * Tv[1] +
+            SuperTwist[2] * Tv[2];
+        RealType s, c;
+        qmcplusplus::sincos(-phasearg, &s, &c);
+        const ValueType correctphase(c, s);
+
+#endif
+
+        RealType* restrict ylm_v = tempS.data(0);
+        RealType* restrict phi_r = tempS.data(1);
+
+        for (size_t ib = 0; ib < BasisSetSize; ++ib)
+            psi[ib] = 0;
+        // Phase_idx (iter) needs to be initialized at -1 as it has to be
+        // incremented first to comply with the if statement (r_new >=Rmax)
+        int iter = -1;
+        for (int i = 0; i <= PBCImages[0]; i++) // loop Translation over X
+        {
+            // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+            TransX = ((i % 2) * 2 - 1) * ((i + 1) / 2);
+            for (int j = 0; j <= PBCImages[1]; j++) // loop Translation over Y
+            {
+                // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                TransY = ((j % 2) * 2 - 1) * ((j + 1) / 2);
+                for (int k = 0; k <= PBCImages[2];
+                     k++) // loop Translation over Z
+                {
+                    // Allows to increment cells from 0,1,-1,2,-2,3,-3 etc...
+                    TransZ = ((k % 2) * 2 - 1) * ((k + 1) / 2);
+
+                    dr_new[0] = dr[0] +
+                        (TransX * lattice.R(0, 0) + TransY * lattice.R(1, 0) +
+                            TransZ * lattice.R(2, 0));
+                    dr_new[1] = dr[1] +
+                        (TransX * lattice.R(0, 1) + TransY * lattice.R(1, 1) +
+                            TransZ * lattice.R(2, 1));
+                    dr_new[2] = dr[2] +
+                        (TransX * lattice.R(0, 2) + TransY * lattice.R(1, 2) +
+                            TransZ * lattice.R(2, 2));
+
+                    r_new = std::sqrt(dot(dr_new, dr_new));
+                    iter++;
+                    if (r_new >= Rmax)
+                        continue;
+
+                    Ylm.evaluateV(-dr_new[0], -dr_new[1], -dr_new[2], ylm_v);
+                    MultiRnl.evaluate(r_new, phi_r);
+                    /// Phase for PBC containing the phase for the nearest image
+                    /// displacement and the correction due to the Distance
+                    /// table.
+                    const ValueType Phase =
+                        periodic_image_phase_factors[iter] * correctphase;
+                    for (size_t ib = 0; ib < BasisSetSize; ++ib)
+                        psi[ib] += ylm_v[LM[ib]] * phi_r[NL[ib]] * Phase;
+                }
+            }
+        }
+    }
+};
+
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp
new file mode 100644
index 00000000000..78318edda3b
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.cpp
@@ -0,0 +1,466 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source
+// License. See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by:
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+#include "SoaLocalizedBasisSetT.h"
+
+#include "MultiFunctorAdapter.h"
+#include "MultiQuinticSpline1D.h"
+#include "Numerics/SoaCartesianTensor.h"
+#include "Numerics/SoaSphericalTensor.h"
+#include "Particle/DistanceTable.h"
+#include "SoaAtomicBasisSetT.h"
+
+#include <memory>
+
+namespace qmcplusplus
+{
+template <class COT, typename ORBT>
+SoaLocalizedBasisSetT<COT, ORBT>::SoaLocalizedBasisSetT(
+    ParticleSet& ions, ParticleSet& els) :
+    ions_(ions),
+    myTableIndex(els.addTable(ions,
+        DTModes::NEED_FULL_TABLE_ANYTIME |
+            DTModes::NEED_VP_FULL_TABLE_ON_HOST)),
+    SuperTwist(0.0)
+{
+    NumCenters = ions.getTotalNum();
+    NumTargets = els.getTotalNum();
+    LOBasisSet.resize(ions.getSpeciesSet().getTotalNum());
+    BasisOffset.resize(NumCenters + 1);
+    BasisSetSize = 0;
+}
+
+template <class COT, typename ORBT>
+SoaLocalizedBasisSetT<COT, ORBT>::SoaLocalizedBasisSetT(
+    const SoaLocalizedBasisSetT& a) :
+    SoaBasisSetBase<ORBT>(a),
+    NumCenters(a.NumCenters),
+    NumTargets(a.NumTargets),
+    ions_(a.ions_),
+    myTableIndex(a.myTableIndex),
+    SuperTwist(a.SuperTwist),
+    BasisOffset(a.BasisOffset)
+{
+    LOBasisSet.reserve(a.LOBasisSet.size());
+    for (auto& elem : a.LOBasisSet)
+        LOBasisSet.push_back(std::make_unique<COT>(*elem));
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::setPBCParams(
+    const TinyVector<int, 3>& PBCImages, const TinyVector<double, 3> Sup_Twist,
+    const std::vector<ORBT>& phase_factor)
+{
+    for (int i = 0; i < LOBasisSet.size(); ++i)
+        LOBasisSet[i]->setPBCParams(PBCImages, Sup_Twist, phase_factor);
+
+    SuperTwist = Sup_Twist;
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::setBasisSetSize(int nbs)
+{
+    const auto& IonID(ions_.GroupID);
+    if (BasisSetSize > 0 && nbs == BasisSetSize)
+        return;
+
+    if (auto& mapping = ions_.get_map_storage_to_input(); mapping.empty()) {
+        // evaluate the total basis dimension and offset for each center
+        BasisOffset[0] = 0;
+        for (int c = 0; c < NumCenters; c++)
+            BasisOffset[c + 1] =
+                BasisOffset[c] + LOBasisSet[IonID[c]]->getBasisSetSize();
+        BasisSetSize = BasisOffset[NumCenters];
+    }
+    else {
+        // when particles are reordered due to grouping, AOs need to restore the
+        // input order to match MOs.
+        std::vector<int> map_input_to_storage(mapping.size());
+        for (int c = 0; c < NumCenters; c++)
+            map_input_to_storage[mapping[c]] = c;
+
+        std::vector<size_t> basis_offset_input_order(BasisOffset.size(), 0);
+        for (int c = 0; c < NumCenters; c++)
+            basis_offset_input_order[c + 1] = basis_offset_input_order[c] +
+                LOBasisSet[IonID[map_input_to_storage[c]]]->getBasisSetSize();
+
+        for (int c = 0; c < NumCenters; c++)
+            BasisOffset[c] = basis_offset_input_order[mapping[c]];
+
+        BasisSetSize = basis_offset_input_order[NumCenters];
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::queryOrbitalsForSType(
+    const std::vector<bool>& corrCenter, std::vector<bool>& is_s_orbital) const
+{
+    const auto& IonID(ions_.GroupID);
+    for (int c = 0; c < NumCenters; c++) {
+        int idx = BasisOffset[c];
+        int bss = LOBasisSet[IonID[c]]->BasisSetSize;
+        std::vector<bool> local_is_s_orbital(bss);
+        LOBasisSet[IonID[c]]->queryOrbitalsForSType(local_is_s_orbital);
+        for (int k = 0; k < bss; k++) {
+            if (corrCenter[c]) {
+                is_s_orbital[idx++] = local_is_s_orbital[k];
+            }
+            else {
+                is_s_orbital[idx++] = false;
+            }
+        }
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGL(
+    const ParticleSet& P, int iat, vgl_type& vgl)
+{
+    const auto& IonID(ions_.GroupID);
+    const auto& coordR = P.activeR(iat);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    PosType Tv;
+    for (int c = 0; c < NumCenters; c++) {
+        Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+        Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+        Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+        LOBasisSet[IonID[c]]->evaluateVGL(
+            P.getLattice(), dist[c], displ[c], BasisOffset[c], vgl, Tv);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::mw_evaluateVGL(
+    const RefVectorWithLeader<ParticleSet>& P_list, int iat,
+    OffloadMWVGLArray& vgl_v)
+{
+    for (size_t iw = 0; iw < P_list.size(); iw++) {
+        const auto& IonID(ions_.GroupID);
+        const auto& coordR = P_list[iw].activeR(iat);
+        const auto& d_table = P_list[iw].getDistTableAB(myTableIndex);
+        const auto& dist = (P_list[iw].getActivePtcl() == iat) ?
+            d_table.getTempDists() :
+            d_table.getDistRow(iat);
+        const auto& displ = (P_list[iw].getActivePtcl() == iat) ?
+            d_table.getTempDispls() :
+            d_table.getDisplRow(iat);
+
+        PosType Tv;
+
+        // number of walkers * BasisSetSize
+        auto stride = vgl_v.size(1) * BasisSetSize;
+        assert(BasisSetSize == vgl_v.size(2));
+        vgl_type vgl_iw(vgl_v.data_at(0, iw, 0), BasisSetSize, stride);
+
+        for (int c = 0; c < NumCenters; c++) {
+            Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+            Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+            Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+            LOBasisSet[IonID[c]]->evaluateVGL(P_list[iw].getLattice(), dist[c],
+                displ[c], BasisOffset[c], vgl_iw, Tv);
+        }
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGH(
+    const ParticleSet& P, int iat, vgh_type& vgh)
+{
+    const auto& IonID(ions_.GroupID);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+    for (int c = 0; c < NumCenters; c++) {
+        LOBasisSet[IonID[c]]->evaluateVGH(
+            P.getLattice(), dist[c], displ[c], BasisOffset[c], vgh);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateVGHGH(
+    const ParticleSet& P, int iat, vghgh_type& vghgh)
+{
+    // APP_ABORT("SoaLocalizedBasisSetT::evaluateVGH() not implemented\n");
+
+    const auto& IonID(ions_.GroupID);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+    for (int c = 0; c < NumCenters; c++) {
+        LOBasisSet[IonID[c]]->evaluateVGHGH(
+            P.getLattice(), dist[c], displ[c], BasisOffset[c], vghgh);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateV(
+    const ParticleSet& P, int iat, ORBT* restrict vals)
+{
+    const auto& IonID(ions_.GroupID);
+    const auto& coordR = P.activeR(iat);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    PosType Tv;
+    for (int c = 0; c < NumCenters; c++) {
+        Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
+        Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
+        Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
+        LOBasisSet[IonID[c]]->evaluateV(
+            P.getLattice(), dist[c], displ[c], vals + BasisOffset[c], Tv);
+    }
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::mw_evaluateValue(
+    const RefVectorWithLeader<ParticleSet>& P_list, int iat, OffloadMWVArray& v)
+{
+    for (size_t iw = 0; iw < P_list.size(); iw++)
+        evaluateV(P_list[iw], iat, v.data_at(iw, 0));
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateGradSourceV(const ParticleSet& P,
+    int iat, const ParticleSet& ions, int jion, vgl_type& vgl)
+{
+    // We need to zero out the temporary array vgl.
+    auto* restrict gx = vgl.data(1);
+    auto* restrict gy = vgl.data(2);
+    auto* restrict gz = vgl.data(3);
+
+    for (int ib = 0; ib < BasisSetSize; ib++) {
+        gx[ib] = 0;
+        gy[ib] = 0;
+        gz[ib] = 0;
+    }
+
+    const auto& IonID(ions_.GroupID);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    PosType Tv;
+    Tv[0] = Tv[1] = Tv[2] = 0;
+    // Since LCAO's are written only in terms of (r-R), ionic derivatives only
+    // exist for the atomic center that we wish to take derivatives of.
+    // Moreover, we can obtain an ion derivative by multiplying an electron
+    // derivative by -1.0.  Handling this sign is left to LCAOrbitalSet.  For
+    // now, just note this is the electron VGL function.
+    LOBasisSet[IonID[jion]]->evaluateVGL(
+        P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vgl, Tv);
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::evaluateGradSourceVGL(const ParticleSet& P,
+    int iat, const ParticleSet& ions, int jion, vghgh_type& vghgh)
+{
+    // We need to zero out the temporary array vghgh.
+    auto* restrict gx = vghgh.data(1);
+    auto* restrict gy = vghgh.data(2);
+    auto* restrict gz = vghgh.data(3);
+
+    auto* restrict hxx = vghgh.data(4);
+    auto* restrict hxy = vghgh.data(5);
+    auto* restrict hxz = vghgh.data(6);
+    auto* restrict hyy = vghgh.data(7);
+    auto* restrict hyz = vghgh.data(8);
+    auto* restrict hzz = vghgh.data(9);
+
+    auto* restrict gxxx = vghgh.data(10);
+    auto* restrict gxxy = vghgh.data(11);
+    auto* restrict gxxz = vghgh.data(12);
+    auto* restrict gxyy = vghgh.data(13);
+    auto* restrict gxyz = vghgh.data(14);
+    auto* restrict gxzz = vghgh.data(15);
+    auto* restrict gyyy = vghgh.data(16);
+    auto* restrict gyyz = vghgh.data(17);
+    auto* restrict gyzz = vghgh.data(18);
+    auto* restrict gzzz = vghgh.data(19);
+
+    for (int ib = 0; ib < BasisSetSize; ib++) {
+        gx[ib] = 0;
+        gy[ib] = 0;
+        gz[ib] = 0;
+
+        hxx[ib] = 0;
+        hxy[ib] = 0;
+        hxz[ib] = 0;
+        hyy[ib] = 0;
+        hyz[ib] = 0;
+        hzz[ib] = 0;
+
+        gxxx[ib] = 0;
+        gxxy[ib] = 0;
+        gxxz[ib] = 0;
+        gxyy[ib] = 0;
+        gxyz[ib] = 0;
+        gxzz[ib] = 0;
+        gyyy[ib] = 0;
+        gyyz[ib] = 0;
+        gyzz[ib] = 0;
+        gzzz[ib] = 0;
+    }
+
+    // Since jion is indexed on the source ions not the ions_ the distinction
+    // between ions_ and ions is extremely important.
+    const auto& IonID(ions.GroupID);
+    const auto& d_table = P.getDistTableAB(myTableIndex);
+    const auto& dist = (P.getActivePtcl() == iat) ? d_table.getTempDists() :
+                                                    d_table.getDistRow(iat);
+    const auto& displ = (P.getActivePtcl() == iat) ? d_table.getTempDispls() :
+                                                     d_table.getDisplRow(iat);
+
+    // Since LCAO's are written only in terms of (r-R), ionic derivatives only
+    // exist for the atomic center that we wish to take derivatives of.
+    // Moreover, we can obtain an ion derivative by multiplying an electron
+    // derivative by -1.0.  Handling this sign is left to LCAOrbitalSet.  For
+    // now, just note this is the electron VGL function.
+
+    LOBasisSet[IonID[jion]]->evaluateVGHGH(
+        P.getLattice(), dist[jion], displ[jion], BasisOffset[jion], vghgh);
+}
+
+template <class COT, typename ORBT>
+void
+SoaLocalizedBasisSetT<COT, ORBT>::add(int icenter, std::unique_ptr<COT> aos)
+{
+    LOBasisSet[icenter] = std::move(aos);
+}
+
+// TODO: this should be redone with template template parameters
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>,
+        double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaCartesianTensor<double>,
+        std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>,
+        float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaCartesianTensor<float>,
+        std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>,
+        double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<double>, SoaSphericalTensor<double>,
+        std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>,
+        float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiQuinticSpline1D<float>, SoaSphericalTensor<float>,
+        std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<GaussianCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaCartesianTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaCartesianTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaCartesianTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, double>,
+    double>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<double>>,
+        SoaSphericalTensor<double>, std::complex<double>>,
+    std::complex<double>>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaSphericalTensor<float>, float>,
+    float>;
+template class SoaLocalizedBasisSetT<
+    SoaAtomicBasisSetT<MultiFunctorAdapter<SlaterCombo<float>>,
+        SoaSphericalTensor<float>, std::complex<float>>,
+    std::complex<float>>;
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h
new file mode 100644
index 00000000000..d0c446fae0d
--- /dev/null
+++ b/src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSetT.h
@@ -0,0 +1,170 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by:
+//
+// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+/** @file SoaLocalizedBasisSetT.h
+ * @brief A derived class from BasisSetBase
+ *
+ * This is intended as a replacement for MolecularWaveFunctionComponent and
+ * any other localized basis set.
+ */
+#ifndef QMCPLUSPLUS_SOA_LOCALIZEDBASISSETT_H
+#define QMCPLUSPLUS_SOA_LOCALIZEDBASISSETT_H
+
+#include <memory>
+#include "QMCWaveFunctions/BasisSetBase.h"
+#include "OMPTarget/OffloadAlignedAllocators.hpp"
+
+namespace qmcplusplus
+{
+/** A localized basis set derived from SoaBasisSetBase<ORBT>
+ *
+ * This class performs the evaluation of the basis functions and their
+ * derivatives for each of the N-particles in a configuration.
+ * The template parameter COT denotes Centered-Orbital-Type which provides
+ * a set of localized orbitals associated with a center.
+ * The template parameter ORBT denotes the orbital value return type
+ */
+template<class COT, typename ORBT>
+class SoaLocalizedBasisSetT : public SoaBasisSetBase<ORBT>
+{
+public:
+  using RealType  = typename COT::RealType;
+  using BaseType  = SoaBasisSetBase<ORBT>;
+  using ValueType = ORBT;
+
+  using vgl_type          = typename BaseType::vgl_type;
+  using vgh_type          = typename BaseType::vgh_type;
+  using vghgh_type        = typename BaseType::vghgh_type;
+  using PosType           = typename ParticleSet::PosType;
+  using OffloadMWVGLArray = typename BaseType::OffloadMWVGLArray;
+  using OffloadMWVArray   = typename BaseType::OffloadMWVArray;
+
+  using BaseType::BasisSetSize;
+
+  ///number of centers, e.g., ions
+  size_t NumCenters;
+  ///number of quantum particles
+  size_t NumTargets;
+  ///ion particle set
+  const ParticleSet& ions_;
+  ///number of quantum particles
+  const int myTableIndex;
+  ///Global Coordinate of Supertwist read from HDF5
+  PosType SuperTwist;
+
+
+  /** container to store the offsets of the basis functions for each center
+   * Due to potential reordering of ions, offsets can be in any order.
+   */
+  std::vector<size_t> BasisOffset;
+
+  /** container of the unique pointers to the Atomic Orbitals
+   *
+   * size of LOBasisSet = number  of unique centers
+   */
+  std::vector<std::unique_ptr<COT>> LOBasisSet;
+
+  /** constructor
+   * @param ions ionic system
+   * @param els electronic system
+   */
+  SoaLocalizedBasisSetT(ParticleSet& ions, ParticleSet& els);
+
+  /** copy constructor */
+  SoaLocalizedBasisSetT(const SoaLocalizedBasisSetT& a);
+
+  /** makeClone */
+  BaseType* makeClone() const override { return new SoaLocalizedBasisSetT<COT, ORBT>(*this); }
+
+  /** set Number of periodic Images to evaluate the orbitals. 
+      Set to 0 for non-PBC, and set manually in the input.
+      Passes the pre-computed phase factor for evaluation of complex wavefunction. If WF is real Phase_factor is real and equals 1 if gamma or -1 if non-Gamma.  
+  */
+  void setPBCParams(const TinyVector<int, 3>& PBCImages,
+                    const TinyVector<double, 3> Sup_Twist,
+                    const std::vector<ORBT>& phase_factor);
+
+  /** set BasisSetSize and allocate mVGL container
+   */
+  void setBasisSetSize(int nbs) override;
+
+  /**  Determine which orbitals are S-type.  Used by cusp correction.
+    */
+  void queryOrbitalsForSType(const std::vector<bool>& corrCenter, std::vector<bool>& is_s_orbital) const override;
+
+  /** compute VGL 
+   * @param P quantum particleset
+   * @param iat active particle
+   * @param vgl Matrix(5,BasisSetSize)
+   * @param trialMove if true, use getTempDists()/getTempDispls()
+   */
+  void evaluateVGL(const ParticleSet& P, int iat, vgl_type& vgl) override;
+
+  /** compute V using packed array with all walkers 
+   * @param P_list list of quantum particleset (one for each walker)
+   * @param iat active particle
+   * @param v   Array(n_walkers, BasisSetSize)
+   */
+  void mw_evaluateValue(const RefVectorWithLeader<ParticleSet>& P_list, int iat, OffloadMWVArray& v) override;
+
+  /** compute VGL using packed array with all walkers 
+   * @param P_list list of quantum particleset (one for each walker)
+   * @param iat active particle
+   * @param vgl   Array(n_walkers, 5, BasisSetSize)
+   */
+  void mw_evaluateVGL(const RefVectorWithLeader<ParticleSet>& P_list, int iat, OffloadMWVGLArray& vgl) override;
+
+  /** compute VGH 
+   * @param P quantum particleset
+   * @param iat active particle
+   * @param vgl Matrix(10,BasisSetSize)
+   * @param trialMove if true, use getTempDists()/getTempDispls()
+   */
+  void evaluateVGH(const ParticleSet& P, int iat, vgh_type& vgh) override;
+
+  /** compute VGHGH 
+   * @param P quantum particleset
+   * @param iat active particle
+   * @param vghgh Matrix(20,BasisSetSize)
+   * @param trialMove if true, use getTempDists()/getTempDispls()
+   */
+  void evaluateVGHGH(const ParticleSet& P, int iat, vghgh_type& vghgh) override;
+
+  /** compute values for the iat-paricle move
+   *
+   * Always uses getTempDists() and getTempDispls()
+   * Tv is a translation vector; In PBC, in order to reduce the number
+   * of images that need to be summed over when generating the AO the 
+   * nearest image displacement, dr, is used. Tv corresponds to the 
+   * translation that takes the 'general displacement' (displacement
+   * between ion position and electron position) to the nearest image 
+   * displacement. We need to keep track of Tv because it must be add
+   * as a phase factor, i.e., exp(i*k*Tv).
+   */
+  void evaluateV(const ParticleSet& P, int iat, ORBT* restrict vals) override;
+
+  void evaluateGradSourceV(const ParticleSet& P, int iat, const ParticleSet& ions, int jion, vgl_type& vgl) override;
+
+  void evaluateGradSourceVGL(const ParticleSet& P,
+                             int iat,
+                             const ParticleSet& ions,
+                             int jion,
+                             vghgh_type& vghgh) override;
+
+  /** add a new set of Centered Atomic Orbitals
+   * @param icenter the index of the center
+   * @param aos a set of Centered Atomic Orbitals
+   */
+  void add(int icenter, std::unique_ptr<COT> aos);
+};
+} // namespace qmcplusplus
+#endif
diff --git a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
index b98952f7794..671fbd5487c 100644
--- a/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
+++ b/src/QMCWaveFunctions/SPOSetBuilderFactoryT.cpp
@@ -253,11 +253,11 @@ SPOSetBuilderFactoryT<T>::addSPOSet(std::unique_ptr<SPOSetT<T>> spo)
 template <typename T>
 std::string SPOSetBuilderFactoryT<T>::basisset_tag = "basisset";
 
-#ifdef QMC_COMPLEX
+// #ifdef QMC_COMPLEX
 template class SPOSetBuilderFactoryT<std::complex<double>>;
 template class SPOSetBuilderFactoryT<std::complex<float>>;
-#else
+// #else
 template class SPOSetBuilderFactoryT<double>;
 template class SPOSetBuilderFactoryT<float>;
-#endif
+// #endif
 } // namespace qmcplusplus