From 3dce32e450b6c181a0526f7fc7999a4ed893921f Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 27 Jul 2023 21:31:10 -0500
Subject: [PATCH 01/65] Review EinsplineSetBuilder::AnalyzeTwists2

---
 .../EinsplineSetBuilderCommon.cpp             | 57 ++++++++++---------
 1 file changed, 31 insertions(+), 26 deletions(-)
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
index 2572243a6e..43ef9182e7 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
@@ -86,10 +86,7 @@ inline TinyVector<T, 3> FracPart(const TinyVector<T, 3>& twist)
 }
 
 
-EinsplineSetBuilder::~EinsplineSetBuilder()
-{
-  DEBUG_MEMORY("EinsplineSetBuilder::~EinsplineSetBuilder");
-}
+EinsplineSetBuilder::~EinsplineSetBuilder() { DEBUG_MEMORY("EinsplineSetBuilder::~EinsplineSetBuilder"); }
 
 
 bool EinsplineSetBuilder::CheckLattice()
@@ -405,25 +402,30 @@ void EinsplineSetBuilder::AnalyzeTwists2(const int twist_num_inp, const TinyVect
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 3; j++)
       S(i, j) = (double)TileMatrix(i, j);
-  std::vector<PosType> superFracs;
-  std::vector<std::vector<int>> superSets;
-  { // build super twists
-    // This holds to which supercell kpoint each primitive k-point belongs
-    std::vector<int> superIndex;
-    const int numPrimTwists = TwistAngles.size();
-    for (int ki = 0; ki < numPrimTwists; ki++)
+
+  const int num_prim_kpoints = TwistAngles.size();
+
+  // build a list of unique super twists that all the primitive cell k-point correspond to.
+  std::vector<PosType> superFracs; // twist super twist coordinates
+  std::vector<int>
+      superIndex; // the indices of the super twists that correspond to all the primitive cell k-points in the unique list.
+  {
+    // scan all the primitive cell k-points
+    for (int ki = 0; ki < num_prim_kpoints; ki++)
     {
       PosType primTwist  = TwistAngles[ki];
       PosType superTwist = dot(S, primTwist);
       PosType kp         = PrimCell.k_cart(primTwist);
       PosType ks         = SuperCell.k_cart(superTwist);
+      // check the consistency of tiling, primitive and super cells.
       if (dot(ks - kp, ks - kp) > 1.0e-6)
       {
         app_error() << "Primitive and super k-points do not agree.  Error in coding.\n";
         APP_ABORT("EinsplineSetBuilder::AnalyzeTwists2");
       }
       PosType frac = FracPart(superTwist);
-      bool found   = false;
+      // verify if the super twist that corresponds to this primitive cell k-point exists in the unique list or not.
+      bool found = false;
       for (int j = 0; j < superFracs.size(); j++)
       {
         PosType diff = frac - superFracs[j];
@@ -439,18 +441,14 @@ void EinsplineSetBuilder::AnalyzeTwists2(const int twist_num_inp, const TinyVect
         superFracs.push_back(frac);
       }
     }
-    const int numSuperTwists = superFracs.size();
-    app_log() << "Found " << numSuperTwists << " distinct supercell twists.\n";
-    // For each supercell twist, create a list of primitive twists which
-    // belong to it.
-    superSets.resize(numSuperTwists);
-    for (int ki = 0; ki < numPrimTwists; ki++)
-      superSets[superIndex[ki]].push_back(ki);
-    app_log() << "number of things" << std::endl;
-    app_log() << TwistSymmetry.size() << std::endl;
-    app_log() << TwistWeight.size() << std::endl;
-    //     for (int ki=0; ki<TwistSymmetry.size(); ki++)
-    //       fprintf (stderr, "%d %d %d\n",ki,TwistSymmetry[ki],TwistWeight[ki]);
+    assert(superIndex.size() == num_prim_kpoints);
+  }
+
+  const int numSuperTwists = superFracs.size();
+  {
+    app_log() << "Found " << numSuperTwists << " distinct supercell twist" << (numSuperTwists > 1 ? "s" : "")
+              << " based on " << num_prim_kpoints << " primitive cell k-point" << (num_prim_kpoints > 1 ? "s" : "")
+              << std::endl;
     if (myComm->rank() == 0)
     {
       int n_tot_irred(0);
@@ -466,9 +464,16 @@ void EinsplineSetBuilder::AnalyzeTwists2(const int twist_num_inp, const TinyVect
       }
     }
   }
-  const int numSuperTwists = superFracs.size();
 
-  { // determine twist_num_
+  // For each supercell twist, create a list of primitive twists which correspond to it.
+  std::vector<std::vector<int>> superSets;
+  {
+    superSets.resize(numSuperTwists);
+    for (int ki = 0; ki < num_prim_kpoints; ki++)
+      superSets[superIndex[ki]].push_back(ki);
+  }
+
+  { // look up a super cell twist and return its index in the unique list of super cell twists.
     std::function find_twist = [&](const TinyVector<double, OHMMS_DIM>& twist) {
       int twist_num  = -1;
       PosType gtFrac = FracPart(twist);

From 6b7570ddadf752d78edfe4d6857d8a0241944e1f Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 27 Jul 2023 21:50:03 -0500
Subject: [PATCH 02/65] TwistAngle -> primcell_kpoints.

---
 .../BsplineFactory/BsplineReaderBase.cpp      |  6 ++--
 .../BsplineFactory/BsplineReaderBase.h        |  4 +--
 .../BsplineFactory/SplineSetReader.h          |  4 +--
 src/QMCWaveFunctions/EinsplineSetBuilder.h    |  3 +-
 .../EinsplineSetBuilderCommon.cpp             | 28 +++++++++----------
 .../EinsplineSetBuilderESHDF.fft.cpp          |  8 +++---
 .../EinsplineSetBuilderOld.cpp                | 10 +++----
 .../EinsplineSetBuilder_createSPOs.cpp        |  2 +-
 8 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.cpp b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.cpp
index 2395d275c2..a387eab57f 100644
--- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.cpp
@@ -220,10 +220,10 @@ void BsplineReaderBase::initialize_spo2band(int spin,
     int bi    = bigspace[i].BandIndex;
     double e  = bigspace[i].Energy;
     int nd    = (bigspace[i].MakeTwoCopies) ? 2 : 1;
-    PosType k = mybuilder->PrimCell.k_cart(mybuilder->TwistAngles[ti]);
+    PosType k = mybuilder->PrimCell.k_cart(mybuilder->primcell_kpoints[ti]);
     int s_size = std::snprintf(s.data(), s.size(), "%8d %8d %8d %8d %12.6f %7.4f %7.4f %7.4f %7.4f %7.4f %7.4f %6d\n",
-                               i, ns, ti, bi, e, k[0], k[1], k[2], mybuilder->TwistAngles[ti][0],
-                               mybuilder->TwistAngles[ti][1], mybuilder->TwistAngles[ti][2], nd);
+                               i, ns, ti, bi, e, k[0], k[1], k[2], mybuilder->primcell_kpoints[ti][0],
+                               mybuilder->primcell_kpoints[ti][1], mybuilder->primcell_kpoints[ti][2], nd);
     if (s_size < 0)
       throw std::runtime_error("Error generating bandinfo");
     o << s.data();
diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h
index a41fd60445..53fe185baa 100644
--- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h
+++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h
@@ -113,7 +113,7 @@ struct BsplineReaderBase
     for (int iorb = 0; iorb < N; iorb++)
     {
       int ti                       = cur_bands[iorb].TwistIndex;
-      bspline->kPoints[iorb]       = mybuilder->PrimCell.k_cart(-mybuilder->TwistAngles[ti]);
+      bspline->kPoints[iorb]       = mybuilder->PrimCell.k_cart(-mybuilder->primcell_kpoints[ti]);
       bspline->MakeTwoCopies[iorb] = (num < (numOrbs - 1)) && cur_bands[iorb].MakeTwoCopies;
       num += bspline->MakeTwoCopies[iorb] ? 2 : 1;
     }
@@ -125,7 +125,7 @@ struct BsplineReaderBase
     if (!bspline->isComplex())
     {
       //no k-point folding, single special k point (G, L ...)
-      TinyVector<double, 3> twist0 = mybuilder->TwistAngles[bandgroup.TwistIndex];
+      TinyVector<double, 3> twist0 = mybuilder->primcell_kpoints[bandgroup.TwistIndex];
       for (int i = 0; i < 3; i++)
         if (bconds[i] && ((std::abs(std::abs(twist0[i]) - 0.5) < 1.0e-8)))
           bspline->HalfG[i] = 1;
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h
index 7aedcf49b3..5d6153f7fa 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h
@@ -259,7 +259,7 @@ struct SplineSetReader : public BsplineReaderBase
     if (bspline->isComplex())
     {
       if (rotate)
-        fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i, mybuilder->TwistAngles[ti], rotate_phase_r,
+        fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i, mybuilder->primcell_kpoints[ti], rotate_phase_r,
                              rotate_phase_i);
       else
       {
@@ -272,7 +272,7 @@ struct SplineSetReader : public BsplineReaderBase
     }
     else
     {
-      fix_phase_rotate_c2r(FFTbox, splineData_r, mybuilder->TwistAngles[ti], rotate_phase_r, rotate_phase_i);
+      fix_phase_rotate_c2r(FFTbox, splineData_r, mybuilder->primcell_kpoints[ti], rotate_phase_r, rotate_phase_i);
       einspline::set(spline_r, splineData_r.data());
     }
   }
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilder.h b/src/QMCWaveFunctions/EinsplineSetBuilder.h
index f7e66293d3..ffcd9ccb7c 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilder.h
+++ b/src/QMCWaveFunctions/EinsplineSetBuilder.h
@@ -214,7 +214,8 @@ class EinsplineSetBuilder : public SPOSetBuilder
   /////////////////////////////
   // The "true" twist number after analyzing twistnum, twist XML input and h5
   int twist_num_;
-  std::vector<TinyVector<double, OHMMS_DIM>> TwistAngles;
+  // primitive cell k-points from DFT calculations
+  std::vector<TinyVector<double, OHMMS_DIM>> primcell_kpoints;
   //     integer index of sym operation from the irreducible brillion zone
   std::vector<int> TwistSymmetry;
   //     number of twists equivalent to this one in the big DFT grid
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
index 43ef9182e7..30d20a32d1 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
@@ -196,10 +196,10 @@ void EinsplineSetBuilder::BroadcastOrbitalInfo()
   //myComm->bcast(IonTypes);
   bbuffer.add(&IonPos[0][0], &IonPos[0][0] + OHMMS_DIM * numIons);
   //myComm->bcast(IonPos);
-  if (TwistAngles.size() != NumTwists)
-    TwistAngles.resize(NumTwists);
-  bbuffer.add(&TwistAngles[0][0], &TwistAngles[0][0] + OHMMS_DIM * NumTwists);
-  //myComm->bcast(TwistAngles);
+  if (primcell_kpoints.size() != NumTwists)
+    primcell_kpoints.resize(NumTwists);
+  bbuffer.add(&primcell_kpoints[0][0], &primcell_kpoints[0][0] + OHMMS_DIM * NumTwists);
+  //myComm->bcast(primcell_kpoints);
   if (TwistSymmetry.size() != NumTwists)
     TwistSymmetry.resize(NumTwists);
   bibuffer.add(&TwistSymmetry[0], &TwistSymmetry[0] + NumTwists);
@@ -236,7 +236,7 @@ void EinsplineSetBuilder::BroadcastOrbitalInfo()
     for (int i = 0; i < numIons; ++i)
       bibuffer.get(IonTypes[i]);
     bbuffer.get(&IonPos[0][0], &IonPos[0][0] + OHMMS_DIM * numIons);
-    bbuffer.get(&TwistAngles[0][0], &TwistAngles[0][0] + OHMMS_DIM * NumTwists);
+    bbuffer.get(&primcell_kpoints[0][0], &primcell_kpoints[0][0] + OHMMS_DIM * NumTwists);
     bibuffer.get(&TwistSymmetry[0], &TwistSymmetry[0] + NumTwists);
     bibuffer.get(&TwistWeight[0], &TwistWeight[0] + NumTwists);
     bbuffer.get(MT_APW_radii.begin(), MT_APW_radii.end());
@@ -403,7 +403,7 @@ void EinsplineSetBuilder::AnalyzeTwists2(const int twist_num_inp, const TinyVect
     for (int j = 0; j < 3; j++)
       S(i, j) = (double)TileMatrix(i, j);
 
-  const int num_prim_kpoints = TwistAngles.size();
+  const int num_prim_kpoints = primcell_kpoints.size();
 
   // build a list of unique super twists that all the primitive cell k-point correspond to.
   std::vector<PosType> superFracs; // twist super twist coordinates
@@ -413,7 +413,7 @@ void EinsplineSetBuilder::AnalyzeTwists2(const int twist_num_inp, const TinyVect
     // scan all the primitive cell k-points
     for (int ki = 0; ki < num_prim_kpoints; ki++)
     {
-      PosType primTwist  = TwistAngles[ki];
+      PosType primTwist  = primcell_kpoints[ki];
       PosType superTwist = dot(S, primTwist);
       PosType kp         = PrimCell.k_cart(primTwist);
       PosType ks         = SuperCell.k_cart(superTwist);
@@ -577,12 +577,12 @@ void EinsplineSetBuilder::AnalyzeTwists2(const int twist_num_inp, const TinyVect
     int N = superSets[si].size();
     for (int i = 0; i < N; i++)
     {
-      PosType twistPrim_i  = TwistAngles[superSets[si][i]];
+      PosType twistPrim_i  = primcell_kpoints[superSets[si][i]];
       PosType twistSuper_i = dot(S, twistPrim_i);
       PosType superInt_i   = IntPart(twistSuper_i);
       for (int j = i + 1; j < N; j++)
       {
-        PosType twistPrim_j  = TwistAngles[superSets[si][j]];
+        PosType twistPrim_j  = primcell_kpoints[superSets[si][j]];
         PosType twistSuper_j = dot(S, twistPrim_j);
         PosType superInt_j   = IntPart(twistSuper_j);
         if (dot(superInt_i - superInt_j, superInt_i - superInt_j) < 1.0e-6)
@@ -606,12 +606,12 @@ void EinsplineSetBuilder::AnalyzeTwists2(const int twist_num_inp, const TinyVect
   for (int i = 0; i < IncludeTwists.size(); i++)
   {
     int ti          = IncludeTwists[i];
-    PosType twist_i = TwistAngles[ti];
+    PosType twist_i = primcell_kpoints[ti];
     bool distinct   = true;
     for (int j = i + 1; j < IncludeTwists.size(); j++)
     {
       int tj          = IncludeTwists[j];
-      PosType twist_j = TwistAngles[tj];
+      PosType twist_j = primcell_kpoints[tj];
       PosType sum     = twist_i + twist_j;
       PosType diff    = twist_i - twist_j;
       if (TwistPair(twist_i, twist_j))
@@ -628,11 +628,11 @@ void EinsplineSetBuilder::AnalyzeTwists2(const int twist_num_inp, const TinyVect
   {
     MakeTwoCopies[i] = false;
     int ti           = DistinctTwists[i];
-    PosType twist_i  = TwistAngles[ti];
+    PosType twist_i  = primcell_kpoints[ti];
     for (int j = 0; j < copyTwists.size(); j++)
     {
       int tj          = copyTwists[j];
-      PosType twist_j = TwistAngles[tj];
+      PosType twist_j = primcell_kpoints[tj];
       if (TwistPair(twist_i, twist_j))
         MakeTwoCopies[i] = true;
     }
@@ -652,7 +652,7 @@ void EinsplineSetBuilder::AnalyzeTwists2(const int twist_num_inp, const TinyVect
   for (int i = 0; i < DistinctTwists.size(); i++)
   {
     int ti        = DistinctTwists[i];
-    PosType twist = TwistAngles[ti];
+    PosType twist = primcell_kpoints[ti];
     for (int j = 0; j < OHMMS_DIM; j++)
       if (std::abs(twist[j] - 0.0) > MatchingTol && std::abs(twist[j] - 0.5) > MatchingTol &&
           std::abs(twist[j] + 0.5) > MatchingTol)
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
index cccae49b56..32c61a674d 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
@@ -264,16 +264,16 @@ bool EinsplineSetBuilder::ReadOrbitalInfo_ESHDF(bool skipChecks)
   ///////////////////////////
   // Read the twist angles //
   ///////////////////////////
-  TwistAngles.resize(NumTwists);
+  primcell_kpoints.resize(NumTwists);
   TwistSymmetry.resize(NumTwists);
   TwistWeight.resize(NumTwists);
   for (int ti = 0; ti < NumTwists; ti++)
   {
     std::ostringstream path;
     path << "/electrons/kpoint_" << ti << "/reduced_k";
-    TinyVector<double, OHMMS_DIM> TwistAngles_DP;
-    H5File.read(TwistAngles_DP, path.str());
-    TwistAngles[ti] = TwistAngles_DP;
+    TinyVector<double, OHMMS_DIM> primcell_kpoints_DP;
+    H5File.read(primcell_kpoints_DP, path.str());
+    primcell_kpoints[ti] = primcell_kpoints_DP;
     if ((Version[0] >= 2) and (Version[1] >= 1))
     {
       std::ostringstream sym_path;
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
index 862dfaccc8..c4d316445a 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
@@ -138,7 +138,7 @@ bool EinsplineSetBuilder::ReadOrbitalInfo(bool skipChecks)
   ///////////////////////////
   // Read the twist angles //
   ///////////////////////////
-  TwistAngles.resize(NumTwists);
+  primcell_kpoints.resize(NumTwists);
   for (int ti = 0; ti < NumTwists; ti++)
   {
     std::ostringstream path;
@@ -146,11 +146,11 @@ bool EinsplineSetBuilder::ReadOrbitalInfo(bool skipChecks)
       path << eigenstatesGroup << "/twist_" << ti << "/twist_angle";
     else
       path << eigenstatesGroup << "/twist/twist_angle";
-    TinyVector<double, OHMMS_DIM> TwistAngles_DP;
-    H5File.read(TwistAngles_DP, path.str());
-    TwistAngles[ti] = TwistAngles_DP;
+    TinyVector<double, OHMMS_DIM> primcell_kpoints_DP;
+    H5File.read(primcell_kpoints_DP, path.str());
+    primcell_kpoints[ti] = primcell_kpoints_DP;
     int length      = std::snprintf(buff.data(), buff.size(), "  Found twist angle (%6.3f, %6.3f, %6.3f)\n",
-                               TwistAngles[ti][0], TwistAngles[ti][1], TwistAngles[ti][2]);
+                               primcell_kpoints[ti][0], primcell_kpoints[ti][1], primcell_kpoints[ti][2]);
     if (length < 0)
       throw std::runtime_error("Error converting twist angle to string");
     app_log() << std::string_view(buff.data(), length);
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
index 7e670592e6..223e689f58 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
@@ -346,7 +346,7 @@ std::unique_ptr<SPOSet> EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur)
       for (int iorb = 0, num = 0; iorb < NumDistinctOrbitals; iorb++)
       {
         int ti                               = (*FullBands[spinSet])[iorb].TwistIndex;
-        temp_OrbitalSet->kPoints[iorb]       = PrimCell.k_cart(-TwistAngles[ti]);
+        temp_OrbitalSet->kPoints[iorb]       = PrimCell.k_cart(-primcell_kpoints[ti]);
         temp_OrbitalSet->MakeTwoCopies[iorb] = (num < (numOrbs - 1)) && (*FullBands[spinSet])[iorb].MakeTwoCopies;
         num += temp_OrbitalSet->MakeTwoCopies[iorb] ? 2 : 1;
       }

From ed56bdc4db62805f7897c5f0e78632ebeeff5a3e Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 27 Jul 2023 21:57:18 -0500
Subject: [PATCH 03/65] Remove TwistSymmetry TwistWeight

---
 src/QMCWaveFunctions/EinsplineSetBuilder.h            |  4 ----
 src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp    |  9 ---------
 src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp | 11 -----------
 3 files changed, 24 deletions(-)

diff --git a/src/QMCWaveFunctions/EinsplineSetBuilder.h b/src/QMCWaveFunctions/EinsplineSetBuilder.h
index ffcd9ccb7c..edfaf9fc89 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilder.h
+++ b/src/QMCWaveFunctions/EinsplineSetBuilder.h
@@ -216,10 +216,6 @@ class EinsplineSetBuilder : public SPOSetBuilder
   int twist_num_;
   // primitive cell k-points from DFT calculations
   std::vector<TinyVector<double, OHMMS_DIM>> primcell_kpoints;
-  //     integer index of sym operation from the irreducible brillion zone
-  std::vector<int> TwistSymmetry;
-  //     number of twists equivalent to this one in the big DFT grid
-  std::vector<int> TwistWeight;
 
   TinyVector<int, OHMMS_DIM> TileFactor;
   Tensor<int, OHMMS_DIM> TileMatrix;
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
index 30d20a32d1..f35ffe73aa 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
@@ -199,13 +199,6 @@ void EinsplineSetBuilder::BroadcastOrbitalInfo()
   if (primcell_kpoints.size() != NumTwists)
     primcell_kpoints.resize(NumTwists);
   bbuffer.add(&primcell_kpoints[0][0], &primcell_kpoints[0][0] + OHMMS_DIM * NumTwists);
-  //myComm->bcast(primcell_kpoints);
-  if (TwistSymmetry.size() != NumTwists)
-    TwistSymmetry.resize(NumTwists);
-  bibuffer.add(&TwistSymmetry[0], &TwistSymmetry[0] + NumTwists);
-  if (TwistWeight.size() != NumTwists)
-    TwistWeight.resize(NumTwists);
-  bibuffer.add(&TwistWeight[0], &TwistWeight[0] + NumTwists);
   bbuffer.add(MT_APW_radii.begin(), MT_APW_radii.end());
   bibuffer.add(MT_APW_lmax.begin(), MT_APW_lmax.end());
   bibuffer.add(MT_APW_num_radial_points.begin(), MT_APW_num_radial_points.end());
@@ -237,8 +230,6 @@ void EinsplineSetBuilder::BroadcastOrbitalInfo()
       bibuffer.get(IonTypes[i]);
     bbuffer.get(&IonPos[0][0], &IonPos[0][0] + OHMMS_DIM * numIons);
     bbuffer.get(&primcell_kpoints[0][0], &primcell_kpoints[0][0] + OHMMS_DIM * NumTwists);
-    bibuffer.get(&TwistSymmetry[0], &TwistSymmetry[0] + NumTwists);
-    bibuffer.get(&TwistWeight[0], &TwistWeight[0] + NumTwists);
     bbuffer.get(MT_APW_radii.begin(), MT_APW_radii.end());
     bibuffer.get(MT_APW_lmax.begin(), MT_APW_lmax.end());
     bibuffer.get(MT_APW_num_radial_points.begin(), MT_APW_num_radial_points.end());
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
index 32c61a674d..32f547a50b 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
@@ -265,8 +265,6 @@ bool EinsplineSetBuilder::ReadOrbitalInfo_ESHDF(bool skipChecks)
   // Read the twist angles //
   ///////////////////////////
   primcell_kpoints.resize(NumTwists);
-  TwistSymmetry.resize(NumTwists);
-  TwistWeight.resize(NumTwists);
   for (int ti = 0; ti < NumTwists; ti++)
   {
     std::ostringstream path;
@@ -274,15 +272,6 @@ bool EinsplineSetBuilder::ReadOrbitalInfo_ESHDF(bool skipChecks)
     TinyVector<double, OHMMS_DIM> primcell_kpoints_DP;
     H5File.read(primcell_kpoints_DP, path.str());
     primcell_kpoints[ti] = primcell_kpoints_DP;
-    if ((Version[0] >= 2) and (Version[1] >= 1))
-    {
-      std::ostringstream sym_path;
-      sym_path << "/electrons/kpoint_" << ti << "/symgroup";
-      H5File.readEntry(TwistSymmetry[ti], sym_path.str());
-      std::ostringstream nsym_path;
-      nsym_path << "/electrons/kpoint_" << ti << "/numsym";
-      H5File.readEntry(TwistWeight[ti], nsym_path.str());
-    }
   }
   if (qmc_common.use_density)
   {

From 302ba2d2dd927fa20a4f4549bbc01dd780837b9b Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 27 Jul 2023 22:51:13 -0500
Subject: [PATCH 04/65] Remove EinsplineSet and AtomicOrbital

---
 src/QMCWaveFunctions/AtomicOrbital.cpp        |  152 --
 src/QMCWaveFunctions/AtomicOrbital.h          |  657 ---------
 src/QMCWaveFunctions/BandInfo.h               |    4 +-
 .../BsplineFactory/BsplineReaderBase.h        |    7 +-
 .../BsplineFactory/SplineSetReader.h          |   57 -
 src/QMCWaveFunctions/CMakeLists.txt           |    4 -
 src/QMCWaveFunctions/EinsplineSet.cpp         | 1285 -----------------
 src/QMCWaveFunctions/EinsplineSet.h           |  359 -----
 src/QMCWaveFunctions/EinsplineSetBuilder.h    |   23 +-
 .../EinsplineSetBuilderCommon.cpp             |   94 +-
 .../EinsplineSetBuilderESHDF.fft.cpp          |   65 +-
 .../EinsplineSetBuilderOld.cpp                |   72 +-
 .../EinsplineSetBuilder_createSPOs.cpp        |  104 +-
 .../EinsplineSpinorSetBuilder.cpp             |    4 +-
 14 files changed, 17 insertions(+), 2870 deletions(-)
 delete mode 100644 src/QMCWaveFunctions/AtomicOrbital.cpp
 delete mode 100644 src/QMCWaveFunctions/AtomicOrbital.h
 delete mode 100644 src/QMCWaveFunctions/EinsplineSet.cpp
 delete mode 100644 src/QMCWaveFunctions/EinsplineSet.h

diff --git a/src/QMCWaveFunctions/AtomicOrbital.cpp b/src/QMCWaveFunctions/AtomicOrbital.cpp
deleted file mode 100644
index 2042461443..0000000000
--- a/src/QMCWaveFunctions/AtomicOrbital.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
-//
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//
-// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//////////////////////////////////////////////////////////////////////////////////////
-
-
-#include "AtomicOrbital.h"
-
-namespace qmcplusplus
-{
-template<>
-void AtomicOrbital<std::complex<double>>::allocate()
-{
-  Numlm = (lMax + 1) * (lMax + 1);
-  YlmVec.resize(Numlm);
-  dYlm_dthetaVec.resize(Numlm);
-  dYlm_dphiVec.resize(Numlm);
-  ulmVec.resize(Numlm * NumBands);
-  dulmVec.resize(Numlm * NumBands);
-  d2ulmVec.resize(Numlm * NumBands);
-  PolyCoefs.resize(PolyOrder + 1, NumBands, Numlm);
-  BCtype_z bc;
-  bc.lCode = NATURAL;
-  bc.rCode = NATURAL;
-  Ugrid grid;
-  grid.start = 0.0;
-  grid.end   = SplineRadius;
-  grid.num   = SplinePoints;
-  // if (RadialSpline) destroy_Bspline (RadialSpline);
-  RadialSpline = create_multi_UBspline_1d_z(grid, bc, Numlm * NumBands);
-  TwistAngles.resize(NumBands);
-}
-
-template<>
-void AtomicOrbital<double>::allocate()
-{
-  Numlm = (lMax + 1) * (lMax + 1);
-  YlmVec.resize(Numlm);
-  dYlm_dthetaVec.resize(Numlm);
-  dYlm_dphiVec.resize(Numlm);
-  ulmVec.resize(Numlm * NumBands);
-  dulmVec.resize(Numlm * NumBands);
-  d2ulmVec.resize(Numlm * NumBands);
-  PolyCoefs.resize(PolyOrder + 1, NumBands, Numlm);
-  BCtype_d bc;
-  bc.lCode = NATURAL;
-  bc.rCode = NATURAL;
-  Ugrid grid;
-  grid.start   = 0.0;
-  grid.end     = SplineRadius;
-  grid.num     = SplinePoints;
-  RadialSpline = create_multi_UBspline_1d_d(grid, bc, Numlm * NumBands);
-  TwistAngles.resize(NumBands);
-}
-
-
-template<>
-void AtomicOrbital<std::complex<double>>::set_band(int band,
-                                                   Array<std::complex<double>, 2>& spline_data,
-                                                   Array<std::complex<double>, 2>& poly_coefs,
-                                                   PosType twist)
-{
-  std::vector<std::complex<double>> one_spline(SplinePoints);
-  for (int lm = 0; lm < Numlm; lm++)
-  {
-    int index = band * Numlm + lm;
-    for (int i = 0; i < SplinePoints; i++)
-      one_spline[i] = spline_data(i, lm);
-    set_multi_UBspline_1d_z(RadialSpline, index, &one_spline[0]);
-    for (int n = 0; n <= PolyOrder; n++)
-      PolyCoefs(n, band, lm) = poly_coefs(n, lm);
-  }
-  TwistAngles[band] = twist;
-}
-
-
-// Here, we convert the complex Ylm representation to the real Ylm representation
-template<>
-void AtomicOrbital<double>::set_band(int band,
-                                     Array<std::complex<double>, 2>& spline_data,
-                                     Array<std::complex<double>, 2>& poly_coefs,
-                                     PosType twist)
-{
-  std::vector<double> one_spline(SplinePoints);
-  for (int l = 0; l <= lMax; l++)
-  {
-    // Set spline for m=0
-    for (int i = 0; i < SplinePoints; i++)
-      one_spline[i] = spline_data(i, l * (l + 1)).real();
-    int index = band * Numlm + l * (l + 1);
-    set_multi_UBspline_1d_d(RadialSpline, index, &one_spline[0]);
-    // Set poly ofr m=0
-    for (int n = 0; n <= PolyOrder; n++)
-      PolyCoefs(n, band, l * (l + 1)) = poly_coefs(n, l * (l + 1)).real();
-    // Set spline and poly for |m| > 0
-    double minus_1_to_m = -1.0;
-    for (int m = 1; m <= l; m++)
-    {
-      int lmp = l * (l + 1) + m;
-      int lmm = l * (l + 1) - m;
-      index   = band * Numlm + lmp;
-      for (int i = 0; i < SplinePoints; i++)
-        one_spline[i] = (spline_data(i, lmp).real() + minus_1_to_m * spline_data(i, lmm).real());
-      set_multi_UBspline_1d_d(RadialSpline, index, &one_spline[0]);
-      index = band * Numlm + lmm;
-      for (int i = 0; i < SplinePoints; i++)
-        one_spline[i] = (-spline_data(i, lmp).imag() + minus_1_to_m * spline_data(i, lmm).imag());
-      set_multi_UBspline_1d_d(RadialSpline, index, &one_spline[0]);
-      for (int n = 0; n <= PolyOrder; n++)
-      {
-        PolyCoefs(n, band, lmp) = (poly_coefs(n, lmp).real() + minus_1_to_m * poly_coefs(n, lmm).real());
-        PolyCoefs(n, band, lmm) = (-poly_coefs(n, lmp).imag() + minus_1_to_m * poly_coefs(n, lmm).imag());
-      }
-      minus_1_to_m *= -1.0;
-    }
-  }
-  TwistAngles[band] = twist;
-  // AtomicOrbital<std::complex<double> > zorb;
-  // zorb.set_pos (Pos);
-  // zorb.set_lmax(lMax);
-  // zorb.set_cutoff(CutoffRadius);
-  // zorb.set_spline(SplineRadius, SplinePoints);
-  // zorb.set_polynomial (PolyRadius, PolyOrder);
-  // zorb.set_num_bands(NumBands);
-  // zorb.allocate();
-  // zorb.set_band(band, spline_data, poly_coefs, twist);
-  // PosType dir(0.324, -0.8, 1.3);
-  // dir = (1.0/std::sqrt(dot(dir,dir)))*dir;
-  // std::ostringstream fname;
-  // fname << "TestAtomic_" << band << ".dat";
-  // FILE *fout = fopen (fname.str().c_str(), "w");
-  // Vector<double> zval(NumBands), val(NumBands);
-  // Vector<double> zlapl(NumBands), lapl(NumBands);
-  // Vector<PosType> zgrad(NumBands), grad(NumBands);
-  // for (double u=-1.00001; u<=1.0; u+= 0.001) {
-  //   PosType r = u*CutoffRadius * dir + Pos;
-  //   zorb.evaluate(r, zval, zgrad, zlapl);
-  //   evaluate(r, val, grad, lapl);
-  //   fprintf (fout, "%12.8f %12.8f %12.8f  %14.8e %14.8e\n",
-  // 	       r[0], r[1], r[2], lapl[band], lapl[band]);
-  // }
-  // fclose (fout);
-}
-} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/AtomicOrbital.h b/src/QMCWaveFunctions/AtomicOrbital.h
deleted file mode 100644
index 1465d1c98b..0000000000
--- a/src/QMCWaveFunctions/AtomicOrbital.h
+++ /dev/null
@@ -1,657 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
-//
-// File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//
-// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//////////////////////////////////////////////////////////////////////////////////////
-
-
-#ifndef ATOMIC_ORBITAL_H
-#define ATOMIC_ORBITAL_H
-
-#include "CPU/math.hpp"
-#include "einspline/multi_bspline.h"
-#include "QMCWaveFunctions/SPOSet.h"
-#include "Lattice/CrystalLattice.h"
-#include <Configuration.h>
-#include "Utilities/TimerManager.h"
-
-
-namespace qmcplusplus
-{
-/******************************************************************
-// This is just a template trick to avoid template specialization //
-// in AtomicOrbital.                                              //
-******************************************************************/
-
-template<typename StorageType>
-struct AtomicOrbitalTraits
-{};
-template<>
-struct AtomicOrbitalTraits<double>
-{
-  using SplineType = multi_UBspline_1d_d;
-};
-template<>
-struct AtomicOrbitalTraits<std::complex<double>>
-{
-  using SplineType = multi_UBspline_1d_z;
-};
-
-inline void EinsplineMultiEval(multi_UBspline_1d_d* spline, double x, double* val)
-{
-  eval_multi_UBspline_1d_d(spline, x, val);
-}
-inline void EinsplineMultiEval(multi_UBspline_1d_z* spline, double x, std::complex<double>* val)
-{
-  eval_multi_UBspline_1d_z(spline, x, val);
-}
-inline void EinsplineMultiEval(multi_UBspline_1d_d* spline, double x, double* val, double* grad, double* lapl)
-{
-  eval_multi_UBspline_1d_d_vgl(spline, x, val, grad, lapl);
-}
-inline void EinsplineMultiEval(multi_UBspline_1d_z* spline,
-                               double x,
-                               std::complex<double>* val,
-                               std::complex<double>* grad,
-                               std::complex<double>* lapl)
-{
-  eval_multi_UBspline_1d_z_vgl(spline, x, val, grad, lapl);
-}
-
-
-template<typename StorageType>
-class AtomicOrbital
-{
-public:
-  using PosType            = QMCTraits::PosType;
-  using RealType           = QMCTraits::RealType;
-  using UnitCellType       = CrystalLattice<RealType, OHMMS_DIM>;
-  using RealValueVector    = Vector<double>;
-  using RealGradVector     = Vector<TinyVector<double, OHMMS_DIM>>;
-  using ComplexValueVector = Vector<std::complex<double>>;
-  using ComplexGradVector  = Vector<TinyVector<std::complex<double>, OHMMS_DIM>>;
-  using RealHessVector     = Vector<Tensor<double, OHMMS_DIM>>;
-  using ComplexHessVector  = Vector<Tensor<std::complex<double>, OHMMS_DIM>>;
-  using SplineType         = typename AtomicOrbitalTraits<StorageType>::SplineType;
-
-private:
-  // Store in order
-  // Index = l*(l+1) + m.  There are (lMax+1)^2 Ylm's
-  std::vector<StorageType> YlmVec, dYlm_dthetaVec, dYlm_dphiVec, ulmVec, dulmVec, d2ulmVec;
-
-  SplineType* RadialSpline;
-  // The first index is n in r^n, the second is lm = l*(l+1)+m
-  Array<StorageType, 3> PolyCoefs;
-  NewTimer &YlmTimer, &SplineTimer, &SumTimer;
-  RealType rmagLast;
-  std::vector<PosType> TwistAngles;
-
-public:
-  PosType Pos;
-  RealType CutoffRadius, SplineRadius, PolyRadius;
-  int SplinePoints;
-  int PolyOrder;
-  int lMax, Numlm, NumBands;
-  UnitCellType Lattice;
-
-  inline void set_pos(PosType pos) { Pos = pos; }
-  inline void set_lmax(int lmax) { lMax = lmax; }
-  inline void set_cutoff(RealType cutoff) { CutoffRadius = cutoff; }
-  inline void set_spline(RealType radius, int points)
-  {
-    SplineRadius = radius;
-    SplinePoints = points;
-  }
-  inline void set_polynomial(RealType radius, int order)
-  {
-    PolyRadius = radius;
-    PolyOrder  = order;
-  }
-  inline void set_num_bands(int num_bands) { NumBands = num_bands; }
-  SplineType* get_radial_spline() { return RadialSpline; }
-  Array<StorageType, 3>& get_poly_coefs() { return PolyCoefs; }
-
-  inline void registerTimers()
-  {
-    YlmTimer.reset();
-    SplineTimer.reset();
-  }
-
-  void allocate();
-
-  void set_band(int band,
-                Array<std::complex<double>, 2>& spline_data,
-                Array<std::complex<double>, 2>& poly_coefs,
-                PosType twist);
-  inline void CalcYlm(PosType rhat,
-                      std::vector<std::complex<double>>& Ylm,
-                      std::vector<std::complex<double>>& dYlm_dtheta,
-                      std::vector<std::complex<double>>& dYlm_dphi);
-
-  inline void CalcYlm(PosType rhat,
-                      std::vector<double>& Ylm,
-                      std::vector<double>& dYlm_dtheta,
-                      std::vector<double>& dYlm_dphi);
-
-  inline bool evaluate(PosType r, ComplexValueVector& vals);
-  inline bool evaluate(PosType r, ComplexValueVector& val, ComplexGradVector& grad, ComplexValueVector& lapl);
-  inline bool evaluate(PosType r, ComplexValueVector& val, ComplexGradVector& grad, ComplexHessVector& lapl);
-  inline bool evaluate(PosType r, RealValueVector& vals);
-  inline bool evaluate(PosType r, RealValueVector& val, RealGradVector& grad, RealValueVector& lapl);
-  inline bool evaluate(PosType r, RealValueVector& val, RealGradVector& grad, RealHessVector& lapl);
-
-
-  AtomicOrbital()
-      : RadialSpline(NULL),
-        YlmTimer(createGlobalTimer("AtomicOrbital::CalcYlm")),
-        SplineTimer(createGlobalTimer("AtomicOrbital::1D spline")),
-        SumTimer(createGlobalTimer("AtomicOrbital::Summation")),
-        rmagLast(std::numeric_limits<RealType>::max())
-  {
-    // Nothing else for now
-  }
-};
-
-
-template<typename StorageType>
-inline bool AtomicOrbital<StorageType>::evaluate(PosType r, ComplexValueVector& vals)
-{
-  PosType dr = r - Pos;
-  PosType u  = Lattice.toUnit(dr);
-  PosType img;
-  for (int i = 0; i < OHMMS_DIM; i++)
-  {
-    img[i] = round(u[i]);
-    u[i] -= img[i];
-  }
-  dr        = Lattice.toCart(u);
-  double r2 = dot(dr, dr);
-  if (r2 > CutoffRadius * CutoffRadius)
-    return false;
-  double rmag  = std::sqrt(r2);
-  PosType rhat = (1.0 / rmag) * dr;
-  // Evaluate Ylm's
-  CalcYlm(rhat, YlmVec, dYlm_dthetaVec, dYlm_dphiVec);
-  if (std::abs(rmag - rmagLast) > 1.0e-6)
-  {
-    // Evaluate radial functions
-    if (rmag > PolyRadius)
-      EinsplineMultiEval(RadialSpline, rmag, &(ulmVec[0]));
-    else
-    {
-      for (int index = 0; index < ulmVec.size(); index++)
-        ulmVec[index] = StorageType();
-      double r2n = 1.0;
-      for (int n = 0; n <= PolyOrder; n++)
-      {
-        int index = 0;
-        for (int i = 0; i < vals.size(); i++)
-          for (int lm = 0; lm < Numlm; lm++)
-            ulmVec[index++] += r2n * PolyCoefs(n, i, lm);
-        r2n *= rmag;
-      }
-    }
-    rmagLast = rmag;
-  }
-  SumTimer.start();
-  int index = 0;
-  for (int i = 0; i < vals.size(); i++)
-  {
-    vals[i] = std::complex<double>();
-    for (int lm = 0; lm < Numlm; lm++)
-      vals[i] += ulmVec[index++] * YlmVec[lm];
-    double phase = -2.0 * M_PI * dot(TwistAngles[i], img);
-    // fprintf (stderr, "phase[%d] = %1.2f pi\n", i, phase/M_PI);
-    // fprintf (stderr, "img = [%f,%f,%f]\n", img[0], img[1], img[2]);
-    double s, c;
-    qmcplusplus::sincos(phase, &s, &c);
-    vals[i] *= std::complex<double>(c, s);
-  }
-  SumTimer.stop();
-  return true;
-}
-
-
-template<typename StorageType>
-inline bool AtomicOrbital<StorageType>::evaluate(PosType r, RealValueVector& vals)
-{
-  PosType dr = r - Pos;
-  PosType u  = Lattice.toUnit(dr);
-  PosType img;
-  for (int i = 0; i < OHMMS_DIM; i++)
-  {
-    img[i] = round(u[i]);
-    u[i] -= img[i];
-  }
-  dr        = Lattice.toCart(u);
-  double r2 = dot(dr, dr);
-  if (r2 > CutoffRadius * CutoffRadius)
-    return false;
-  double rmag  = std::sqrt(r2);
-  PosType rhat = (1.0 / rmag) * dr;
-  // Evaluate Ylm's
-  CalcYlm(rhat, YlmVec, dYlm_dthetaVec, dYlm_dphiVec);
-  if (std::abs(rmag - rmagLast) > 1.0e-6)
-  {
-    // Evaluate radial functions
-    if (rmag > PolyRadius)
-    {
-      SplineTimer.start();
-      EinsplineMultiEval(RadialSpline, rmag, &(ulmVec[0]));
-      SplineTimer.stop();
-    }
-    else
-    {
-      for (int index = 0; index < ulmVec.size(); index++)
-        ulmVec[index] = StorageType();
-      double r2n = 1.0;
-      for (int n = 0; n <= PolyOrder; n++)
-      {
-        int index = 0;
-        for (int i = 0; i < vals.size(); i++)
-          for (int lm = 0; lm < Numlm; lm++)
-            ulmVec[index++] += r2n * PolyCoefs(n, i, lm);
-        r2n *= rmag;
-      }
-    }
-    rmagLast = rmag;
-  }
-  SumTimer.start();
-  int index = 0;
-  for (int i = 0; i < vals.size(); i++)
-  {
-    vals[i]         = 0.0;
-    StorageType tmp = 0.0;
-    for (int lm = 0; lm < Numlm; lm++, index++)
-      tmp += ulmVec[index] * YlmVec[lm];
-    //vals[i] += real(ulmVec[index++] * YlmVec[lm]);
-    // vals[i] += (ulmVec[index].real() * YlmVec[lm].real() -
-    // 	    ulmVec[index].imag() * YlmVec[lm].imag());
-    double phase = -2.0 * M_PI * dot(TwistAngles[i], img);
-    double s, c;
-    qmcplusplus::sincos(phase, &s, &c);
-    vals[i] = real(std::complex<double>(c, s) * tmp);
-  }
-  SumTimer.stop();
-  return true;
-}
-
-template<typename StorageType>
-inline bool AtomicOrbital<StorageType>::evaluate(PosType r,
-                                                 RealValueVector& vals,
-                                                 RealGradVector& grads,
-                                                 RealHessVector& hess)
-{
-  APP_ABORT(" AtomicOrbital<StorageType>::evaluate not implemented for Hess. \n");
-  return true;
-}
-
-
-template<typename StorageType>
-inline bool AtomicOrbital<StorageType>::evaluate(PosType r,
-                                                 RealValueVector& vals,
-                                                 RealGradVector& grads,
-                                                 RealValueVector& lapl)
-{
-  PosType dr = r - Pos;
-  PosType u  = Lattice.toUnit(dr);
-  PosType img;
-  for (int i = 0; i < OHMMS_DIM; i++)
-  {
-    img[i] = round(u[i]);
-    u[i] -= img[i];
-  }
-  dr        = Lattice.toCart(u);
-  double r2 = dot(dr, dr);
-  if (r2 > CutoffRadius * CutoffRadius)
-    return false;
-  double rmag      = std::sqrt(r2);
-  double rInv      = 1.0 / rmag;
-  PosType rhat     = rInv * dr;
-  double costheta  = rhat[2];
-  double sintheta  = std::sqrt(1.0 - costheta * costheta);
-  double cosphi    = rhat[0] / sintheta;
-  double sinphi    = rhat[1] / sintheta;
-  PosType thetahat = PosType(costheta * cosphi, costheta * sinphi, -sintheta);
-  PosType phihat   = PosType(-sinphi, cosphi, 0.0);
-  // Evaluate Ylm's
-  CalcYlm(rhat, YlmVec, dYlm_dthetaVec, dYlm_dphiVec);
-  // Evaluate radial functions
-  if (rmag > PolyRadius)
-  {
-    SplineTimer.start();
-    EinsplineMultiEval(RadialSpline, rmag, &(ulmVec[0]), &(dulmVec[0]), &(d2ulmVec[0]));
-    SplineTimer.stop();
-  }
-  else
-  {
-    for (int index = 0; index < ulmVec.size(); index++)
-    {
-      ulmVec[index]   = StorageType();
-      dulmVec[index]  = StorageType();
-      d2ulmVec[index] = StorageType();
-    }
-    double r2n = 1.0, r2nm1 = 0.0, r2nm2 = 0.0;
-    double dn   = 0.0;
-    double dnm1 = -1.0;
-    for (int n = 0; n <= PolyOrder; n++)
-    {
-      int index = 0;
-      for (int i = 0; i < vals.size(); i++)
-        for (int lm = 0; lm < Numlm; lm++, index++)
-        {
-          StorageType c = PolyCoefs(n, i, lm);
-          ulmVec[index] += r2n * c;
-          dulmVec[index] += dn * r2nm1 * c;
-          d2ulmVec[index] += dn * dnm1 * r2nm2 * c;
-        }
-      dn += 1.0;
-      dnm1 += 1.0;
-      r2nm2 = r2nm1;
-      r2nm1 = r2n;
-      r2n *= rmag;
-    }
-  }
-  SumTimer.start();
-  int index = 0;
-  for (int i = 0; i < vals.size(); i++)
-  {
-    vals[i] = 0.0;
-    for (int j = 0; j < OHMMS_DIM; j++)
-      grads[i][j] = 0.0;
-    lapl[i] = 0.0;
-    // Compute e^{-i k.L} phase factor
-    double phase = -2.0 * M_PI * dot(TwistAngles[i], img);
-    double s, c;
-    qmcplusplus::sincos(phase, &s, &c);
-    std::complex<double> e2mikr(c, s);
-    StorageType tmp_val, tmp_lapl, grad_rhat, grad_thetahat, grad_phihat;
-    tmp_val = tmp_lapl = grad_rhat = grad_thetahat = grad_phihat = StorageType();
-    int lm                                                       = 0;
-    for (int l = 0; l <= lMax; l++)
-      for (int m = -l; m <= l; m++, lm++, index++)
-      {
-        std::complex<double> im(0.0, (double)m);
-        tmp_val += ulmVec[index] * YlmVec[lm];
-        grad_rhat += dulmVec[index] * YlmVec[lm];
-        grad_thetahat += ulmVec[index] * rInv * dYlm_dthetaVec[lm];
-        grad_phihat += (ulmVec[index] * dYlm_dphiVec[lm]) / (rmag * sintheta);
-        //grad_phihat += (ulmVec[index] * im *YlmVec[lm])/(rmag*sintheta);
-        tmp_lapl += YlmVec[lm] *
-            (-(double)(l * (l + 1)) * rInv * rInv * ulmVec[index] + d2ulmVec[index] + 2.0 * rInv * dulmVec[index]);
-      }
-    vals[i]  = real(e2mikr * tmp_val);
-    lapl[i]  = real(e2mikr * tmp_lapl);
-    grads[i] = (real(e2mikr * grad_rhat) * rhat + real(e2mikr * grad_thetahat) * thetahat +
-                real(e2mikr * grad_phihat) * phihat);
-  }
-  SumTimer.stop();
-  rmagLast = rmag;
-  return true;
-}
-
-template<typename StorageType>
-inline bool AtomicOrbital<StorageType>::evaluate(PosType r,
-                                                 ComplexValueVector& vals,
-                                                 ComplexGradVector& grads,
-                                                 ComplexHessVector& hess)
-{
-  APP_ABORT(" AtomicOrbital<StorageType>::evaluate not implemented for Hess. \n");
-  return true;
-}
-
-template<typename StorageType>
-inline bool AtomicOrbital<StorageType>::evaluate(PosType r,
-                                                 ComplexValueVector& vals,
-                                                 ComplexGradVector& grads,
-                                                 ComplexValueVector& lapl)
-{
-  PosType dr = r - Pos;
-  PosType u  = Lattice.toUnit(dr);
-  PosType img;
-  for (int i = 0; i < OHMMS_DIM; i++)
-  {
-    img[i] = round(u[i]);
-    u[i] -= img[i];
-  }
-  dr        = Lattice.toCart(u);
-  double r2 = dot(dr, dr);
-  if (r2 > CutoffRadius * CutoffRadius)
-    return false;
-  double rmag      = std::sqrt(r2);
-  double rInv      = 1.0 / rmag;
-  PosType rhat     = rInv * dr;
-  double costheta  = rhat[2];
-  double sintheta  = std::sqrt(1.0 - costheta * costheta);
-  double cosphi    = rhat[0] / sintheta;
-  double sinphi    = rhat[1] / sintheta;
-  PosType thetahat = PosType(costheta * cosphi, costheta * sinphi, -sintheta);
-  PosType phihat   = PosType(-sinphi, cosphi, 0.0);
-  // Evaluate Ylm's
-  CalcYlm(rhat, YlmVec, dYlm_dthetaVec, dYlm_dphiVec);
-  // Evaluate radial functions
-  if (rmag > PolyRadius)
-  {
-    SplineTimer.start();
-    EinsplineMultiEval(RadialSpline, rmag, &(ulmVec[0]), &(dulmVec[0]), &(d2ulmVec[0]));
-    SplineTimer.stop();
-  }
-  else
-  {
-    for (int index = 0; index < ulmVec.size(); index++)
-    {
-      ulmVec[index]   = StorageType();
-      dulmVec[index]  = StorageType();
-      d2ulmVec[index] = StorageType();
-    }
-    double r2n = 1.0, r2nm1 = 0.0, r2nm2 = 0.0;
-    double dn   = 0.0;
-    double dnm1 = -1.0;
-    for (int n = 0; n <= PolyOrder; n++)
-    {
-      int index = 0;
-      for (int i = 0; i < vals.size(); i++)
-        for (int lm = 0; lm < Numlm; lm++, index++)
-        {
-          StorageType c = PolyCoefs(n, i, lm);
-          ulmVec[index] += r2n * c;
-          dulmVec[index] += dn * r2nm1 * c;
-          d2ulmVec[index] += dn * dnm1 * r2nm2 * c;
-        }
-      dn += 1.0;
-      dnm1 += 1.0;
-      r2nm2 = r2nm1;
-      r2nm1 = r2n;
-      r2n *= rmag;
-    }
-  }
-  SumTimer.start();
-  int index = 0;
-  for (int i = 0; i < vals.size(); i++)
-  {
-    vals[i] = 0.0;
-    for (int j = 0; j < OHMMS_DIM; j++)
-      grads[i][j] = 0.0;
-    lapl[i] = 0.0;
-    int lm  = 0;
-    StorageType grad_rhat, grad_thetahat, grad_phihat;
-    // Compute e^{-i k.L} phase factor
-    double phase = -2.0 * M_PI * dot(TwistAngles[i], img);
-    double s, c;
-    qmcplusplus::sincos(phase, &s, &c);
-    std::complex<double> e2mikr(c, s);
-    for (int l = 0; l <= lMax; l++)
-      for (int m = -l; m <= l; m++, lm++, index++)
-      {
-        std::complex<double> im(0.0, (double)m);
-        vals[i] += ulmVec[index] * YlmVec[lm];
-        grad_rhat += dulmVec[index] * YlmVec[lm];
-        grad_thetahat += ulmVec[index] * rInv * dYlm_dthetaVec[lm];
-        grad_phihat += (ulmVec[index] * im * YlmVec[lm]) / (rmag * sintheta);
-        lapl[i] += YlmVec[lm] *
-            (-(double)(l * (l + 1)) * rInv * rInv * ulmVec[index] + d2ulmVec[index] + 2.0 * rInv * dulmVec[index]);
-      }
-    vals[i] *= e2mikr;
-    lapl[i] *= e2mikr;
-    for (int j = 0; j < OHMMS_DIM; j++)
-    {
-      grads[i][j] = e2mikr * (grad_rhat * rhat[j] + grad_thetahat * thetahat[j] + grad_phihat * phihat[j]);
-    }
-  }
-  SumTimer.stop();
-  rmagLast = rmag;
-  return true;
-}
-
-
-// Fast implementation
-// See Geophys. J. Int. (1998) 135,pp.307-309
-template<typename StorageType>
-inline void AtomicOrbital<StorageType>::CalcYlm(PosType rhat,
-                                                std::vector<std::complex<double>>& Ylm,
-                                                std::vector<std::complex<double>>& dYlm_dtheta,
-                                                std::vector<std::complex<double>>& dYlm_dphi)
-{
-  YlmTimer.start();
-  const double fourPiInv = 0.0795774715459477;
-  double costheta        = rhat[2];
-  double sintheta        = std::sqrt(1.0 - costheta * costheta);
-  double cottheta        = costheta / sintheta;
-  double cosphi, sinphi;
-  cosphi = rhat[0] / sintheta;
-  sinphi = rhat[1] / sintheta;
-  std::complex<double> e2iphi(cosphi, sinphi);
-  double lsign = 1.0;
-  double dl    = 0.0;
-  std::vector<double> XlmVec(2 * lMax + 1), dXlmVec(2 * lMax + 1);
-  for (int l = 0; l <= lMax; l++)
-  {
-    XlmVec[2 * l]  = lsign;
-    dXlmVec[2 * l] = dl * cottheta * XlmVec[2 * l];
-    XlmVec[0]      = lsign * XlmVec[2 * l];
-    dXlmVec[0]     = lsign * dXlmVec[2 * l];
-    double dm      = dl;
-    double msign   = lsign;
-    for (int m = l; m > 0; m--)
-    {
-      double tmp         = std::sqrt((dl + dm) * (dl - dm + 1.0));
-      XlmVec[l + m - 1]  = -(dXlmVec[l + m] + dm * cottheta * XlmVec[l + m]) / tmp;
-      dXlmVec[l + m - 1] = (dm - 1.0) * cottheta * XlmVec[l + m - 1] + XlmVec[l + m] * tmp;
-      // Copy to negative m
-      XlmVec[l - (m - 1)]  = -msign * XlmVec[l + m - 1];
-      dXlmVec[l - (m - 1)] = -msign * dXlmVec[l + m - 1];
-      msign *= -1.0;
-      dm -= 1.0;
-    }
-    double sum = 0.0;
-    for (int m = -l; m <= l; m++)
-      sum += XlmVec[l + m] * XlmVec[l + m];
-    // Now, renormalize the Ylms for this l
-    double norm = std::sqrt((2.0 * dl + 1.0) * fourPiInv / sum);
-    for (int m = -l; m <= l; m++)
-    {
-      XlmVec[l + m] *= norm;
-      dXlmVec[l + m] *= norm;
-    }
-    // Multiply by azimuthal phase and store in Ylm
-    std::complex<double> e2imphi(1.0, 0.0);
-    std::complex<double> eye(0.0, 1.0);
-    for (int m = 0; m <= l; m++)
-    {
-      Ylm[l * (l + 1) + m]         = XlmVec[l + m] * e2imphi;
-      Ylm[l * (l + 1) - m]         = XlmVec[l - m] * qmcplusplus::conj(e2imphi);
-      dYlm_dphi[l * (l + 1) + m]   = (double)m * eye * XlmVec[l + m] * e2imphi;
-      dYlm_dphi[l * (l + 1) - m]   = -(double)m * eye * XlmVec[l - m] * qmcplusplus::conj(e2imphi);
-      dYlm_dtheta[l * (l + 1) + m] = dXlmVec[l + m] * e2imphi;
-      dYlm_dtheta[l * (l + 1) - m] = dXlmVec[l - m] * qmcplusplus::conj(e2imphi);
-      e2imphi *= e2iphi;
-    }
-    dl += 1.0;
-    lsign *= -1.0;
-  }
-  YlmTimer.stop();
-}
-
-// Fast implementation
-// See Geophys. J. Int. (1998) 135,pp.307-309
-template<typename StorageType>
-inline void AtomicOrbital<StorageType>::CalcYlm(PosType rhat,
-                                                std::vector<double>& Ylm,
-                                                std::vector<double>& dYlm_dtheta,
-                                                std::vector<double>& dYlm_dphi)
-{
-  YlmTimer.start();
-  const double fourPiInv = 0.0795774715459477;
-  double costheta        = rhat[2];
-  double sintheta        = std::sqrt(1.0 - costheta * costheta);
-  double cottheta        = costheta / sintheta;
-  double cosphi, sinphi;
-  cosphi = rhat[0] / sintheta;
-  sinphi = rhat[1] / sintheta;
-  std::complex<double> e2iphi(cosphi, sinphi);
-  double lsign = 1.0;
-  double dl    = 0.0;
-  std::vector<double> XlmVec(2 * lMax + 1), dXlmVec(2 * lMax + 1);
-  for (int l = 0; l <= lMax; l++)
-  {
-    XlmVec[2 * l]  = lsign;
-    dXlmVec[2 * l] = dl * cottheta * XlmVec[2 * l];
-    XlmVec[0]      = lsign * XlmVec[2 * l];
-    dXlmVec[0]     = lsign * dXlmVec[2 * l];
-    double dm      = dl;
-    double msign   = lsign;
-    for (int m = l; m > 0; m--)
-    {
-      double tmp         = std::sqrt((dl + dm) * (dl - dm + 1.0));
-      XlmVec[l + m - 1]  = -(dXlmVec[l + m] + dm * cottheta * XlmVec[l + m]) / tmp;
-      dXlmVec[l + m - 1] = (dm - 1.0) * cottheta * XlmVec[l + m - 1] + XlmVec[l + m] * tmp;
-      // Copy to negative m
-      XlmVec[l - (m - 1)]  = -msign * XlmVec[l + m - 1];
-      dXlmVec[l - (m - 1)] = -msign * dXlmVec[l + m - 1];
-      msign *= -1.0;
-      dm -= 1.0;
-    }
-    double sum = 0.0;
-    for (int m = -l; m <= l; m++)
-      sum += XlmVec[l + m] * XlmVec[l + m];
-    // Now, renormalize the Ylms for this l
-    double norm = std::sqrt((2.0 * dl + 1.0) * fourPiInv / sum);
-    for (int m = -l; m <= l; m++)
-    {
-      XlmVec[l + m] *= norm;
-      dXlmVec[l + m] *= norm;
-    }
-    // Multiply by azimuthal phase and store in Ylm
-    Ylm[l * (l + 1)]             = XlmVec[l];
-    dYlm_dphi[l * (l + 1)]       = 0.0;
-    dYlm_dtheta[l * (l + 1)]     = dXlmVec[l];
-    std::complex<double> e2imphi = e2iphi;
-    for (int m = 1; m <= l; m++)
-    {
-      Ylm[l * (l + 1) + m]         = XlmVec[l + m] * e2imphi.real();
-      Ylm[l * (l + 1) - m]         = XlmVec[l + m] * e2imphi.imag();
-      dYlm_dphi[l * (l + 1) + m]   = -(double)m * XlmVec[l + m] * e2imphi.imag();
-      dYlm_dphi[l * (l + 1) - m]   = (double)m * XlmVec[l + m] * e2imphi.real();
-      dYlm_dtheta[l * (l + 1) + m] = dXlmVec[l + m] * e2imphi.real();
-      dYlm_dtheta[l * (l + 1) - m] = dXlmVec[l + m] * e2imphi.imag();
-      e2imphi *= e2iphi;
-    }
-    dl += 1.0;
-    lsign *= -1.0;
-  }
-  YlmTimer.stop();
-}
-
-
-} // namespace qmcplusplus
-#endif
diff --git a/src/QMCWaveFunctions/BandInfo.h b/src/QMCWaveFunctions/BandInfo.h
index 0e8e8acd61..9cd070586a 100644
--- a/src/QMCWaveFunctions/BandInfo.h
+++ b/src/QMCWaveFunctions/BandInfo.h
@@ -37,11 +37,9 @@ struct BandInfo
   double Energy;
   ///  This is true if we should make distinct copies represeninting a +k, -k pair
   bool MakeTwoCopies;
-  /// True if this state is a core state
-  bool IsCoreState;
   ///default constructor
   BandInfo()
-      : TwistIndex(0), BandIndex(-1), BandGroup(0), Spin(0), Energy(1e9), MakeTwoCopies(false), IsCoreState(false)
+      : TwistIndex(0), BandIndex(-1), BandGroup(0), Spin(0), Energy(1e9), MakeTwoCopies(false)
   {}
 
   /** operator to determine the order of any band
diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h
index 53fe185baa..2a69c08aaf 100644
--- a/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h
+++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h
@@ -19,8 +19,11 @@
  */
 #ifndef QMCPLUSPLUS_BSPLINE_READER_BASE_H
 #define QMCPLUSPLUS_BSPLINE_READER_BASE_H
+
 #include "mpi/collectives.h"
 #include "mpi/point2point.h"
+#include <einspline/bspline_base.h>
+
 namespace qmcplusplus
 {
 struct SPOSetInputInfo;
@@ -189,10 +192,6 @@ struct BsplineReaderBase
                            const std::vector<BandInfo>& bigspace,
                            SPOSetInfo& sposet,
                            std::vector<int>& band2spo);
-
-  /** export the MultiSpline to the old class EinsplineSetExtended for the GPU calculation*/
-  virtual std::unique_ptr<multi_UBspline_3d_z> export_MultiSplineComplexDouble() = 0;
-  virtual std::unique_ptr<multi_UBspline_3d_d> export_MultiSplineDouble()        = 0;
 };
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h
index 5d6153f7fa..dbeb68ff3c 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h
@@ -68,63 +68,6 @@ struct SplineSetReader : public BsplineReaderBase
   // transform cG to radial functions
   virtual void create_atomic_centers_Gspace(Vector<std::complex<double>>& cG, Communicate& band_group_comm, int iorb) {}
 
-  /** for exporting data from multi_UBspline_3d_d to multi_UBspline_3d_z
-   *  This is only used by the legacy EinsplineSet class. To be deleted together with EinsplineSet.
-   */
-  std::unique_ptr<multi_UBspline_3d_z> export_MultiSplineComplexDouble() override
-  {
-    Ugrid xyz_grid[3];
-    BCtype_d xyz_bc_d[3];
-    set_grid(bspline->HalfG, xyz_grid, xyz_bc_d);
-
-    BCtype_z xyz_bc[3];
-    for (int i = 0; i < 3; i++)
-    {
-      xyz_bc[i].lCode = xyz_bc_d[i].lCode;
-      xyz_bc[i].rCode = xyz_bc_d[i].rCode;
-    }
-
-    const auto* source = (multi_UBspline_3d_d*)bspline->SplineInst->getSplinePtr();
-    std::unique_ptr<multi_UBspline_3d_z> target;
-    target.reset(einspline::create(target.get(), xyz_grid, xyz_bc, source->num_splines / 2));
-
-    if (source->x_grid.num != target->x_grid.num || source->y_grid.num != target->y_grid.num ||
-        source->z_grid.num != target->z_grid.num)
-      throw std::runtime_error("export_MultiSplineComplexDouble failed for inconsistent grid dimensions.");
-
-    if (source->coefs_size != target->coefs_size * 2)
-      throw std::runtime_error("export_MultiSplineComplexDouble failed for inconsistent coefs_size.");
-
-    std::copy_n(source->coefs, source->coefs_size, (double*)target->coefs);
-
-    return target;
-  }
-
-  /** for exporting data from multi_UBspline_3d_d to multi_UBspline_3d_z
-   *  This is only used by the legacy EinsplineSet class. To be deleted together with EinsplineSet.
-   */
-  std::unique_ptr<multi_UBspline_3d_d> export_MultiSplineDouble() override
-  {
-    Ugrid xyz_grid[3];
-    BCtype_d xyz_bc[3];
-    set_grid(bspline->HalfG, xyz_grid, xyz_bc);
-
-    const auto* source = (multi_UBspline_3d_d*)bspline->SplineInst->getSplinePtr();
-    std::unique_ptr<multi_UBspline_3d_d> target;
-    target.reset(einspline::create(target.get(), xyz_grid, xyz_bc, source->num_splines));
-
-    if (source->x_grid.num != target->x_grid.num || source->y_grid.num != target->y_grid.num ||
-        source->z_grid.num != target->z_grid.num)
-      throw std::runtime_error("export_MultiSplineDouble failed for inconsistent grid dimensions.");
-
-    if (source->coefs_size != target->coefs_size)
-      throw std::runtime_error("export_MultiSplineDouble failed for inconsistent coefs_size.");
-
-    std::copy_n(source->coefs, source->coefs_size, target->coefs);
-
-    return target;
-  }
-
   std::unique_ptr<SPOSet> create_spline_set(const std::string& my_name,
                                             int spin,
                                             const BandInfoGroup& bandgroup) override
diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt
index 8a0611a6f5..fe26571d8f 100644
--- a/src/QMCWaveFunctions/CMakeLists.txt
+++ b/src/QMCWaveFunctions/CMakeLists.txt
@@ -80,14 +80,10 @@ if(OHMMS_DIM MATCHES 3)
   endif(QMC_COMPLEX)
 
   if(HAVE_EINSPLINE)
-    if(NOT MIXED_PRECISION)
-      set(FERMION_SRCS ${FERMION_SRCS} EinsplineSet.cpp)
-    endif(NOT MIXED_PRECISION)
     set(FERMION_SRCS
         ${FERMION_SRCS}
         EinsplineSetBuilderCommon.cpp
         EinsplineSetBuilderOld.cpp
-        AtomicOrbital.cpp
         EinsplineSetBuilderReadBands_ESHDF.cpp
         EinsplineSetBuilderESHDF.fft.cpp
         EinsplineSetBuilder_createSPOs.cpp
diff --git a/src/QMCWaveFunctions/EinsplineSet.cpp b/src/QMCWaveFunctions/EinsplineSet.cpp
deleted file mode 100644
index 95bed41a33..0000000000
--- a/src/QMCWaveFunctions/EinsplineSet.cpp
+++ /dev/null
@@ -1,1285 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
-//
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//
-// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//////////////////////////////////////////////////////////////////////////////////////
-
-
-#include "CPU/e2iphi.h"
-#include "EinsplineSet.h"
-#include "einspline/multi_bspline.h"
-#include "CPU/math.hpp"
-#include "type_traits/ConvertToReal.h"
-
-namespace qmcplusplus
-{
-template<typename StorageType>
-inline void EinsplineSetExtended<StorageType>::computePhaseFactors(const TinyVector<RealType, OHMMS_DIM>& r)
-{
-  APP_ABORT("EinsplineSetExtended<StorageType>::computePhaseFactors called");
-  for (int i = 0; i < kPoints.size(); i++)
-    phase[i] = -dot(r, kPoints[i]);
-  eval_e2iphi(kPoints.size(), phase.data(), eikr.data());
-  //eval_e2iphi(phase,eikr);
-  //#ifdef HAVE_MKL
-  //    for (int i=0; i<kPoints.size(); i++)
-  //      phase[i] = -dot(r, kPoints[i]);
-  //    vzCIS(OrbitalSetSize, phase, (double*)eikr.data());
-  //#else
-  //    double s, c;
-  //    for (int i=0; i<kPoints.size(); i++) {
-  //      phase[i] = -dot(r, kPoints[i]);
-  //      qmcplusplus::sincos (phase[i], &s, &c);
-  //      eikr[i] = std::complex<double>(c,s);
-  //    }
-  //#endif
-}
-
-
-EinsplineSet::UnitCellType EinsplineSet::GetLattice() { return SuperLattice; }
-
-void EinsplineSet::resetSourceParticleSet(ParticleSet& ions) {}
-
-void EinsplineSet::setOrbitalSetSize(int norbs) { OrbitalSetSize = norbs; }
-
-// Real evaluation functions
-inline void EinsplineMultiEval(multi_UBspline_3d_d* restrict spline,
-                               const TinyVector<double, 3>& r,
-                               Vector<double>& psi)
-{
-  eval_multi_UBspline_3d_d(spline, r[0], r[1], r[2], psi.data());
-}
-
-inline void EinsplineMultiEval(multi_UBspline_3d_d* restrict spline, TinyVector<double, 3> r, std::vector<double>& psi)
-{
-  eval_multi_UBspline_3d_d(spline, r[0], r[1], r[2], &(psi[0]));
-}
-
-inline void EinsplineMultiEval(multi_UBspline_3d_d* restrict spline,
-                               const TinyVector<double, 3>& r,
-                               Vector<double>& psi,
-                               Vector<TinyVector<double, 3>>& grad)
-{
-  eval_multi_UBspline_3d_d_vg(spline, r[0], r[1], r[2], psi.data(), (double*)grad.data());
-}
-
-
-inline void EinsplineMultiEval(multi_UBspline_3d_d* restrict spline,
-                               const TinyVector<double, 3>& r,
-                               Vector<double>& psi,
-                               Vector<TinyVector<double, 3>>& grad,
-                               Vector<Tensor<double, 3>>& hess)
-{
-  eval_multi_UBspline_3d_d_vgh(spline, r[0], r[1], r[2], psi.data(), (double*)grad.data(), (double*)hess.data());
-}
-
-inline void EinsplineMultiEval(multi_UBspline_3d_d* restrict spline,
-                               const TinyVector<double, 3>& r,
-                               Vector<double>& psi,
-                               Vector<TinyVector<double, 3>>& grad,
-                               Vector<Tensor<double, 3>>& hess,
-                               Vector<TinyVector<Tensor<double, 3>, 3>>& gradhess)
-{
-  eval_multi_UBspline_3d_d_vghgh(spline, r[0], r[1], r[2], psi.data(), (double*)grad.data(), (double*)hess.data(),
-                                 (double*)gradhess.data());
-}
-
-
-//////////////////////////////////
-// Complex evaluation functions //
-//////////////////////////////////
-inline void EinsplineMultiEval(multi_UBspline_3d_z* restrict spline,
-                               const TinyVector<double, 3>& r,
-                               Vector<std::complex<double>>& psi)
-{
-  eval_multi_UBspline_3d_z(spline, r[0], r[1], r[2], psi.data());
-}
-
-inline void EinsplineMultiEval(multi_UBspline_3d_z* restrict spline,
-                               const TinyVector<double, 3>& r,
-                               Vector<std::complex<double>>& psi,
-                               Vector<TinyVector<std::complex<double>, 3>>& grad)
-{
-  eval_multi_UBspline_3d_z_vg(spline, r[0], r[1], r[2], psi.data(), (std::complex<double>*)grad.data());
-}
-
-inline void EinsplineMultiEval(multi_UBspline_3d_z* restrict spline,
-                               const TinyVector<double, 3>& r,
-                               Vector<std::complex<double>>& psi,
-                               Vector<TinyVector<std::complex<double>, 3>>& grad,
-                               Vector<Tensor<std::complex<double>, 3>>& hess)
-{
-  eval_multi_UBspline_3d_z_vgh(spline, r[0], r[1], r[2], psi.data(), (std::complex<double>*)grad.data(),
-                               (std::complex<double>*)hess.data());
-}
-
-inline void EinsplineMultiEval(multi_UBspline_3d_z* restrict spline,
-                               const TinyVector<double, 3>& r,
-                               Vector<std::complex<double>>& psi,
-                               Vector<TinyVector<std::complex<double>, 3>>& grad,
-                               Vector<Tensor<std::complex<double>, 3>>& hess,
-                               Vector<TinyVector<Tensor<std::complex<double>, 3>, 3>>& gradhess)
-{
-  eval_multi_UBspline_3d_z_vghgh(spline, r[0], r[1], r[2], psi.data(), (std::complex<double>*)grad.data(),
-                                 (std::complex<double>*)hess.data(), (std::complex<double>*)gradhess.data());
-}
-
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::setOrbitalSetSize(int norbs)
-{
-  OrbitalSetSize = norbs;
-}
-
-#if !defined(QMC_COMPLEX)
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluateValue(const ParticleSet& P, int iat, RealValueVector& psi)
-{
-  ValueTimer.start();
-  const PosType& r(P.activeR(iat));
-  // Check atomic orbitals
-  bool inAtom = false;
-  for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
-  {
-    inAtom = AtomicOrbitals[jat].evaluate(r, storage_value_vector_);
-    if (inAtom)
-      break;
-  }
-  StorageValueVector& valVec = storage_value_vector_;
-  if (!inAtom)
-  {
-    PosType ru(PrimLattice.toUnit(r));
-    for (int i = 0; i < OHMMS_DIM; i++)
-      ru[i] -= std::floor(ru[i]);
-    EinsplineTimer.start();
-    EinsplineMultiEval(MultiSpline, ru, valVec);
-    EinsplineTimer.stop();
-    // Add e^ikr phase to B-spline orbitals
-    for (int j = 0; j < NumValenceOrbs; j++)
-    {
-      PosType k = kPoints[j];
-      double s, c;
-      double phase = -dot(r, k);
-      qmcplusplus::sincos(phase, &s, &c);
-      std::complex<double> e_mikr(c, s);
-      valVec[j] *= e_mikr;
-    }
-  }
-  const int N  = storage_value_vector_.size();
-  int psiIndex = 0;
-  for (int j = 0; j < N; j++)
-  {
-    std::complex<double> psi_val = storage_value_vector_[j];
-    psi[psiIndex]                = real(psi_val);
-    psiIndex++;
-    if (MakeTwoCopies[j])
-    {
-      psi[psiIndex] = imag(psi_val);
-      psiIndex++;
-    }
-  }
-  ValueTimer.stop();
-}
-
-
-// This is an explicit specialization of the above for real orbitals
-// with a real return value, i.e. simulations at the gamma or L
-// point.
-template<>
-void EinsplineSetExtended<double>::evaluateValue(const ParticleSet& P, int iat, RealValueVector& psi)
-{
-  ValueTimer.start();
-  const PosType& r(P.activeR(iat));
-  bool inAtom = false;
-  for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
-  {
-    inAtom = AtomicOrbitals[jat].evaluate(r, psi);
-    if (inAtom)
-      break;
-  }
-  if (!inAtom)
-  {
-    PosType ru(PrimLattice.toUnit(r));
-    int sign = 0;
-    for (int i = 0; i < OHMMS_DIM; i++)
-    {
-      RealType img = std::floor(ru[i]);
-      ru[i] -= img;
-      sign += HalfG[i] * (int)img;
-    }
-    // Check atomic orbitals
-    EinsplineTimer.start();
-    EinsplineMultiEval(MultiSpline, ru, psi);
-    EinsplineTimer.stop();
-    if (sign & 1)
-      for (int j = 0; j < psi.size(); j++)
-        psi[j] *= -1.0;
-  }
-  ValueTimer.stop();
-}
-
-
-// Value, gradient, and laplacian
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluateVGL(const ParticleSet& P,
-                                                    int iat,
-                                                    RealValueVector& psi,
-                                                    RealGradVector& dpsi,
-                                                    RealValueVector& d2psi)
-{
-  VGLTimer.start();
-  const PosType& r(P.activeR(iat));
-  std::complex<double> eye(0.0, 1.0);
-  bool inAtom = false;
-  for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
-  {
-    inAtom = AtomicOrbitals[jat].evaluate(r, storage_value_vector_, storage_grad_vector_, storage_lapl_vector_);
-    if (inAtom)
-      break;
-  }
-  StorageValueVector& valVec  = storage_value_vector_;
-  StorageGradVector& gradVec  = storage_grad_vector_;
-  StorageValueVector& laplVec = storage_lapl_vector_;
-  // Finally, copy into output vectors
-  int psiIndex = 0;
-  const int N  = storage_value_vector_.size();
-  for (int j = 0; j < N; j++)
-  {
-    std::complex<double> psi_val, psi_lapl;
-    TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
-    psi_val       = storage_value_vector_[j];
-    psi_grad      = storage_grad_vector_[j];
-    psi_lapl      = storage_lapl_vector_[j];
-    psi[psiIndex] = real(psi_val);
-    for (int n = 0; n < OHMMS_DIM; n++)
-      dpsi[psiIndex][n] = real(psi_grad[n]);
-    d2psi[psiIndex] = real(psi_lapl);
-    psiIndex++;
-    if (MakeTwoCopies[j])
-    {
-      psi[psiIndex] = imag(psi_val);
-      for (int n = 0; n < OHMMS_DIM; n++)
-        dpsi[psiIndex][n] = imag(psi_grad[n]);
-      d2psi[psiIndex] = imag(psi_lapl);
-      psiIndex++;
-    }
-  }
-  VGLTimer.stop();
-}
-
-template<>
-void EinsplineSetExtended<double>::evaluateVGL(const ParticleSet& P,
-                                               int iat,
-                                               RealValueVector& psi,
-                                               RealGradVector& dpsi,
-                                               RealValueVector& d2psi)
-{
-  VGLTimer.start();
-  const PosType& r(P.activeR(iat));
-  bool inAtom = false;
-  for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
-  {
-    inAtom = AtomicOrbitals[jat].evaluate(r, psi, dpsi, d2psi);
-    if (inAtom)
-      break;
-  }
-  if (!inAtom)
-  {
-    PosType ru(PrimLattice.toUnit(r));
-    int sign = 0;
-    for (int i = 0; i < OHMMS_DIM; i++)
-    {
-      RealType img = std::floor(ru[i]);
-      ru[i] -= img;
-      sign += HalfG[i] * (int)img;
-    }
-    EinsplineTimer.start();
-    EinsplineMultiEval(MultiSpline, ru, psi, storage_grad_vector_, storage_hess_vector_);
-    EinsplineTimer.stop();
-    if (sign & 1)
-      for (int j = 0; j < psi.size(); j++)
-      {
-        psi[j] *= -1.0;
-        storage_grad_vector_[j] *= -1.0;
-        storage_hess_vector_[j] *= -1.0;
-      }
-    for (int i = 0; i < psi.size(); i++)
-    {
-      dpsi[i]  = dot(PrimLattice.G, storage_grad_vector_[i]);
-      d2psi[i] = trace(storage_hess_vector_[i], GGt);
-    }
-  }
-  VGLTimer.stop();
-}
-
-
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet& P,
-                                                             int first,
-                                                             int last,
-                                                             RealValueMatrix& psi,
-                                                             RealGradMatrix& dpsi,
-                                                             RealValueMatrix& d2psi)
-{
-  std::complex<double> eye(0.0, 1.0);
-  VGLMatTimer.start();
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    const PosType& r(P.activeR(iat));
-    bool inAtom = false;
-    for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
-    {
-      inAtom = AtomicOrbitals[jat].evaluate(r, storage_value_vector_, storage_grad_vector_, storage_lapl_vector_);
-      if (inAtom)
-        break;
-    }
-    StorageValueVector& valVec  = storage_value_vector_;
-    StorageGradVector& gradVec  = storage_grad_vector_;
-    StorageValueVector& laplVec = storage_lapl_vector_;
-    // Finally, copy into output vectors
-    int psiIndex = 0;
-    const int N  = storage_value_vector_.size();
-    for (int j = 0; j < N; j++)
-    {
-      std::complex<double> psi_val, psi_lapl;
-      TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
-      psi_val          = storage_value_vector_[j];
-      psi_grad         = storage_grad_vector_[j];
-      psi_lapl         = storage_lapl_vector_[j];
-      psi(i, psiIndex) = real(psi_val);
-      for (int n = 0; n < OHMMS_DIM; n++)
-        dpsi(i, psiIndex)[n] = real(psi_grad[n]);
-      d2psi(i, psiIndex) = real(psi_lapl);
-      psiIndex++;
-      // if (psiIndex >= dpsi.cols()) {
-      //   std::cerr << "Error:  out of bounds writing in EinsplineSet::evalate.\n"
-      // 	 << "psiIndex = " << psiIndex << "  dpsi.cols() = " << dpsi.cols() << std::endl;
-      // }
-      if (MakeTwoCopies[j])
-      {
-        psi(i, psiIndex) = imag(psi_val);
-        for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi(i, psiIndex)[n] = imag(psi_grad[n]);
-        d2psi(i, psiIndex) = imag(psi_lapl);
-        psiIndex++;
-      }
-    }
-  }
-  VGLMatTimer.stop();
-}
-
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet& P,
-                                                             int first,
-                                                             int last,
-                                                             RealValueMatrix& psi,
-                                                             RealGradMatrix& dpsi,
-                                                             RealHessMatrix& grad_grad_psi)
-{
-  std::complex<double> eye(0.0, 1.0);
-  VGLMatTimer.start();
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    const PosType& r(P.activeR(iat));
-    bool inAtom = false;
-    for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
-    {
-      inAtom = AtomicOrbitals[jat].evaluate(r, storage_value_vector_, storage_grad_vector_, storage_hess_vector_);
-      if (inAtom)
-        break;
-    }
-    StorageValueVector& valVec = storage_value_vector_;
-    StorageGradVector& gradVec = storage_grad_vector_;
-    StorageHessVector& hessVec = storage_hess_vector_;
-    Tensor<std::complex<double>, OHMMS_DIM> tmphs;
-    // Finally, copy into output vectors
-    int psiIndex = 0;
-    const int N  = storage_value_vector_.size();
-    for (int j = 0; j < N; j++)
-    {
-      std::complex<double> psi_val;
-      TinyVector<std::complex<double>, OHMMS_DIM> psi_grad;
-      psi_val          = storage_value_vector_[j];
-      psi_grad         = storage_grad_vector_[j];
-      tmphs            = storage_hess_vector_[j];
-      psi(i, psiIndex) = real(psi_val);
-      for (int n = 0; n < OHMMS_DIM; n++)
-        dpsi(i, psiIndex)[n] = real(psi_grad[n]);
-      //d2psi(i,psiIndex) = real(psi_lapl);
-      // FIX FIX FIX
-      for (int n = 0; n < OHMMS_DIM * OHMMS_DIM; n++)
-        grad_grad_psi(i, psiIndex)[n] = real(tmphs(n));
-      psiIndex++;
-      // if (psiIndex >= dpsi.cols()) {
-      //   std::cerr << "Error:  out of bounds writing in EinsplineSet::evalate.\n"
-      //     << "psiIndex = " << psiIndex << "  dpsi.cols() = " << dpsi.cols() << std::endl;
-      // }
-      if (MakeTwoCopies[j])
-      {
-        psi(i, psiIndex) = imag(psi_val);
-        for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi(i, psiIndex)[n] = imag(psi_grad[n]);
-        //d2psi(i,psiIndex) = imag(psi_lapl);
-        for (int n = 0; n < OHMMS_DIM * OHMMS_DIM; n++)
-          grad_grad_psi(i, psiIndex)[n] = imag(tmphs(n));
-        psiIndex++;
-      }
-    }
-  }
-  VGLMatTimer.stop();
-}
-
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluateGradSource(const ParticleSet& P,
-                                                           int first,
-                                                           int last,
-                                                           const ParticleSet& source,
-                                                           int iat,
-                                                           RealGradMatrix& dpsi)
-{
-  if (hasIonDerivs())
-  {
-    // Loop over dimensions
-    for (int dim = 0; dim < OHMMS_DIM; dim++)
-    {
-      // Loop over electrons
-      for (int iel = first, i = 0; iel < last; iel++, i++)
-      {
-        const PosType& r(P.activeR(iel));
-        PosType ru(PrimLattice.toUnit(r));
-        assert(FirstOrderSplines[iat][dim]);
-        EinsplineMultiEval(FirstOrderSplines[iat][dim], ru, storage_value_vector_);
-        int dpsiIndex = 0;
-        for (int j = 0; j < NumValenceOrbs; j++)
-        {
-          PosType k = kPoints[j];
-          double s, c;
-          double phase = -dot(r, k);
-          qmcplusplus::sincos(phase, &s, &c);
-          std::complex<double> e_mikr(c, s);
-          storage_value_vector_[j] *= e_mikr;
-          dpsi(i, dpsiIndex)[dim] = real(storage_value_vector_[j]);
-          dpsiIndex++;
-          if (MakeTwoCopies[j])
-          {
-            dpsi(i, dpsiIndex)[dim] = imag(storage_value_vector_[j]);
-            dpsiIndex++;
-          }
-        }
-      }
-    }
-    for (int i = 0; i < (last - first); i++)
-      for (int j = 0; j < (last - first); j++)
-        dpsi(i, j) = dot(PrimLattice.G, dpsi(i, j));
-  }
-}
-
-
-// Evaluate the gradient w.r.t. to ion iat of the gradient and
-// laplacian of the orbitals w.r.t. the electrons
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluateGradSource(const ParticleSet& P,
-                                                           int first,
-                                                           int last,
-                                                           const ParticleSet& source,
-                                                           int iat_src,
-                                                           RealGradMatrix& dphi,
-                                                           RealHessMatrix& dgrad_phi,
-                                                           RealGradMatrix& dlapl_phi)
-{
-  if (hasIonDerivs())
-  {
-    std::complex<double> eye(0.0, 1.0);
-    // Loop over dimensions
-    for (int dim = 0; dim < OHMMS_DIM; dim++)
-    {
-      // Loop over electrons
-      for (int iel = first, i = 0; iel < last; iel++, i++)
-      {
-        const PosType& r(P.activeR(iel));
-        PosType ru(PrimLattice.toUnit(r));
-        assert(FirstOrderSplines[iat_src][dim]);
-        EinsplineMultiEval(FirstOrderSplines[iat_src][dim], ru, storage_value_vector_, storage_grad_vector_,
-                           storage_hess_vector_);
-        int dphiIndex = 0;
-        for (int j = 0; j < NumValenceOrbs; j++)
-        {
-          storage_grad_vector_[j]                           = dot(PrimLattice.G, storage_grad_vector_[j]);
-          storage_lapl_vector_[j]                           = trace(storage_hess_vector_[j], GGt);
-          std::complex<double> u                            = storage_value_vector_[j];
-          TinyVector<std::complex<double>, OHMMS_DIM> gradu = storage_grad_vector_[j];
-          std::complex<double> laplu                        = storage_lapl_vector_[j];
-          PosType k                                         = kPoints[j];
-          TinyVector<std::complex<double>, OHMMS_DIM> ck;
-          for (int n = 0; n < OHMMS_DIM; n++)
-            ck[n] = k[n];
-          double s, c;
-          double phase = -dot(r, k);
-          qmcplusplus::sincos(phase, &s, &c);
-          std::complex<double> e_mikr(c, s);
-          storage_value_vector_[j] = e_mikr * u;
-          storage_grad_vector_[j]  = e_mikr * (-eye * u * ck + gradu);
-          storage_lapl_vector_[j]  = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu);
-          dphi(i, dphiIndex)[dim]  = real(storage_value_vector_[j]);
-          for (int k = 0; k < OHMMS_DIM; k++)
-            dgrad_phi(dphiIndex)[dim] = real(storage_grad_vector_[j][k]);
-          dlapl_phi(dphiIndex)[dim] = real(storage_lapl_vector_[j]);
-          dphiIndex++;
-          if (MakeTwoCopies[j])
-          {
-            dphi(i, dphiIndex)[dim] = imag(storage_value_vector_[j]);
-            for (int k = 0; k < OHMMS_DIM; k++)
-              dgrad_phi(i, dphiIndex)(dim, k) = imag(storage_grad_vector_[j][k]);
-            dlapl_phi(i, dphiIndex)[dim] = imag(storage_lapl_vector_[j]);
-            dphiIndex++;
-          }
-        }
-      }
-    }
-    for (int i = 0; i < (last - first); i++)
-      for (int j = 0; j < (last - first); j++)
-      {
-        dphi(i, j) = dot(PrimLattice.G, dphi(i, j));
-        // Check this one!
-        dgrad_phi(i, j) = dot(PrimLattice.G, dgrad_phi(i, j));
-        dlapl_phi(i, j) = dot(PrimLattice.G, dlapl_phi(i, j));
-      }
-  }
-}
-
-
-template<>
-void EinsplineSetExtended<double>::evaluateGradSource(const ParticleSet& P,
-                                                      int first,
-                                                      int last,
-                                                      const ParticleSet& source,
-                                                      int iat_src,
-                                                      RealGradMatrix& dphi,
-                                                      RealHessMatrix& dgrad_phi,
-                                                      RealGradMatrix& dlapl_phi)
-{
-  if (hasIonDerivs())
-  {
-    // Loop over dimensions
-    for (int dim = 0; dim < OHMMS_DIM; dim++)
-    {
-      assert(FirstOrderSplines[iat_src][dim]);
-      // Loop over electrons
-      for (int iel = first, i = 0; iel < last; iel++, i++)
-      {
-        const PosType& r(P.activeR(iel));
-        PosType ru(PrimLattice.toUnit(r));
-        int sign = 0;
-        for (int n = 0; n < OHMMS_DIM; n++)
-        {
-          RealType img = std::floor(ru[n]);
-          ru[n] -= img;
-          sign += HalfG[n] * (int)img;
-        }
-        for (int n = 0; n < OHMMS_DIM; n++)
-          ru[n] -= std::floor(ru[n]);
-        EinsplineMultiEval(FirstOrderSplines[iat_src][dim], ru, storage_value_vector_, storage_grad_vector_,
-                           storage_hess_vector_);
-        if (sign & 1)
-          for (int j = 0; j < OrbitalSetSize; j++)
-          {
-            dphi(i, j)[dim] = -1.0 * storage_value_vector_[j];
-            PosType g       = -1.0 * dot(PrimLattice.G, storage_grad_vector_[j]);
-            for (int k = 0; k < OHMMS_DIM; k++)
-              dgrad_phi(i, j)(dim, k) = g[k];
-            dlapl_phi(i, j)[dim] = -1.0 * trace(storage_hess_vector_[j], GGt);
-          }
-        else
-          for (int j = 0; j < OrbitalSetSize; j++)
-          {
-            dphi(i, j)[dim] = storage_value_vector_[j];
-            PosType g       = dot(PrimLattice.G, storage_grad_vector_[j]);
-            for (int k = 0; k < OHMMS_DIM; k++)
-              dgrad_phi(i, j)(dim, k) = g[k];
-            dlapl_phi(i, j)[dim] = trace(storage_hess_vector_[j], GGt);
-          }
-      }
-    }
-    for (int i = 0; i < (last - first); i++)
-      for (int j = 0; j < (last - first); j++)
-      {
-        dphi(i, j) = dot(PrimLattice.G, dphi(i, j));
-        // Check this one!
-        dgrad_phi(i, j) = dot(PrimLattice.G, dgrad_phi(i, j));
-        dlapl_phi(i, j) = dot(PrimLattice.G, dlapl_phi(i, j));
-      }
-  }
-}
-template<>
-void EinsplineSetExtended<double>::evaluateGradSource(const ParticleSet& P,
-                                                      int first,
-                                                      int last,
-                                                      const ParticleSet& source,
-                                                      int iat,
-                                                      RealGradMatrix& dpsi)
-{
-  if (hasIonDerivs())
-  {
-    // Loop over dimensions
-    for (int dim = 0; dim < OHMMS_DIM; dim++)
-    {
-      assert(FirstOrderSplines[iat][dim]);
-      // Loop over electrons
-      for (int iel = first, i = 0; iel < last; iel++, i++)
-      {
-        const PosType& r(P.activeR(iel));
-        PosType ru(PrimLattice.toUnit(r));
-        int sign = 0;
-        for (int n = 0; n < OHMMS_DIM; n++)
-        {
-          RealType img = std::floor(ru[n]);
-          ru[n] -= img;
-          sign += HalfG[n] * (int)img;
-        }
-        for (int n = 0; n < OHMMS_DIM; n++)
-          ru[n] -= std::floor(ru[n]);
-        EinsplineMultiEval(FirstOrderSplines[iat][dim], ru, storage_value_vector_);
-        if (sign & 1)
-          for (int j = 0; j < OrbitalSetSize; j++)
-            dpsi(i, j)[dim] = -1.0 * storage_value_vector_[j];
-        else
-          for (int j = 0; j < OrbitalSetSize; j++)
-            dpsi(i, j)[dim] = storage_value_vector_[j];
-      }
-    }
-    for (int i = 0; i < (last - first); i++)
-      for (int j = 0; j < (last - first); j++)
-      {
-        dpsi(i, j) = dot(PrimLattice.G, dpsi(i, j));
-      }
-  }
-}
-
-#else
-
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluateValue(const ParticleSet& P, int iat, ComplexValueVector& psi)
-{
-  ValueTimer.start();
-  const PosType& r(P.activeR(iat));
-  PosType ru(PrimLattice.toUnit(r));
-  for (int i = 0; i < OHMMS_DIM; i++)
-    ru[i] -= std::floor(ru[i]);
-  EinsplineTimer.start();
-  EinsplineMultiEval(MultiSpline, ru, storage_value_vector_);
-  EinsplineTimer.stop();
-  //computePhaseFactors(r);
-  for (int i = 0; i < psi.size(); i++)
-  {
-    PosType k = kPoints[i];
-    double s, c;
-    double phase = -dot(r, k);
-    qmcplusplus::sincos(phase, &s, &c);
-    std::complex<double> e_mikr(c, s);
-    psi[i] = e_mikr * storage_value_vector_[i];
-  }
-  ValueTimer.stop();
-}
-
-// Value, gradient, and laplacian
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluateVGL(const ParticleSet& P,
-                                                    int iat,
-                                                    ComplexValueVector& psi,
-                                                    ComplexGradVector& dpsi,
-                                                    ComplexValueVector& d2psi)
-{
-  VGLTimer.start();
-  const PosType& r(P.activeR(iat));
-  PosType ru(PrimLattice.toUnit(r));
-  for (int i = 0; i < OHMMS_DIM; i++)
-    ru[i] -= std::floor(ru[i]);
-  EinsplineTimer.start();
-  EinsplineMultiEval(MultiSpline, ru, storage_value_vector_, storage_grad_vector_, storage_hess_vector_);
-  EinsplineTimer.stop();
-  //computePhaseFactors(r);
-  std::complex<double> eye(0.0, 1.0);
-  for (int j = 0; j < psi.size(); j++)
-  {
-    std::complex<double> u, laplu;
-    TinyVector<std::complex<double>, OHMMS_DIM> gradu;
-    u         = storage_value_vector_[j];
-    gradu     = dot(PrimLattice.G, storage_grad_vector_[j]);
-    laplu     = trace(storage_hess_vector_[j], GGt);
-    PosType k = kPoints[j];
-    TinyVector<std::complex<double>, OHMMS_DIM> ck;
-    for (int n = 0; n < OHMMS_DIM; n++)
-      ck[n] = k[n];
-    double s, c;
-    double phase = -dot(r, k);
-    qmcplusplus::sincos(phase, &s, &c);
-    std::complex<double> e_mikr(c, s);
-    psi[j]  = e_mikr * u;
-    dpsi[j] = e_mikr * (-eye * u * ck + gradu);
-    //convertVec(e_mikr*(-eye*u*ck + gradu), dpsi[j]);
-    d2psi[j] = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu);
-  }
-  VGLTimer.stop();
-}
-
-// Value, gradient, and laplacian
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluateVGH(const ParticleSet& P,
-                                                    int iat,
-                                                    ComplexValueVector& psi,
-                                                    ComplexGradVector& dpsi,
-                                                    ComplexHessVector& grad_grad_psi)
-{
-  VGLTimer.start();
-  const PosType& r(P.activeR(iat));
-  PosType ru(PrimLattice.toUnit(r));
-  for (int i = 0; i < OHMMS_DIM; i++)
-    ru[i] -= std::floor(ru[i]);
-  EinsplineTimer.start();
-  EinsplineMultiEval(MultiSpline, ru, storage_value_vector_, storage_grad_vector_, storage_hess_vector_);
-  EinsplineTimer.stop();
-  //computePhaseFactors(r);
-  std::complex<double> eye(0.0, 1.0);
-  for (int j = 0; j < psi.size(); j++)
-  {
-    std::complex<double> u;
-    TinyVector<std::complex<double>, OHMMS_DIM> gradu;
-    Tensor<std::complex<double>, OHMMS_DIM> hs, tmphs;
-    u     = storage_value_vector_[j];
-    gradu = dot(PrimLattice.G, storage_grad_vector_[j]);
-    ////laplu = trace(storage_hess_vector_[j], GGt);
-    tmphs = dot(PrimLattice.G, storage_hess_vector_[j]);
-    //hs = dot(tmphs,PrimLattice.G);
-    hs        = dot(tmphs, PrimLattice.Gt);
-    PosType k = kPoints[j];
-    TinyVector<std::complex<double>, OHMMS_DIM> ck;
-    for (int n = 0; n < OHMMS_DIM; n++)
-      ck[n] = k[n];
-    double s, c;
-    double phase = -dot(r, k);
-    qmcplusplus::sincos(phase, &s, &c);
-    std::complex<double> e_mikr(c, s);
-    psi[j]  = e_mikr * u;
-    dpsi[j] = e_mikr * (-eye * u * ck + gradu);
-    //convertVec(e_mikr*(-eye*u*ck + gradu), dpsi[j]);
-    //d2psi[j] = e_mikr*(-dot(k,k)*u - 2.0*eye*dot(ck,gradu) + laplu);
-    grad_grad_psi[j] =
-        e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck));
-  }
-  VGLTimer.stop();
-}
-#endif
-
-#if !defined(QMC_COMPLEX)
-template<>
-void EinsplineSetExtended<double>::evaluate_notranspose(const ParticleSet& P,
-                                                        int first,
-                                                        int last,
-                                                        RealValueMatrix& psi,
-                                                        RealGradMatrix& dpsi,
-                                                        RealValueMatrix& d2psi)
-{
-  VGLMatTimer.start();
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    const PosType& r(P.activeR(iat));
-    bool inAtom = false;
-    for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
-    {
-      inAtom = AtomicOrbitals[jat].evaluate(r, storage_value_vector_, storage_grad_vector_, storage_lapl_vector_);
-      if (inAtom)
-      {
-        for (int j = 0; j < OrbitalSetSize; j++)
-        {
-          psi(i, j)   = storage_value_vector_[j];
-          dpsi(i, j)  = storage_grad_vector_[j];
-          d2psi(i, j) = storage_lapl_vector_[j];
-        }
-        break;
-      }
-    }
-    if (!inAtom)
-    {
-      PosType ru(PrimLattice.toUnit(r));
-      int sign = 0;
-      for (int n = 0; n < OHMMS_DIM; n++)
-      {
-        RealType img = std::floor(ru[n]);
-        ru[n] -= img;
-        sign += HalfG[n] * (int)img;
-      }
-      for (int n = 0; n < OHMMS_DIM; n++)
-        ru[n] -= std::floor(ru[n]);
-      EinsplineTimer.start();
-      EinsplineMultiEval(MultiSpline, ru, storage_value_vector_, storage_grad_vector_, storage_hess_vector_);
-      EinsplineTimer.stop();
-      if (sign & 1)
-        for (int j = 0; j < OrbitalSetSize; j++)
-        {
-          storage_value_vector_[j] *= -1.0;
-          storage_grad_vector_[j] *= -1.0;
-          storage_hess_vector_[j] *= -1.0;
-        }
-      for (int j = 0; j < OrbitalSetSize; j++)
-      {
-        psi(i, j)   = storage_value_vector_[j];
-        dpsi(i, j)  = dot(PrimLattice.G, storage_grad_vector_[j]);
-        d2psi(i, j) = trace(storage_hess_vector_[j], GGt);
-      }
-    }
-  }
-  VGLMatTimer.stop();
-}
-
-template<>
-void EinsplineSetExtended<double>::evaluate_notranspose(const ParticleSet& P,
-                                                        int first,
-                                                        int last,
-                                                        RealValueMatrix& psi,
-                                                        RealGradMatrix& dpsi,
-                                                        RealHessMatrix& grad_grad_psi)
-{
-  //APP_ABORT("evaluate_notranspose:  Check Hessian, then remove this error message.\n")
-  VGLMatTimer.start();
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    const PosType& r(P.activeR(iat));
-    bool inAtom = false;
-    for (int jat = 0; jat < AtomicOrbitals.size(); jat++)
-    {
-      inAtom = AtomicOrbitals[jat].evaluate(r, storage_value_vector_, storage_grad_vector_, storage_hess_vector_);
-      if (inAtom)
-      {
-        for (int j = 0; j < OrbitalSetSize; j++)
-        {
-          psi(i, j)           = storage_value_vector_[j];
-          dpsi(i, j)          = storage_grad_vector_[j];
-          grad_grad_psi(i, j) = storage_hess_vector_[j];
-        }
-        break;
-      }
-    }
-    if (!inAtom)
-    {
-      PosType ru(PrimLattice.toUnit(r));
-      int sign = 0;
-      for (int n = 0; n < OHMMS_DIM; n++)
-      {
-        RealType img = std::floor(ru[n]);
-        ru[n] -= img;
-        sign += HalfG[n] * (int)img;
-      }
-      for (int n = 0; n < OHMMS_DIM; n++)
-        ru[n] -= std::floor(ru[n]);
-      EinsplineTimer.start();
-      EinsplineMultiEval(MultiSpline, ru, storage_value_vector_, storage_grad_vector_, storage_hess_vector_);
-      EinsplineTimer.stop();
-      if (sign & 1)
-        for (int j = 0; j < OrbitalSetSize; j++)
-        {
-          storage_value_vector_[j] *= -1.0;
-          storage_grad_vector_[j] *= -1.0;
-          storage_hess_vector_[j] *= -1.0;
-        }
-      for (int j = 0; j < OrbitalSetSize; j++)
-      {
-        psi(i, j)           = storage_value_vector_[j];
-        dpsi(i, j)          = dot(PrimLattice.G, storage_grad_vector_[j]);
-        grad_grad_psi(i, j) = dot(PrimLattice.G, dot(storage_hess_vector_[j], PrimLattice.Gt));
-      }
-    }
-  }
-  VGLMatTimer.stop();
-}
-
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet& P,
-                                                             int first,
-                                                             int last,
-                                                             RealValueMatrix& psi,
-                                                             RealGradMatrix& dpsi,
-                                                             RealHessMatrix& grad_grad_psi,
-                                                             RealGGGMatrix& grad_grad_grad_logdet)
-{
-  //      APP_ABORT(" EinsplineSetExtended<StorageType>::evaluate_notranspose not implemented for grad_grad_grad_logdet yet. \n");
-  VGLMatTimer.start();
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    const PosType& r(P.activeR(iat));
-    PosType ru(PrimLattice.toUnit(r));
-    for (int n = 0; n < OHMMS_DIM; n++)
-      ru[n] -= std::floor(ru[n]);
-    EinsplineTimer.start();
-    EinsplineMultiEval(MultiSpline, ru, storage_value_vector_, storage_grad_vector_, storage_hess_vector_,
-                       storage_grad_hess_vector_);
-    EinsplineTimer.stop();
-    for (int j = 0; j < NumValenceOrbs; j++)
-    {
-      TinyVector<std::complex<double>, OHMMS_DIM> tmpg;
-      Tensor<std::complex<double>, OHMMS_DIM> tmphs;
-      TinyVector<Tensor<std::complex<double>, OHMMS_DIM>, OHMMS_DIM> tmpghs;
-      tmpg                    = dot(PrimLattice.G, storage_grad_vector_[j]);
-      storage_grad_vector_[j] = tmpg;
-      tmphs                   = dot(PrimLattice.G, storage_hess_vector_[j]);
-      storage_hess_vector_[j] = dot(tmphs, PrimLattice.Gt);
-      for (int n = 0; n < OHMMS_DIM; n++)
-      {
-        tmphs     = dot(PrimLattice.G, storage_grad_hess_vector_[j][n]);
-        tmpghs[n] = dot(tmphs, PrimLattice.Gt);
-      }
-      storage_grad_hess_vector_[j] = dot(PrimLattice.G, tmpghs);
-    }
-    std::complex<double> eye(0.0, 1.0);
-    //    StorageValueVector &valVec =
-    //      storage_value_vector_;
-    //    StorageGradVector &gradVec =
-    //      storage_grad_vector_;
-    //    StorageHessVector &hessVec =
-    //      storage_hess_vector_;
-    //    Tensor<std::complex<double>,OHMMS_DIM> tmphs;
-    for (int j = 0; j < NumValenceOrbs; j++)
-    {
-      //            std::complex<double> u = valVec[j];
-      //            TinyVector<std::complex<double>,OHMMS_DIM> gradu = gradVec[j];
-      //            tmphs = hessVec[j];
-      //            PosType k = kPoints[j];
-      //            TinyVector<std::complex<double>,OHMMS_DIM> ck;
-      //            for (int n=0; n<OHMMS_DIM; n++)       ck[n] = k[n];
-      //            double s,c;
-      //            double phase = -dot(r, k);
-      //            qmcplusplus::sincos (phase, &s, &c);
-      //            std::complex<double> e_mikr (c,s);
-      //            valVec[j]   = e_mikr*u;
-      //            gradVec[j]  = e_mikr*(-eye*u*ck + gradu);
-      //            hessVec[j]  = e_mikr*(tmphs -u*outerProduct(ck,ck) - eye*outerProduct(ck,gradu) - eye*outerProduct(gradu,ck));
-      std::complex<double> u                            = (storage_value_vector_[j]);
-      TinyVector<std::complex<double>, OHMMS_DIM> gradu = (storage_grad_vector_[j]);
-      Tensor<std::complex<double>, OHMMS_DIM> tmphs     = (storage_hess_vector_[j]);
-      //        TinyVector<Tensor<std::complex<double>,OHMMS_DIM>,OHMMS_DIM> tmpghs=(storage_grad_hess_vector_[j]);
-      PosType k = kPoints[j];
-      TinyVector<std::complex<double>, OHMMS_DIM> ck;
-      for (int n = 0; n < OHMMS_DIM; n++)
-        ck[n] = k[n];
-      double s, c;
-      double phase = -dot(r, k);
-      qmcplusplus::sincos(phase, &s, &c);
-      std::complex<double> e_mikr(c, s);
-      storage_value_vector_[j] = e_mikr * u;
-      storage_grad_vector_[j]  = e_mikr * (-eye * u * ck + gradu);
-      storage_hess_vector_[j] =
-          e_mikr * (tmphs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck));
-      //Is this right?
-      storage_grad_hess_vector_[j] *= e_mikr;
-      for (unsigned a0(0); a0 < OHMMS_DIM; a0++)
-        for (unsigned a1(0); a1 < OHMMS_DIM; a1++)
-          for (unsigned a2(0); a2 < OHMMS_DIM; a2++)
-            storage_grad_hess_vector_[j][a0](a1, a2) += e_mikr *
-                (-1.0 * eye * (ck[a0] * tmphs(a1, a2) + ck[a1] * tmphs(a0, a2) + ck[a2] * tmphs(a0, a1)) -
-                 (ck[a0] * ck[a1] * gradu[a2] + ck[a0] * ck[a2] * gradu[a1] + ck[a1] * ck[a2] * gradu[a0]) +
-                 eye * ck[a0] * ck[a1] * ck[a2] * u);
-    }
-    int psiIndex(0);
-    for (int j = 0; j < NumValenceOrbs; j++)
-    {
-      if (MakeTwoCopies[j])
-      {
-        psi(i, psiIndex) = imag(storage_value_vector_[j]);
-        for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi(i, psiIndex)[n] = imag(storage_grad_vector_[j][n]);
-        for (int n = 0; n < OHMMS_DIM * OHMMS_DIM; n++)
-          grad_grad_psi(i, psiIndex)[n] = imag(storage_hess_vector_[j](n));
-        for (int n = 0; n < OHMMS_DIM; n++)
-          for (int m = 0; m < OHMMS_DIM * OHMMS_DIM; m++)
-            grad_grad_grad_logdet(i, psiIndex)[n][m] = imag(storage_grad_hess_vector_[j][n](m));
-        psiIndex++;
-        psi(i, psiIndex) = real(storage_value_vector_[j]);
-        for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi(i, psiIndex)[n] = real(storage_grad_vector_[j][n]);
-        for (int n = 0; n < OHMMS_DIM * OHMMS_DIM; n++)
-          grad_grad_psi(i, psiIndex)[n] = real(storage_hess_vector_[j](n));
-        for (int n = 0; n < OHMMS_DIM; n++)
-          for (int m = 0; m < OHMMS_DIM * OHMMS_DIM; m++)
-            grad_grad_grad_logdet(i, psiIndex)[n][m] = real(storage_grad_hess_vector_[j][n](m));
-        psiIndex++;
-      }
-      else
-      {
-        psi(i, psiIndex) = real(storage_value_vector_[j]);
-        for (int n = 0; n < OHMMS_DIM; n++)
-          dpsi(i, psiIndex)[n] = real(storage_grad_vector_[j][n]);
-        for (int n = 0; n < OHMMS_DIM * OHMMS_DIM; n++)
-          grad_grad_psi(i, psiIndex)[n] = real(storage_hess_vector_[j](n));
-        for (int n = 0; n < OHMMS_DIM; n++)
-          for (int m = 0; m < OHMMS_DIM * OHMMS_DIM; m++)
-            grad_grad_grad_logdet(i, psiIndex)[n][m] = real(storage_grad_hess_vector_[j][n](m));
-        psiIndex++;
-      }
-    }
-  }
-  VGLMatTimer.stop();
-}
-
-template<>
-void EinsplineSetExtended<double>::evaluate_notranspose(const ParticleSet& P,
-                                                        int first,
-                                                        int last,
-                                                        RealValueMatrix& psi,
-                                                        RealGradMatrix& dpsi,
-                                                        RealHessMatrix& grad_grad_psi,
-                                                        RealGGGMatrix& grad_grad_grad_logdet)
-{
-  //      APP_ABORT(" EinsplineSetExtended<StorageType>::evaluate_notranspose not implemented for grad_grad_grad_logdet yet. \n");
-  VGLMatTimer.start();
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    const PosType& r(P.activeR(iat));
-    bool inAtom  = false;
-    int psiIndex = 0;
-    const int N  = storage_value_vector_.size();
-    for (int j = 0; j < N; j++)
-    {
-      psi(i, psiIndex)                   = storage_value_vector_[j];
-      dpsi(i, psiIndex)                  = dot(storage_grad_vector_[j], PrimLattice.G);
-      grad_grad_psi(i, psiIndex)         = storage_hess_vector_[j];
-      grad_grad_grad_logdet(i, psiIndex) = dot(storage_grad_hess_vector_[j], PrimLattice.G);
-      psiIndex++;
-    }
-  }
-  VGLMatTimer.stop();
-}
-
-
-#else
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet& P,
-                                                             int first,
-                                                             int last,
-                                                             ComplexValueMatrix& psi,
-                                                             ComplexGradMatrix& dpsi,
-                                                             ComplexValueMatrix& d2psi)
-{
-  VGLMatTimer.start();
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    const PosType& r(P.activeR(iat));
-    PosType ru(PrimLattice.toUnit(r));
-    for (int n = 0; n < OHMMS_DIM; n++)
-      ru[n] -= std::floor(ru[n]);
-    EinsplineTimer.start();
-    EinsplineMultiEval(MultiSpline, ru, storage_value_vector_, storage_grad_vector_, storage_hess_vector_);
-    EinsplineTimer.stop();
-    //computePhaseFactors(r);
-    std::complex<double> eye(0.0, 1.0);
-    for (int j = 0; j < psi.cols(); j++)
-    {
-      std::complex<double> u, laplu;
-      TinyVector<std::complex<double>, OHMMS_DIM> gradu;
-      u         = storage_value_vector_[j];
-      gradu     = dot(PrimLattice.G, storage_grad_vector_[j]);
-      laplu     = trace(storage_hess_vector_[j], GGt);
-      PosType k = kPoints[j];
-      TinyVector<std::complex<double>, OHMMS_DIM> ck;
-      for (int n = 0; n < OHMMS_DIM; n++)
-        ck[n] = k[n];
-      double s, c;
-      double phase = -dot(r, k);
-      qmcplusplus::sincos(phase, &s, &c);
-      std::complex<double> e_mikr(c, s);
-      psi(i, j) = e_mikr * u;
-      //psi(j,i) = e_mikr * u;
-      dpsi(i, j) = e_mikr * (-eye * u * ck + gradu);
-      //convertVec(e_mikr*(-eye*u*ck + gradu), dpsi(i,j));
-      d2psi(i, j) = e_mikr * (-dot(k, k) * u - 2.0 * eye * dot(ck, gradu) + laplu);
-    }
-  }
-  VGLMatTimer.stop();
-}
-
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet& P,
-                                                             int first,
-                                                             int last,
-                                                             ComplexValueMatrix& psi,
-                                                             ComplexGradMatrix& dpsi,
-                                                             ComplexHessMatrix& grad_grad_psi)
-{
-  VGLMatTimer.start();
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    const PosType& r(P.activeR(iat));
-    PosType ru(PrimLattice.toUnit(r));
-    for (int n = 0; n < OHMMS_DIM; n++)
-      ru[n] -= std::floor(ru[n]);
-    EinsplineTimer.start();
-    EinsplineMultiEval(MultiSpline, ru, storage_value_vector_, storage_grad_vector_, storage_hess_vector_);
-    EinsplineTimer.stop();
-    //computePhaseFactors(r);
-    std::complex<double> eye(0.0, 1.0);
-    for (int j = 0; j < OrbitalSetSize; j++)
-    {
-      std::complex<double> u;
-      TinyVector<std::complex<double>, OHMMS_DIM> gradu;
-      Tensor<std::complex<double>, OHMMS_DIM> hs, tmphs;
-      u     = storage_value_vector_[j];
-      gradu = dot(PrimLattice.G, storage_grad_vector_[j]);
-      // tmphs = dot(transpose(PrimLattice.G),storage_hess_vector_[j]);
-      tmphs = dot(PrimLattice.G, storage_hess_vector_[j]);
-      hs    = dot(tmphs, PrimLattice.Gt);
-      //laplu = trace(storage_hess_vector_[j], GGt);
-      PosType k = kPoints[j];
-      TinyVector<std::complex<double>, OHMMS_DIM> ck;
-      for (int n = 0; n < OHMMS_DIM; n++)
-        ck[n] = k[n];
-      double s, c;
-      double phase = -dot(r, k);
-      qmcplusplus::sincos(phase, &s, &c);
-      std::complex<double> e_mikr(c, s);
-      psi(i, j) = e_mikr * u;
-      //psi(j,i) = e_mikr * u;
-      dpsi(i, j) = e_mikr * (-eye * u * ck + gradu);
-      //convertVec(e_mikr*(-eye*u*ck + gradu), dpsi(i,j));
-      grad_grad_psi(i, j) =
-          e_mikr * (hs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck));
-    }
-  }
-  VGLMatTimer.stop();
-}
-
-template<>
-void EinsplineSetExtended<double>::evaluate_notranspose(const ParticleSet& P,
-                                                        int first,
-                                                        int last,
-                                                        ComplexValueMatrix& psi,
-                                                        ComplexGradMatrix& dpsi,
-                                                        ComplexHessMatrix& grad_grad_psi,
-                                                        ComplexGGGMatrix& grad_grad_grad_logdet)
-{
-  APP_ABORT(
-      " EinsplineSetExtended<StorageType>::evaluate_notranspose not implemented for grad_grad_grad_logdet yet. \n");
-}
-
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::evaluate_notranspose(const ParticleSet& P,
-                                                             int first,
-                                                             int last,
-                                                             ComplexValueMatrix& psi,
-                                                             ComplexGradMatrix& dpsi,
-                                                             ComplexHessMatrix& grad_grad_psi,
-                                                             ComplexGGGMatrix& grad_grad_grad_logdet)
-{
-  //      APP_ABORT(" EinsplineSetExtended<StorageType>::evaluate_notranspose not implemented for grad_grad_grad_logdet yet. \n");
-  //    VGLMatTimer.start();
-  for (int iat = first, i = 0; iat < last; iat++, i++)
-  {
-    const PosType& r(P.activeR(iat));
-    PosType ru(PrimLattice.toUnit(r));
-    for (int n = 0; n < OHMMS_DIM; n++)
-      ru[n] -= std::floor(ru[n]);
-    EinsplineTimer.start();
-    EinsplineMultiEval(MultiSpline, ru, storage_value_vector_, storage_grad_vector_, storage_hess_vector_,
-                       storage_grad_hess_vector_);
-    EinsplineTimer.stop();
-    Tensor<double, OHMMS_DIM> PG;
-    PG = PrimLattice.G;
-    Tensor<double, OHMMS_DIM> TPG;
-    TPG = transpose(PrimLattice.G);
-    Tensor<std::complex<double>, OHMMS_DIM> hs, tmphs;
-    TinyVector<Tensor<std::complex<double>, OHMMS_DIM>, OHMMS_DIM> tmpghs, hvdot;
-    for (int j = 0; j < NumValenceOrbs; j++)
-    {
-      storage_grad_vector_[j] = dot(PG, storage_grad_vector_[j]);
-      tmphs                   = dot(PG, storage_hess_vector_[j]);
-      storage_hess_vector_[j] = dot(tmphs, TPG);
-      for (int n = 0; n < OHMMS_DIM; n++)
-      {
-        tmpghs[n]                       = dot(PG, storage_grad_hess_vector_[j][n]);
-        storage_grad_hess_vector_[j][n] = dot(tmpghs[n], TPG);
-      }
-      storage_grad_hess_vector_[j] = dot(PG, storage_grad_hess_vector_[j]);
-      //              grad_grad_grad_logdet(i,j)=storage_grad_hess_vector_[j];
-      //              grad_grad_psi(i,j)=storage_hess_vector_[j];
-      //              dpsi(i,j)=storage_grad_vector_[j];
-      //              psi(i,j)=storage_value_vector_[j];
-    }
-    const std::complex<double> eye(0.0, 1.0);
-    const std::complex<double> meye(0.0, -1.0);
-
-    // NumValenceOrbs appears to be the same as OrbitalSetSize in some code paths.
-    // In order to handle orbital rotation where OrbitalSetSize is not
-    // the same as the number of columns of psi, OrbitalSetSize was
-    // replaced with the columns of psi in other places.
-    // The minimum is used here just in case NumValenceOrbs might be smaller.
-    size_t loop_bound = NumValenceOrbs < psi.cols() ? NumValenceOrbs : psi.cols();
-    for (int j = 0; j < loop_bound; j++)
-    {
-      std::complex<double> u(storage_value_vector_[j]);
-      TinyVector<std::complex<double>, OHMMS_DIM> gradu(storage_grad_vector_[j]);
-      tmphs     = storage_hess_vector_[j];
-      tmpghs    = storage_grad_hess_vector_[j];
-      PosType k = kPoints[j];
-      TinyVector<double, OHMMS_DIM> ck;
-      for (int n = 0; n < OHMMS_DIM; n++)
-        ck[n] = k[n];
-      double s, c;
-      double phase = -dot(r, k);
-      qmcplusplus::sincos(phase, &s, &c);
-      std::complex<double> e_mikr(c, s);
-      psi(i, j)  = e_mikr * u;
-      dpsi(i, j) = e_mikr * (-eye * u * ck + gradu);
-      grad_grad_psi(i, j) =
-          e_mikr * (tmphs - u * outerProduct(ck, ck) - eye * outerProduct(ck, gradu) - eye * outerProduct(gradu, ck));
-      //Is this right?
-      storage_grad_hess_vector_[j] *= e_mikr;
-      for (unsigned a0(0); a0 < OHMMS_DIM; a0++)
-        for (unsigned a1(0); a1 < OHMMS_DIM; a1++)
-          for (unsigned a2(0); a2 < OHMMS_DIM; a2++)
-            storage_grad_hess_vector_[j][a0](a1, a2) += e_mikr *
-                (meye * (ck[a0] * tmphs(a1, a2) + ck[a1] * tmphs(a0, a2) + ck[a2] * tmphs(a0, a1)) -
-                 (ck[a0] * ck[a1] * gradu[a2] + ck[a0] * ck[a2] * gradu[a1] + ck[a1] * ck[a2] * gradu[a0]) +
-                 eye * ck[a0] * ck[a1] * ck[a2] * u);
-      grad_grad_grad_logdet(i, j) = storage_grad_hess_vector_[j];
-    }
-  }
-}
-
-#endif
-
-template<typename StorageType>
-std::string EinsplineSetExtended<StorageType>::Type()
-{
-  return "EinsplineSetExtended";
-}
-
-
-template<typename StorageType>
-void EinsplineSetExtended<StorageType>::registerTimers()
-{
-  ValueTimer.reset();
-  VGLTimer.reset();
-  VGLMatTimer.reset();
-  EinsplineTimer.reset();
-}
-
-
-template<typename StorageType>
-std::unique_ptr<SPOSet> EinsplineSetExtended<StorageType>::makeClone() const
-{
-  auto clone = std::make_unique<EinsplineSetExtended<StorageType>>(*this);
-  clone->registerTimers();
-  for (int iat = 0; iat < clone->AtomicOrbitals.size(); iat++)
-    clone->AtomicOrbitals[iat].registerTimers();
-  return clone;
-}
-
-template class EinsplineSetExtended<std::complex<double>>;
-template class EinsplineSetExtended<double>;
-} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/EinsplineSet.h b/src/QMCWaveFunctions/EinsplineSet.h
deleted file mode 100644
index b4c65a0b8e..0000000000
--- a/src/QMCWaveFunctions/EinsplineSet.h
+++ /dev/null
@@ -1,359 +0,0 @@
-//////////////////////////////////////////////////////////////////////////////////////
-// This file is distributed under the University of Illinois/NCSA Open Source License.
-// See LICENSE file in top directory for details.
-//
-// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
-//
-// File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//                    Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
-//                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
-//                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
-//                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
-//                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
-//
-// File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
-//////////////////////////////////////////////////////////////////////////////////////
-
-
-#ifndef QMCPLUSPLUS_EINSPLINE_SET_H
-#define QMCPLUSPLUS_EINSPLINE_SET_H
-
-#include "Configuration.h"
-#include "QMCWaveFunctions/BasisSetBase.h"
-#include "QMCWaveFunctions/SPOSet.h"
-#include "QMCWaveFunctions/AtomicOrbital.h"
-#include "Utilities/TimerManager.h"
-#include "spline/einspline_engine.hpp"
-
-namespace qmcplusplus
-{
-class EinsplineSetBuilder;
-
-class EinsplineSet : public SPOSet
-{
-  friend class EinsplineSetBuilder;
-
-public:
-  //////////////////////
-  // Type definitions //
-  //////////////////////
-  using UnitCellType = CrystalLattice<ParticleSet::Scalar_t, OHMMS_DIM>;
-
-  ///////////
-  // Flags //
-  ///////////
-  /// True if all Lattice is diagonal, i.e. 90 degree angles
-  bool Orthorhombic;
-  /// True if we are using localize orbitals
-  bool Localized;
-  /// True if we are tiling the primitive cell
-  bool Tiling;
-
-  //////////////////////////
-  // Lattice and geometry //
-  //////////////////////////
-  TinyVector<int, 3> TileFactor;
-  Tensor<int, OHMMS_DIM> TileMatrix;
-  UnitCellType SuperLattice, PrimLattice;
-  /// The "Twist" variables are in reduced coords, i.e. from 0 to1.
-  /// The "k" variables are in Cartesian coordinates.
-  PosType TwistVector, kVector;
-  /// This stores which "true" twist vector this clone is using.
-  /// "True" indicates the physical twist angle after untiling
-  int TwistNum;
-  /// metric tensor to handle generic unitcell
-  Tensor<RealType, OHMMS_DIM> GGt;
-
-  int NumValenceOrbs;
-
-public:
-  UnitCellType GetLattice();
-  void resetSourceParticleSet(ParticleSet& ions);
-  void setOrbitalSetSize(int norbs) override;
-  inline std::string Type() { return "EinsplineSet"; }
-  EinsplineSet(const std::string& my_name) : SPOSet(my_name), TwistNum(0), NumValenceOrbs(0) {}
-
-  virtual std::string getClassName() const override { return "EinsplineSet"; }
-};
-
-////////////////////////////////////////////////////////////////////
-// This is just a template trick to avoid template specialization //
-// in EinsplineSetExtended.                                       //
-////////////////////////////////////////////////////////////////////
-template<typename StorageType, int dim>
-struct MultiOrbitalTraits
-{};
-
-template<>
-struct MultiOrbitalTraits<double, 2>
-{
-  using SplineType = multi_UBspline_2d_d;
-};
-
-template<>
-struct MultiOrbitalTraits<std::complex<double>, 2>
-{
-  using SplineType = multi_UBspline_2d_z;
-};
-
-template<>
-struct MultiOrbitalTraits<float, 2>
-{
-  using SplineType = multi_UBspline_2d_s;
-};
-
-template<>
-struct MultiOrbitalTraits<std::complex<float>, 2>
-{
-  using SplineType = multi_UBspline_2d_c;
-};
-
-template<>
-struct MultiOrbitalTraits<double, 3>
-{
-  using SplineType = multi_UBspline_3d_d;
-  using BCType     = BCtype_d;
-  using DataType   = double;
-};
-
-template<>
-struct MultiOrbitalTraits<std::complex<double>, 3>
-{
-  using SplineType = multi_UBspline_3d_z;
-  using BCType     = BCtype_z;
-  using DataType   = std::complex<double>;
-};
-
-
-template<>
-struct MultiOrbitalTraits<float, 3>
-{
-  using SplineType = multi_UBspline_3d_s;
-  using BCType     = BCtype_s;
-  using DataType   = float;
-};
-
-template<>
-struct MultiOrbitalTraits<std::complex<float>, 3>
-{
-  using SplineType = multi_UBspline_3d_c;
-  using BCType     = BCtype_c;
-  using DataType   = std::complex<float>;
-};
-
-////////////////////////////////////////////////////////////////////
-// Template class for evaluating multiple extended Bloch orbitals //
-// quickly.  Currently uses einspline library.                    //
-////////////////////////////////////////////////////////////////////
-template<typename StorageType>
-class EinsplineSetExtended : public EinsplineSet
-{
-  friend class EinsplineSetBuilder;
-
-protected:
-  //////////////////////
-  // Type definitions //
-  //////////////////////
-  //using UnitCellType = CrystalLattice<RealType,OHMMS_DIM>;
-  using SplineType = typename MultiOrbitalTraits<StorageType, OHMMS_DIM>::SplineType;
-  using BCType     = typename MultiOrbitalTraits<StorageType, OHMMS_DIM>::BCType;
-
-  using StorageValueVector    = typename OrbitalSetTraits<StorageType>::ValueVector;
-  using StorageGradVector     = typename OrbitalSetTraits<StorageType>::GradVector;
-  using StorageHessVector     = typename OrbitalSetTraits<StorageType>::HessVector;
-  using StorageGradHessVector = typename OrbitalSetTraits<StorageType>::GradHessVector;
-  using RealValueVector       = Vector<double>;
-  using ComplexValueVector    = Vector<std::complex<double>>;
-  using RealGradVector        = Vector<TinyVector<double, OHMMS_DIM>>;
-  using ComplexGradVector     = Vector<TinyVector<std::complex<double>, OHMMS_DIM>>;
-  using RealHessType          = Tensor<double, OHMMS_DIM>;
-  using ComplexHessType       = Tensor<std::complex<double>, OHMMS_DIM>;
-  using RealHessVector        = Vector<RealHessType>;
-  using RealHessMatrix        = Matrix<RealHessType>;
-  using ComplexHessVector     = Vector<ComplexHessType>;
-  using ComplexHessMatrix     = Matrix<ComplexHessType>;
-  using RealValueMatrix       = Matrix<double>;
-  using ComplexValueMatrix    = Matrix<std::complex<double>>;
-  using RealGradMatrix        = Matrix<TinyVector<double, OHMMS_DIM>>;
-  using ComplexGradMatrix     = Matrix<TinyVector<std::complex<double>, OHMMS_DIM>>;
-  using RealGGGType           = TinyVector<RealHessType, 3>;
-  using RealGGGVector         = Vector<RealGGGType>;
-  using RealGGGMatrix         = Matrix<RealGGGType>;
-  using ComplexGGGType        = TinyVector<ComplexHessType, 3>;
-  using ComplexGGGVector      = Vector<ComplexGGGType>;
-  using ComplexGGGMatrix      = Matrix<ComplexGGGType>;
-
-  /////////////////////////////
-  /// Orbital storage object //
-  /////////////////////////////
-  SplineType* MultiSpline;
-
-  //////////////////////////////////////
-  // Radial/Ylm orbitals around atoms //
-  //////////////////////////////////////
-  std::vector<AtomicOrbital<StorageType>> AtomicOrbitals;
-
-  // First-order derivative w.r.t. the ion positions
-  std::vector<TinyVector<SplineType*, OHMMS_DIM>> FirstOrderSplines;
-  // Temporary storage for Eispline calls
-  StorageValueVector storage_value_vector_, storage_lapl_vector_;
-  StorageGradVector storage_grad_vector_;
-  StorageHessVector storage_hess_vector_;
-  StorageGradHessVector storage_grad_hess_vector_;
-
-  // True if we should unpack this orbital into two copies
-  std::vector<bool> MakeTwoCopies;
-  /** kpoints for each unique orbitals.
-   * Note: for historic reason, this sign is opposite to what was used in DFT when orbitals were generated.
-   * Changing the sign requires updating all the evaluation code.
-   */
-  Vector<TinyVector<double, OHMMS_DIM>> kPoints;
-
-  ///////////////////
-  // Phase factors //
-  ///////////////////
-  Vector<double> phase;
-  Vector<std::complex<double>> eikr;
-  void computePhaseFactors(const TinyVector<double, OHMMS_DIM>& r);
-  // For running at half G-vectors with real orbitals;
-  // 0 if the twist is zero, 1 if the twist is G/2.
-  TinyVector<int, OHMMS_DIM> HalfG;
-
-  ////////////
-  // Timers //
-  ////////////
-  NewTimer &ValueTimer, &VGLTimer, &VGLMatTimer;
-  NewTimer& EinsplineTimer;
-
-public:
-  /** create MultiSpline
-   * @param xyz_g grid data
-   * @param xyz_bc boundary conditions
-   */
-  template<typename GT, typename BCT>
-  void allocate(GT& xyz_g, BCT& xyz_bc, int nv)
-  {
-    SplineType* dummy = nullptr;
-    MultiSpline       = einspline::create(dummy, xyz_g, xyz_bc, nv);
-  }
-
-  inline void resizeStorage(int n, int nvals)
-  {
-    kPoints.resize(n);
-    MakeTwoCopies.resize(n);
-    storage_value_vector_.resize(n);
-    storage_lapl_vector_.resize(n);
-    storage_grad_vector_.resize(n);
-    storage_hess_vector_.resize(n);
-    storage_grad_hess_vector_.resize(n);
-    phase.resize(n);
-    eikr.resize(n);
-    NumValenceOrbs = nvals;
-  }
-
-#if !defined(QMC_COMPLEX)
-  // Real return values
-  void evaluateValue(const ParticleSet& P, int iat, RealValueVector& psi) override;
-  void evaluateVGL(const ParticleSet& P,
-                   int iat,
-                   RealValueVector& psi,
-                   RealGradVector& dpsi,
-                   RealValueVector& d2psi) override;
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            RealValueMatrix& psi,
-                            RealGradMatrix& dpsi,
-                            RealValueMatrix& d2psi) override;
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            RealValueMatrix& psi,
-                            RealGradMatrix& dpsi,
-                            RealHessMatrix& grad_grad_psi) override;
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            RealValueMatrix& psi,
-                            RealGradMatrix& dpsi,
-                            RealHessMatrix& grad_grad_psi,
-                            RealGGGMatrix& grad_grad_grad_logdet) override;
-
-  //    void evaluate (const ParticleSet& P, const PosType& r, std::vector<double> &psi);
-  // This is the gradient of the orbitals w.r.t. the ion iat
-  void evaluateGradSource(const ParticleSet& P,
-                          int first,
-                          int last,
-                          const ParticleSet& source,
-                          int iat_src,
-                          RealGradMatrix& gradphi) override;
-  // Evaluate the gradient w.r.t. to ion iat of the gradient and
-  // laplacian of the orbitals w.r.t. the electrons
-  void evaluateGradSource(const ParticleSet& P,
-                          int first,
-                          int last,
-                          const ParticleSet& source,
-                          int iat_src,
-                          RealGradMatrix& dphi,
-                          RealHessMatrix& dgrad_phi,
-                          RealGradMatrix& dlaplphi) override;
-#else
-  // Complex return values
-  void evaluateValue(const ParticleSet& P, int iat, ComplexValueVector& psi) override;
-  void evaluateVGL(const ParticleSet& P,
-                   int iat,
-                   ComplexValueVector& psi,
-                   ComplexGradVector& dpsi,
-                   ComplexValueVector& d2psi) override;
-  void evaluateVGH(const ParticleSet& P,
-                   int iat,
-                   ComplexValueVector& psi,
-                   ComplexGradVector& dpsi,
-                   ComplexHessVector& grad_grad_psi) override;
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ComplexValueMatrix& psi,
-                            ComplexGradMatrix& dpsi,
-                            ComplexValueMatrix& d2psi) override;
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ComplexValueMatrix& psi,
-                            ComplexGradMatrix& dpsi,
-                            ComplexHessMatrix& grad_grad_psi) override;
-  void evaluate_notranspose(const ParticleSet& P,
-                            int first,
-                            int last,
-                            ComplexValueMatrix& psi,
-                            ComplexGradMatrix& dpsi,
-                            ComplexHessMatrix& grad_grad_psi,
-                            ComplexGGGMatrix& grad_grad_grad_logdet) override;
-#endif
-
-  void setOrbitalSetSize(int norbs) override;
-  std::string Type();
-
-  void registerTimers();
-  PosType get_k(int orb) override { return kPoints[orb]; }
-
-  virtual std::string getClassName() const override { return "EinsplineSetExtended"; }
-  bool hasIonDerivs() const override { return true; }
-
-  std::unique_ptr<SPOSet> makeClone() const override;
-
-  EinsplineSetExtended(const std::string& my_name)
-      : EinsplineSet(my_name),
-        MultiSpline(NULL),
-        ValueTimer(createGlobalTimer("EinsplineSetExtended::ValueOnly")),
-        VGLTimer(createGlobalTimer("EinsplineSetExtended::VGL")),
-        VGLMatTimer(createGlobalTimer("EinsplineSetExtended::VGLMatrix")),
-        EinsplineTimer(createGlobalTimer("libeinspline"))
-  {
-    for (int i = 0; i < OHMMS_DIM; i++)
-      HalfG[i] = 0;
-  }
-};
-
-} // namespace qmcplusplus
-#endif
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilder.h b/src/QMCWaveFunctions/EinsplineSetBuilder.h
index edfaf9fc89..9e95677c48 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilder.h
+++ b/src/QMCWaveFunctions/EinsplineSetBuilder.h
@@ -25,7 +25,6 @@
 
 #include "QMCWaveFunctions/SPOSetBuilder.h"
 #include "QMCWaveFunctions/BandInfo.h"
-#include "QMCWaveFunctions/AtomicOrbital.h"
 #include <filesystem>
 #include <map>
 
@@ -168,7 +167,6 @@ class EinsplineSetBuilder : public SPOSetBuilder
   TinyVector<int, 3> Version;
   std::string parameterGroup, ionsGroup, eigenstatesGroup;
   std::vector<int> Occ;
-  bool HasCoreOrbs;
   bool ReadOrbitalInfo(bool skipChecks = false);
   bool ReadOrbitalInfo_ESHDF(bool skipChecks = false);
   void BroadcastOrbitalInfo();
@@ -197,7 +195,7 @@ class EinsplineSetBuilder : public SPOSetBuilder
 
   Tensor<double, OHMMS_DIM> Lattice, RecipLattice, LatticeInv, SuperLattice, GGt;
   UnitCellType SuperCell, PrimCell, PrimCellInv;
-  int NumBands, NumElectrons, NumSpins, NumTwists, NumCoreStates;
+  int NumBands, NumElectrons, NumSpins, NumTwists;
   int MaxNumGvecs;
   double MeshFactor;
   RealType MatchingTol;
@@ -226,7 +224,7 @@ class EinsplineSetBuilder : public SPOSetBuilder
   std::vector<int> IncludeTwists, DistinctTwists;
   /// if false, splines are conceptually complex valued
   bool use_real_splines_;
-  int NumDistinctOrbitals, NumCoreOrbs, NumValenceOrbs;
+  int NumDistinctOrbitals;
   // This is true if the corresponding twist in DistinctTwists should
   // should be used to generate two distinct orbitals from the real and
   // imaginary parts.
@@ -240,21 +238,9 @@ class EinsplineSetBuilder : public SPOSetBuilder
 
   void CopyBands(int numOrbs);
 
-  /////////////////////////////
-  // Muffin-tin information  //
-  /////////////////////////////
-  int NumMuffinTins;
-  std::vector<double> MT_APW_radii;
-  std::vector<Vector<double>> MT_APW_rgrids;
-  std::vector<int> MT_APW_lmax;
-  std::vector<int> MT_APW_num_radial_points;
-  std::vector<TinyVector<double, OHMMS_DIM>> MT_centers;
-
   ////////////////////////////////
   // Atomic orbital information //
   ////////////////////////////////
-  std::vector<AtomicOrbital<std::complex<double>>> AtomicOrbitals;
-
   struct CenterInfo
   {
     std::vector<int> lmax, spline_npoints, GroupID;
@@ -281,8 +267,6 @@ class EinsplineSetBuilder : public SPOSetBuilder
   // This returns the path in the HDF5 file to the group for orbital
   // with twist ti and band bi
   std::string OrbitalPath(int ti, int bi);
-  std::string CoreStatePath(int ti, int bi);
-  std::string MuffinTinPath(int ti, int bi, int tin);
 
   /////////////////////////////////////////////////////////////
   // Information to avoid storing the same orbitals twice in //
@@ -298,9 +282,8 @@ class EinsplineSetBuilder : public SPOSetBuilder
   /** broadcast SortBands
    * @param N number of state
    * @param root true if it is the i/o node
-   * @return true, if core is found
    */
-  bool bcastSortBands(int splin, int N, bool root);
+  void bcastSortBands(int splin, int N, bool root);
 
   /** a specific but clean code path in createSPOSetFromXML, for PBC, double, ESHDF
    * @param cur the current xml node
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
index f35ffe73aa..054e27d949 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
@@ -48,12 +48,10 @@ EinsplineSetBuilder::EinsplineSetBuilder(ParticleSet& p, const PSetMap& psets, C
       NumElectrons(0),
       NumSpins(0),
       NumTwists(0),
-      NumCoreStates(0),
       MeshFactor(1.0),
       MeshSize(0, 0, 0),
       twist_num_(-1),
       TileFactor(1, 1, 1),
-      NumMuffinTins(0),
       LastSpinSet(-1),
       NumOrbitalsRead(-1),
       makeRotations(false)
@@ -127,7 +125,6 @@ void EinsplineSetBuilder::BroadcastOrbitalInfo()
   if (myComm->size() == 1)
     return;
   int numIons           = IonTypes.size();
-  int numAtomicOrbitals = AtomicOrbitals.size();
   int numDensityGvecs   = TargetPtcl.DensityReducedGvecs.size();
   PooledData<double> abuffer;
   PooledData<int> aibuffer;
@@ -142,8 +139,6 @@ void EinsplineSetBuilder::BroadcastOrbitalInfo()
   aibuffer.add(NumSpins);                                //myComm->bcast(NumSpins);
   aibuffer.add(NumTwists);                               //myComm->bcast(NumTwists);
   aibuffer.add(numIons);                                 //myComm->bcast(numIons);
-  aibuffer.add(NumMuffinTins);
-  aibuffer.add(numAtomicOrbitals);
   aibuffer.add(numDensityGvecs);
   aibuffer.add(HaveOrbDerivs);
   myComm->bcast(abuffer);
@@ -163,26 +158,11 @@ void EinsplineSetBuilder::BroadcastOrbitalInfo()
     aibuffer.get(NumSpins);
     aibuffer.get(NumTwists);
     aibuffer.get(numIons);
-    aibuffer.get(NumMuffinTins);
-    aibuffer.get(numAtomicOrbitals);
     aibuffer.get(numDensityGvecs);
     aibuffer.get(HaveOrbDerivs);
-    MT_APW_radii.resize(NumMuffinTins);
-    MT_APW_lmax.resize(NumMuffinTins);
-    MT_APW_rgrids.resize(NumMuffinTins);
-    MT_APW_num_radial_points.resize(NumMuffinTins);
-    MT_centers.resize(NumMuffinTins);
     TargetPtcl.DensityReducedGvecs.resize(numDensityGvecs);
     TargetPtcl.Density_G.resize(numDensityGvecs);
-    AtomicOrbitals.resize(numAtomicOrbitals);
   }
-  std::vector<int> rgrids_sizes(NumMuffinTins);
-  for (int tin = 0; tin < NumMuffinTins; tin++)
-    rgrids_sizes[tin] = MT_APW_rgrids[tin].size();
-  myComm->bcast(rgrids_sizes);
-  if (myComm->rank())
-    for (int tin = 0; tin < NumMuffinTins; tin++)
-      MT_APW_rgrids[tin].resize(rgrids_sizes[tin]);
   if (IonTypes.size() != numIons)
   {
     IonTypes.resize(numIons);
@@ -199,27 +179,9 @@ void EinsplineSetBuilder::BroadcastOrbitalInfo()
   if (primcell_kpoints.size() != NumTwists)
     primcell_kpoints.resize(NumTwists);
   bbuffer.add(&primcell_kpoints[0][0], &primcell_kpoints[0][0] + OHMMS_DIM * NumTwists);
-  bbuffer.add(MT_APW_radii.begin(), MT_APW_radii.end());
-  bibuffer.add(MT_APW_lmax.begin(), MT_APW_lmax.end());
-  bibuffer.add(MT_APW_num_radial_points.begin(), MT_APW_num_radial_points.end());
-  bbuffer.add(&(MT_centers[0][0]), &(MT_centers[0][0]) + OHMMS_DIM * NumMuffinTins);
-  for (int i = 0; i < NumMuffinTins; i++)
-    bbuffer.add(MT_APW_rgrids[i].begin(), MT_APW_rgrids[i].end());
   bibuffer.add(&(TargetPtcl.DensityReducedGvecs[0][0]),
                &(TargetPtcl.DensityReducedGvecs[0][0]) + numDensityGvecs * OHMMS_DIM);
   bbuffer.add(&(TargetPtcl.Density_G[0]), &(TargetPtcl.Density_G[0]) + numDensityGvecs);
-  for (int iat = 0; iat < numAtomicOrbitals; iat++)
-  {
-    AtomicOrbital<std::complex<double>>& orb = AtomicOrbitals[iat];
-    bibuffer.add(orb.SplinePoints);
-    bibuffer.add(orb.PolyOrder);
-    bibuffer.add(orb.lMax);
-    bibuffer.add(orb.Numlm);
-    bbuffer.add(&orb.Pos[0], &orb.Pos[0] + OHMMS_DIM);
-    bbuffer.add(orb.CutoffRadius);
-    bbuffer.add(orb.SplineRadius);
-    bbuffer.add(orb.PolyRadius);
-  }
   myComm->bcast(bbuffer);
   myComm->bcast(bibuffer);
   if (myComm->rank())
@@ -230,27 +192,9 @@ void EinsplineSetBuilder::BroadcastOrbitalInfo()
       bibuffer.get(IonTypes[i]);
     bbuffer.get(&IonPos[0][0], &IonPos[0][0] + OHMMS_DIM * numIons);
     bbuffer.get(&primcell_kpoints[0][0], &primcell_kpoints[0][0] + OHMMS_DIM * NumTwists);
-    bbuffer.get(MT_APW_radii.begin(), MT_APW_radii.end());
-    bibuffer.get(MT_APW_lmax.begin(), MT_APW_lmax.end());
-    bibuffer.get(MT_APW_num_radial_points.begin(), MT_APW_num_radial_points.end());
-    bbuffer.get(&(MT_centers[0][0]), &(MT_centers[0][0]) + OHMMS_DIM * NumMuffinTins);
-    for (int i = 0; i < NumMuffinTins; i++)
-      bbuffer.get(MT_APW_rgrids[i].begin(), MT_APW_rgrids[i].end());
     bibuffer.get(&(TargetPtcl.DensityReducedGvecs[0][0]),
                  &(TargetPtcl.DensityReducedGvecs[0][0]) + numDensityGvecs * OHMMS_DIM);
     bbuffer.get(&(TargetPtcl.Density_G[0]), &(TargetPtcl.Density_G[0]) + numDensityGvecs);
-    for (int iat = 0; iat < numAtomicOrbitals; iat++)
-    {
-      AtomicOrbital<std::complex<double>>& orb = AtomicOrbitals[iat];
-      bibuffer.get(orb.SplinePoints);
-      bibuffer.get(orb.PolyOrder);
-      bibuffer.get(orb.lMax);
-      bibuffer.get(orb.Numlm);
-      bbuffer.get(&orb.Pos[0], &orb.Pos[0] + OHMMS_DIM);
-      bbuffer.get(orb.CutoffRadius);
-      bbuffer.get(orb.SplineRadius);
-      bbuffer.get(orb.PolyRadius);
-    }
   }
   //buffer to bcast hybrid representation atomic orbital info
   PooledData<double> cbuffer;
@@ -714,7 +658,6 @@ void EinsplineSetBuilder::OccupyBands(int spin, int sortBands, int numOrbs, bool
     for (int bi = 0; bi < NumBands; bi++)
     {
       BandInfo band;
-      band.IsCoreState   = false;
       band.TwistIndex    = tindex;
       band.BandIndex     = bi;
       band.MakeTwoCopies = MakeTwoCopies[ti];
@@ -744,53 +687,31 @@ void EinsplineSetBuilder::OccupyBands(int spin, int sortBands, int numOrbs, bool
           SortBands.push_back(band);
       }
     }
-    // Now, read core states
-    for (int cs = 0; cs < NumCoreStates; cs++)
-    {
-      BandInfo band;
-      band.IsCoreState   = true;
-      band.TwistIndex    = tindex;
-      band.BandIndex     = cs;
-      band.MakeTwoCopies = MakeTwoCopies[ti];
-      H5File.read(band.Energy, CoreStatePath(ti, cs) + "eigenvalue");
-      if (band.Energy > -1.0e100)
-        SortBands.push_back(band);
-    }
   }
   int orbIndex        = 0;
   int numOrbs_counter = 0;
-  NumValenceOrbs      = 0;
-  NumCoreOrbs         = 0;
   while (numOrbs_counter < numOrbs)
   {
     if (SortBands[orbIndex].MakeTwoCopies)
       numOrbs_counter += 2;
     else
       numOrbs_counter++;
-    if (SortBands[orbIndex].IsCoreState)
-      NumCoreOrbs++;
-    else
-      NumValenceOrbs++;
     orbIndex++;
   }
   NumDistinctOrbitals = orbIndex;
   app_log() << "We will read " << NumDistinctOrbitals << " distinct orbitals.\n";
-  app_log() << "There are " << NumCoreOrbs << " core states and " << NumValenceOrbs << " valence states.\n";
 }
 
-bool EinsplineSetBuilder::bcastSortBands(int spin, int n, bool root)
+void EinsplineSetBuilder::bcastSortBands(int spin, int n, bool root)
 {
   std::vector<BandInfo>& SortBands(*FullBands[spin]);
 
-  TinyVector<int, 4> nbands(int(SortBands.size()), n, NumValenceOrbs, NumCoreOrbs);
+  TinyVector<int, 2> nbands(int(SortBands.size()), n);
   mpi::bcast(*myComm, nbands);
 
   //buffer to serialize BandInfo
-  PooledData<OHMMS_PRECISION_FULL> misc(nbands[0] * 5);
-  bool isCore = false;
+  PooledData<OHMMS_PRECISION_FULL> misc(nbands[0] * 4);
   n = NumDistinctOrbitals = nbands[1];
-  NumValenceOrbs          = nbands[2];
-  NumCoreOrbs             = nbands[3];
 
   if (root)
   {
@@ -801,9 +722,6 @@ bool EinsplineSetBuilder::bcastSortBands(int spin, int n, bool root)
       misc.put(SortBands[i].BandIndex);
       misc.put(SortBands[i].Energy);
       misc.put(SortBands[i].MakeTwoCopies);
-      misc.put(SortBands[i].IsCoreState);
-
-      isCore |= SortBands[i].IsCoreState;
     }
 
     for (int i = n; i < SortBands.size(); ++i)
@@ -812,7 +730,6 @@ bool EinsplineSetBuilder::bcastSortBands(int spin, int n, bool root)
       misc.put(SortBands[i].BandIndex);
       misc.put(SortBands[i].Energy);
       misc.put(SortBands[i].MakeTwoCopies);
-      misc.put(SortBands[i].IsCoreState);
     }
   }
   myComm->bcast(misc);
@@ -827,9 +744,6 @@ bool EinsplineSetBuilder::bcastSortBands(int spin, int n, bool root)
       misc.get(SortBands[i].BandIndex);
       misc.get(SortBands[i].Energy);
       misc.get(SortBands[i].MakeTwoCopies);
-      misc.get(SortBands[i].IsCoreState);
-
-      isCore |= SortBands[i].IsCoreState;
     }
     for (int i = n; i < SortBands.size(); ++i)
     {
@@ -837,10 +751,8 @@ bool EinsplineSetBuilder::bcastSortBands(int spin, int n, bool root)
       misc.get(SortBands[i].BandIndex);
       misc.get(SortBands[i].Energy);
       misc.get(SortBands[i].MakeTwoCopies);
-      misc.get(SortBands[i].IsCoreState);
     }
   }
-  return isCore;
 }
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
index 32f547a50b..5f855d6257 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
@@ -76,20 +76,15 @@ bool EinsplineSetBuilder::ReadOrbitalInfo_ESHDF(bool skipChecks)
     for (int j = 0; j < 3; j++)
       LatticeInv(i, j) = RecipLattice(i, j) / (2.0 * M_PI);
   int have_dpsi         = false;
-  int NumAtomicOrbitals = 0;
-  NumCoreStates = NumMuffinTins = NumTwists = NumSpins = NumBands = NumAtomicOrbitals = 0;
+  NumTwists = NumSpins = NumBands = 0;
   NumElectrons = TargetPtcl.getTotalNum();
   H5File.read(NumBands, "/electrons/kpoint_0/spin_0/number_of_states");
-  H5File.readEntry(NumCoreStates, "/electrons/kpoint_0/spin_0/number_of_core_states");
   H5File.readEntry(NumSpins, "/electrons/number_of_spins");
   H5File.read(NumTwists, "/electrons/number_of_kpoints");
-  H5File.readEntry(NumMuffinTins, "/muffin_tins/number_of_tins");
   H5File.readEntry(have_dpsi, "/electrons/have_dpsi");
-  H5File.readEntry(NumAtomicOrbitals, "/electrons/number_of_atomic_orbitals");
   HaveOrbDerivs = have_dpsi;
   app_log() << "bands=" << NumBands << ", elecs=" << NumElectrons << ", spins=" << NumSpins << ", twists=" << NumTwists
-            << ", muffin tins=" << NumMuffinTins << ", core states=" << NumCoreStates << std::endl;
-  app_log() << "atomic orbital=" << NumAtomicOrbitals << std::endl;
+            << std::endl;
   if (TileFactor[0] != 1 || TileFactor[1] != 1 || TileFactor[2] != 1)
     app_log() << "  Using a " << TileFactor[0] << "x" << TileFactor[1] << "x" << TileFactor[2] << " tiling factor.\n";
   //////////////////////////////////
@@ -229,38 +224,6 @@ bool EinsplineSetBuilder::ReadOrbitalInfo_ESHDF(bool skipChecks)
         AtomicCentersInfo.lmax[center_idx] = source_species(lmax_ind, my_GroupID);
     }
   }
-  /////////////////////////////////////
-  // Read atomic orbital information //
-  /////////////////////////////////////
-  AtomicOrbitals.resize(NumAtomicOrbitals);
-  for (int iat = 0; iat < NumAtomicOrbitals; iat++)
-  {
-    AtomicOrbital<std::complex<double>>& orb = AtomicOrbitals[iat];
-    int lmax, polynomial_order, spline_points;
-    RealType cutoff_radius, polynomial_radius, spline_radius;
-    PosType position;
-    double cutoff_radius_DP, polynomial_radius_DP, spline_radius_DP;
-    TinyVector<double, OHMMS_DIM> position_DP;
-    std::ostringstream groupstream;
-    groupstream << "/electrons/atomic_orbital_" << iat << "/";
-    std::string groupname = groupstream.str();
-    H5File.read(lmax, groupname + "lmax");
-    H5File.read(polynomial_order, groupname + "polynomial_order");
-    H5File.read(spline_points, groupname + "spline_points");
-    H5File.read(cutoff_radius_DP, groupname + "cutoff_radius");
-    H5File.read(polynomial_radius_DP, groupname + "polynomial_radius");
-    H5File.read(spline_radius_DP, groupname + "spline_radius");
-    H5File.read(position_DP, groupname + "position");
-    cutoff_radius     = cutoff_radius_DP;
-    polynomial_radius = polynomial_radius_DP;
-    spline_radius     = spline_radius_DP;
-    position          = position_DP;
-    orb.set_pos(position);
-    orb.set_lmax(lmax);
-    orb.set_cutoff(cutoff_radius);
-    orb.set_spline(spline_radius, spline_points);
-    orb.set_polynomial(polynomial_radius, polynomial_order);
-  }
   ///////////////////////////
   // Read the twist angles //
   ///////////////////////////
@@ -398,7 +361,6 @@ void EinsplineSetBuilder::OccupyBands_ESHDF(int spin, int sortBands, int numOrbs
     for (int bi = 0; bi < NumBands; bi++)
     {
       BandInfo band;
-      band.IsCoreState   = false;
       band.TwistIndex    = tindex;
       band.BandIndex     = bi;
       band.MakeTwoCopies = MakeTwoCopies[ti];
@@ -410,22 +372,6 @@ void EinsplineSetBuilder::OccupyBands_ESHDF(int spin, int sortBands, int numOrbs
       else
         maxOrbs++;
     }
-    // Now, read core states
-    for (int cs = 0; cs < NumCoreStates; cs++)
-    {
-      BandInfo band;
-      band.IsCoreState   = true;
-      band.TwistIndex    = tindex;
-      band.BandIndex     = cs;
-      band.MakeTwoCopies = MakeTwoCopies[ti];
-      H5File.read(band.Energy, CoreStatePath(ti, cs) + "eigenvalue");
-      if (band.Energy > -1.0e100)
-        SortBands.push_back(band);
-      if (MakeTwoCopies[ti])
-        maxOrbs += 2;
-      else
-        maxOrbs++;
-    }
   }
 
   app_log() << SortBands.size() << " complex-valued orbitals supplied by h5 can be expanded up to " << maxOrbs
@@ -594,23 +540,16 @@ void EinsplineSetBuilder::OccupyBands_ESHDF(int spin, int sortBands, int numOrbs
   //}
   int orbIndex        = 0;
   int numOrbs_counter = 0;
-  NumValenceOrbs      = 0;
-  NumCoreOrbs         = 0;
   while (numOrbs_counter < numOrbs)
   {
     if (SortBands[orbIndex].MakeTwoCopies)
       numOrbs_counter += 2;
     else
       numOrbs_counter++;
-    if (SortBands[orbIndex].IsCoreState)
-      NumCoreOrbs++;
-    else
-      NumValenceOrbs++;
     orbIndex++;
   }
   NumDistinctOrbitals = orbIndex;
   app_log() << "We will read " << NumDistinctOrbitals << " distinct complex-valued orbitals from h5.\n";
-  app_log() << "There are " << NumCoreOrbs << " core states and " << NumValenceOrbs << " valence states.\n";
 }
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
index c4d316445a..0ba6be3cb0 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
@@ -97,39 +97,15 @@ bool EinsplineSetBuilder::ReadOrbitalInfo(bool skipChecks)
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 3; j++)
       LatticeInv(i, j) = RecipLattice(i, j) / (2.0 * M_PI);
-  NumCoreStates = NumMuffinTins = 0;
   H5File.read(NumBands, parameterGroup + "/num_bands");
-  H5File.read(NumCoreStates, parameterGroup + "/num_core_states");
   H5File.read(NumElectrons, parameterGroup + "/num_electrons");
   H5File.read(NumSpins, parameterGroup + "/num_spins");
   H5File.read(NumTwists, parameterGroup + "/num_twists");
-  H5File.read(NumMuffinTins, parameterGroup + "/muffin_tins/num_tins");
   app_log() << "bands=" << NumBands << ", elecs=" << NumElectrons << ", spins=" << NumSpins << ", twists=" << NumTwists
-            << ", muffin tins=" << NumMuffinTins << std::endl;
+            << std::endl;
   if (TileFactor[0] != 1 || TileFactor[1] != 1 || TileFactor[2] != 1)
     app_log() << "  Using a " << TileFactor[0] << "x" << TileFactor[1] << "x" << TileFactor[2] << " tiling factor.\n";
-  /////////////////////////////////
-  // Read muffin tin information //
-  /////////////////////////////////
-  MT_APW_radii.resize(NumMuffinTins);
-  MT_APW_rgrids.resize(NumMuffinTins);
-  MT_APW_lmax.resize(NumMuffinTins);
-  MT_APW_num_radial_points.resize(NumMuffinTins);
-  MT_centers.resize(NumMuffinTins);
-  for (int tin = 0; tin < NumMuffinTins; tin++)
-  {
-    std::ostringstream MTstream;
-    if (NumMuffinTins > 1)
-      MTstream << parameterGroup << "/muffin_tins/muffin_tin_" << tin;
-    else
-      MTstream << parameterGroup << "/muffin_tins/muffin_tin";
-    std::string MTgroup = MTstream.str();
-    H5File.read(MT_APW_lmax[tin], MTgroup + "/lmax");
-    H5File.read(MT_APW_num_radial_points[tin], MTgroup + "/num_radial_points");
-    H5File.read(MT_APW_radii[tin], MTgroup + "/radius");
-    H5File.read(MT_centers[tin], MTgroup + "/center");
-    H5File.read(MT_APW_rgrids[tin], MTgroup + "/r");
-  }
+
   //////////////////////////////////
   // Read ion types and locations //
   //////////////////////////////////
@@ -188,48 +164,4 @@ bool EinsplineSetBuilder::ReadOrbitalInfo(bool skipChecks)
   return true;
 }
 
-
-std::string EinsplineSetBuilder::OrbitalPath(int ti, int bi)
-{
-  std::string eigenstatesGroup;
-  if (Version[0] == 0 && Version[1] == 11)
-    eigenstatesGroup = "/eigenstates_3";
-  else if (Version[0] == 0 && Version[1] == 20)
-    eigenstatesGroup = "/eigenstates";
-  std::ostringstream groupPath;
-  if ((Version[0] == 0 && Version[1] == 11) || NumTwists > 1)
-    groupPath << eigenstatesGroup << "/twist_" << ti << "/band_" << bi << "/";
-  else if (NumBands > 1)
-    groupPath << eigenstatesGroup << "/twist/band_" << bi << "/";
-  else
-    groupPath << eigenstatesGroup << "/twist/band/";
-  return groupPath.str();
-}
-
-std::string EinsplineSetBuilder::CoreStatePath(int ti, int cs)
-{
-  std::string eigenstatesGroup;
-  if (Version[0] == 0 && Version[1] == 11)
-    eigenstatesGroup = "/eigenstates_3";
-  else if (Version[0] == 0 && Version[1] == 20)
-    eigenstatesGroup = "/eigenstates";
-  std::ostringstream groupPath;
-  if ((Version[0] == 0 && Version[1] == 11) || NumTwists > 1)
-    groupPath << eigenstatesGroup << "/twist_" << ti << "/core_state_" << cs << "/";
-  else if (NumBands > 1)
-    groupPath << eigenstatesGroup << "/twist/core_state_" << cs << "/";
-  else
-    groupPath << eigenstatesGroup << "/twist/core_state/";
-  return groupPath.str();
-}
-
-std::string EinsplineSetBuilder::MuffinTinPath(int ti, int bi, int tin)
-{
-  std::ostringstream groupPath;
-  if (NumMuffinTins > 0)
-    groupPath << OrbitalPath(ti, bi) << "muffin_tin_" << tin << "/";
-  else
-    groupPath << OrbitalPath(ti, bi) << "muffin_tin/";
-  return groupPath.str();
-}
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
index 223e689f58..e610b6bac8 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
@@ -29,9 +29,6 @@
 #include <fftw3.h>
 #include "Utilities/ProgressReportEngine.h"
 #include "QMCWaveFunctions/einspline_helper.hpp"
-#if !defined(MIXED_PRECISION)
-#include "QMCWaveFunctions/EinsplineSet.h"
-#endif
 #include "QMCWaveFunctions/BsplineFactory/BsplineReaderBase.h"
 #include "QMCWaveFunctions/BsplineFactory/BsplineSet.h"
 #include "QMCWaveFunctions/BsplineFactory/createBsplineReader.h"
@@ -104,8 +101,6 @@ void EinsplineSetBuilder::set_metadata(int numOrbs,
   PrimCell.set(Lattice);
   SuperCell.set(SuperLattice);
   GGt = dot(transpose(PrimCell.G), PrimCell.G);
-  for (int iat = 0; iat < AtomicOrbitals.size(); iat++)
-    AtomicOrbitals[iat].Lattice.set(Lattice);
 
   // Now, analyze the k-point mesh to figure out the what k-points  are needed
   AnalyzeTwists2(twist_num_inp, twist_inp);
@@ -149,7 +144,6 @@ std::unique_ptr<SPOSet> EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur)
     a.add(GPUsharing, "gpusharing"); // split spline across GPUs visible per rank
     a.add(spo_prec, "precision");
     a.add(truncate, "truncate");
-    a.add(use_einspline_set_extended, "use_old_spline");
     a.add(myName, "tag");
     a.add(skip_checks, "skip_checks");
 
@@ -270,9 +264,6 @@ std::unique_ptr<SPOSet> EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur)
     kid = kid->next;
   }
 
-  if (has_backflow && use_einspline_set_extended == "yes" && use_real_splines_)
-    myComm->barrier_and_abort("backflow optimization is broken with use_real_splines_");
-
   //////////////////////////////////
   // Create the OrbitalSet object
   //////////////////////////////////
@@ -316,104 +307,11 @@ std::unique_ptr<SPOSet> EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur)
   MixedSplineReader->setCommon(XMLRoot);
   // temporary disable the following function call, Ye Luo
   // RotateBands_ESHDF(spinSet, dynamic_cast<EinsplineSetExtended<std::complex<double> >*>(OrbitalSet));
-  HasCoreOrbs     = bcastSortBands(spinSet, NumDistinctOrbitals, myComm->rank() == 0);
+  bcastSortBands(spinSet, NumDistinctOrbitals, myComm->rank() == 0);
   auto OrbitalSet = MixedSplineReader->create_spline_set(spinSet, spo_cur);
   if (!OrbitalSet)
     myComm->barrier_and_abort("Failed to create SPOSet*");
-#if defined(MIXED_PRECISION)
-  if (use_einspline_set_extended == "yes")
-    myComm->barrier_and_abort("Option use_old_spline is not supported by the mixed precision build!");
-#else
-  if (use_einspline_set_extended == "yes")
-  {
-    std::unique_ptr<EinsplineSet> new_OrbitalSet;
-    if (use_real_splines_)
-    {
-      auto temp_OrbitalSet                      = std::make_unique<EinsplineSetExtended<double>>(spo_object_name);
-      temp_OrbitalSet->MultiSpline              = MixedSplineReader->export_MultiSplineDouble().release();
-      temp_OrbitalSet->MultiSpline->num_splines = NumDistinctOrbitals;
-      temp_OrbitalSet->resizeStorage(NumDistinctOrbitals, NumValenceOrbs);
-      //set the flags for anti periodic boundary conditions
-      temp_OrbitalSet->HalfG = dynamic_cast<BsplineSet&>(*OrbitalSet).getHalfG();
-      new_OrbitalSet         = std::move(temp_OrbitalSet);
-    }
-    else
-    {
-      auto temp_OrbitalSet         = std::make_unique<EinsplineSetExtended<std::complex<double>>>(spo_object_name);
-      temp_OrbitalSet->MultiSpline = MixedSplineReader->export_MultiSplineComplexDouble().release();
-      temp_OrbitalSet->MultiSpline->num_splines = NumDistinctOrbitals;
-      temp_OrbitalSet->resizeStorage(NumDistinctOrbitals, NumValenceOrbs);
-      for (int iorb = 0, num = 0; iorb < NumDistinctOrbitals; iorb++)
-      {
-        int ti                               = (*FullBands[spinSet])[iorb].TwistIndex;
-        temp_OrbitalSet->kPoints[iorb]       = PrimCell.k_cart(-primcell_kpoints[ti]);
-        temp_OrbitalSet->MakeTwoCopies[iorb] = (num < (numOrbs - 1)) && (*FullBands[spinSet])[iorb].MakeTwoCopies;
-        num += temp_OrbitalSet->MakeTwoCopies[iorb] ? 2 : 1;
-      }
-      new_OrbitalSet = std::move(temp_OrbitalSet);
-    }
-    //set the internal parameters
-    setTiling(new_OrbitalSet.get(), numOrbs);
-    OrbitalSet = std::move(new_OrbitalSet);
-  }
-#endif
   app_log() << "Time spent in creating B-spline SPOs " << mytimer.elapsed() << "sec" << std::endl;
-#ifdef Ye_debug
-#ifndef QMC_COMPLEX
-  if (myComm->rank() == 0 && OrbitalSet->MuffinTins.size() > 0)
-  {
-    FILE* fout = fopen("TestMuffins.dat", "w");
-    Vector<double> phi(numOrbs), lapl(numOrbs);
-    Vector<PosType> grad(numOrbs);
-    ParticleSet P;
-    P.R.resize(6);
-    for (int i = 0; i < P.R.size(); i++)
-      P.R[i] = PosType(0.0, 0.0, 0.0);
-    PosType N = 0.25 * PrimCell.a(0) + 0.25 * PrimCell.a(1) + 0.25 * PrimCell.a(2);
-    for (double x = -1.0; x <= 1.0; x += 0.0000500113412)
-    {
-      // for (double x=-0.003; x<=0.003; x+=0.0000011329343481381) {
-      P.R[0]    = x * (PrimCell.a(0) + 0.914 * PrimCell.a(1) + 0.781413 * PrimCell.a(2));
-      double r  = std::sqrt(dot(P.R[0], P.R[0]));
-      double rN = std::sqrt(dot(P.R[0] - N, P.R[0] - N));
-      OrbitalSet->evaluate(P, 0, phi, grad, lapl);
-      // OrbitalSet->evaluate(P, 0, phi);
-      fprintf(fout, "%1.12e ", r * x / std::abs(x));
-      for (int j = 0; j < numOrbs; j++)
-      {
-        double gmag = std::sqrt(dot(grad[j], grad[j]));
-        fprintf(fout, "%16.12e ",
-                /*phi[j]*phi[j]**/ (-5.0 / r - 0.5 * lapl[j] / phi[j]));
-        // double E = -5.0/r -0.5*lapl[j]/phi[j];
-        fprintf(fout, "%16.12e ", phi[j]);
-        fprintf(fout, "%16.12e ", gmag);
-      }
-      fprintf(fout, "\n");
-    }
-    fclose(fout);
-  }
-#endif
-#endif
-  //if (sourceName.size() && (ParticleSets.find(sourceName) == ParticleSets.end()))
-  //{
-  //  app_log() << "  EinsplineSetBuilder creates a ParticleSet " << sourceName << std::endl;
-  //  ParticleSet* ions=new ParticleSet;
-  //  ions->Lattice=TargetPtcl.Lattice;
-  //  ESHDFIonsParser ap(*ions,H5FileID,myComm);
-  //  ap.put(XMLRoot);
-  //  ap.expand(TileMatrix);
-  //  ions->setName(sourceName);
-  //  ParticleSets[sourceName]=ions;
-  //  //overwrite the lattice and assign random
-  //  if(TargetPtcl.Lattice.SuperCellEnum)
-  //  {
-  //    TargetPtcl.Lattice=ions->Lattice;
-  //    makeUniformRandom(TargetPtcl.R);
-  //    TargetPtcl.R.setUnit(PosUnit::LatticeUnit);
-  //    TargetPtcl.convert2Cart(TargetPtcl.R);
-  //    TargetPtcl.createSK();
-  //  }
-  //}
   OrbitalSet->finalizeConstruction();
   SPOSetMap[aset] = OrbitalSet.get();
   return OrbitalSet;
diff --git a/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp b/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp
index 85a2bf4509..c028ccc630 100644
--- a/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp
+++ b/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp
@@ -199,12 +199,12 @@ std::unique_ptr<SPOSet> EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt
   MixedSplineReader->setRotate(false);
 
   //Make the up spin set.
-  HasCoreOrbs       = bcastSortBands(spinSet, NumDistinctOrbitals, myComm->rank() == 0);
+  bcastSortBands(spinSet, NumDistinctOrbitals, myComm->rank() == 0);
   auto bspline_zd_u = MixedSplineReader->create_spline_set(spinSet, spo_cur);
 
   //Make the down spin set.
   OccupyBands(spinSet2, sortBands, numOrbs, skipChecks);
-  HasCoreOrbs       = bcastSortBands(spinSet2, NumDistinctOrbitals, myComm->rank() == 0);
+  bcastSortBands(spinSet2, NumDistinctOrbitals, myComm->rank() == 0);
   auto bspline_zd_d = MixedSplineReader->create_spline_set(spinSet2, spo_cur);
 
   //register with spin set and we're off to the races.

From b1de0bc997eaf7f91ef017af32aea29da068a825 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 27 Jul 2023 23:06:28 -0500
Subject: [PATCH 05/65] Remove TileFactor

---
 src/QMCWaveFunctions/EinsplineSetBuilder.h    | 28 +++----------------
 .../EinsplineSetBuilderCommon.cpp             |  3 +-
 .../EinsplineSetBuilderESHDF.fft.cpp          |  2 --
 .../EinsplineSetBuilderOld.cpp                |  2 --
 .../EinsplineSetBuilder_createSPOs.cpp        |  8 +++---
 5 files changed, 9 insertions(+), 34 deletions(-)

diff --git a/src/QMCWaveFunctions/EinsplineSetBuilder.h b/src/QMCWaveFunctions/EinsplineSetBuilder.h
index 9e95677c48..f7794dbba4 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilder.h
+++ b/src/QMCWaveFunctions/EinsplineSetBuilder.h
@@ -177,22 +177,6 @@ class EinsplineSetBuilder : public SPOSetBuilder
    */
   bool ReadGvectors_ESHDF();
 
-  /** set tiling properties of oset
-   * @param oset spline-orbital engine to be initialized
-   * @param numOrbs number of orbitals that belong to oset
-   */
-  template<typename SPE>
-  inline void setTiling(SPE* oset, int numOrbs)
-  {
-    oset->TileFactor = TileFactor;
-    oset->Tiling     = (TileFactor[0] * TileFactor[1] * TileFactor[2] != 1);
-    oset->PrimLattice.set(Lattice);
-    oset->SuperLattice.set(SuperLattice);
-    oset->GGt = GGt;
-    oset->setOrbitalSetSize(numOrbs);
-  }
-
-
   Tensor<double, OHMMS_DIM> Lattice, RecipLattice, LatticeInv, SuperLattice, GGt;
   UnitCellType SuperCell, PrimCell, PrimCellInv;
   int NumBands, NumElectrons, NumSpins, NumTwists;
@@ -214,12 +198,9 @@ class EinsplineSetBuilder : public SPOSetBuilder
   int twist_num_;
   // primitive cell k-points from DFT calculations
   std::vector<TinyVector<double, OHMMS_DIM>> primcell_kpoints;
-
-  TinyVector<int, OHMMS_DIM> TileFactor;
+  // primitive cell to supercell tiling matrix
   Tensor<int, OHMMS_DIM> TileMatrix;
-  TinyVector<int, OHMMS_DIM> TwistMesh;
-  // This vector stores which twist indices will be used by this
-  // clone
+  // This vector stores which twist indices will be used by this clone
   std::vector<TinyVector<int, OHMMS_DIM>> UseTwists;
   std::vector<int> IncludeTwists, DistinctTwists;
   /// if false, splines are conceptually complex valued
@@ -229,15 +210,14 @@ class EinsplineSetBuilder : public SPOSetBuilder
   // should be used to generate two distinct orbitals from the real and
   // imaginary parts.
   std::vector<bool> MakeTwoCopies;
-  inline bool TwistPair(PosType a, PosType b);
   // This maps a 3-integer twist index into the twist number in the file
   std::map<TinyVector<int, OHMMS_DIM>, int, Int3less> TwistMap;
+
+  bool TwistPair(PosType a, PosType b) const;
   void TileIons();
   void OccupyBands(int spin, int sortBands, int numOrbs, bool skipChecks = false);
   void OccupyBands_ESHDF(int spin, int sortBands, int numOrbs);
 
-  void CopyBands(int numOrbs);
-
   ////////////////////////////////
   // Atomic orbital information //
   ////////////////////////////////
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
index 054e27d949..101b587317 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
@@ -51,7 +51,6 @@ EinsplineSetBuilder::EinsplineSetBuilder(ParticleSet& p, const PSetMap& psets, C
       MeshFactor(1.0),
       MeshSize(0, 0, 0),
       twist_num_(-1),
-      TileFactor(1, 1, 1),
       LastSpinSet(-1),
       NumOrbitalsRead(-1),
       makeRotations(false)
@@ -319,7 +318,7 @@ void EinsplineSetBuilder::TileIons()
 }
 
 
-bool EinsplineSetBuilder::TwistPair(PosType a, PosType b)
+bool EinsplineSetBuilder::TwistPair(PosType a, PosType b) const
 {
   bool pair = true;
   for (int n = 0; n < OHMMS_DIM; n++)
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
index 5f855d6257..0c8d9095a7 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
@@ -85,8 +85,6 @@ bool EinsplineSetBuilder::ReadOrbitalInfo_ESHDF(bool skipChecks)
   HaveOrbDerivs = have_dpsi;
   app_log() << "bands=" << NumBands << ", elecs=" << NumElectrons << ", spins=" << NumSpins << ", twists=" << NumTwists
             << std::endl;
-  if (TileFactor[0] != 1 || TileFactor[1] != 1 || TileFactor[2] != 1)
-    app_log() << "  Using a " << TileFactor[0] << "x" << TileFactor[1] << "x" << TileFactor[2] << " tiling factor.\n";
   //////////////////////////////////
   // Read ion types and locations //
   //////////////////////////////////
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
index 0ba6be3cb0..7d10e81eab 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
@@ -103,8 +103,6 @@ bool EinsplineSetBuilder::ReadOrbitalInfo(bool skipChecks)
   H5File.read(NumTwists, parameterGroup + "/num_twists");
   app_log() << "bands=" << NumBands << ", elecs=" << NumElectrons << ", spins=" << NumSpins << ", twists=" << NumTwists
             << std::endl;
-  if (TileFactor[0] != 1 || TileFactor[1] != 1 || TileFactor[2] != 1)
-    app_log() << "  Using a " << TileFactor[0] << "x" << TileFactor[1] << "x" << TileFactor[2] << " tiling factor.\n";
 
   //////////////////////////////////
   // Read ion types and locations //
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
index e610b6bac8..9c02d343a9 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilder_createSPOs.cpp
@@ -58,12 +58,11 @@ void EinsplineSetBuilder::set_metadata(int numOrbs,
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 3; j++)
       matrixNotSet = matrixNotSet && (TileMatrix(i, j) == 0);
-  // then set the matrix to what may have been specified in the
-  // tiling vector
+  // then set the matrix to identity.
   if (matrixNotSet)
     for (int i = 0; i < 3; i++)
       for (int j = 0; j < 3; j++)
-        TileMatrix(i, j) = (i == j) ? TileFactor[i] : 0;
+        TileMatrix(i, j) = (i == j) ? 1 : 0;
   if (myComm->rank() == 0)
   {
     std::array<char, 1000> buff;
@@ -130,9 +129,10 @@ std::unique_ptr<SPOSet> EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur)
   ScopedTimer spo_timer_scope(createGlobalTimer("einspline::CreateSPOSetFromXML", timer_level_medium));
 
   {
+    TinyVector<int, OHMMS_DIM> TileFactor_do_not_use;
     OhmmsAttributeSet a;
     a.add(H5FileName, "href");
-    a.add(TileFactor, "tile");
+    a.add(TileFactor_do_not_use, "tile", {}, TagStatus::DELETED);
     a.add(sortBands, "sort");
     a.add(TileMatrix, "tilematrix");
     a.add(twist_num_inp, "twistnum");

From f4361925c5db3879e380c4ad427e188c7b704264 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 27 Jul 2023 23:34:53 -0500
Subject: [PATCH 06/65] cut down EinsplineSetBuilder::ReadOrbitalInfo

---
 .../EinsplineSetBuilderOld.cpp                | 121 +-----------------
 1 file changed, 6 insertions(+), 115 deletions(-)

diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
index 7d10e81eab..2e84a56383 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderOld.cpp
@@ -33,10 +33,10 @@ bool EinsplineSetBuilder::ReadOrbitalInfo(bool skipChecks)
 {
   if (!H5File.open(H5FileName, H5F_ACC_RDONLY))
   {
-    app_error() << "Could not open HDF5 file \"" << H5FileName
-                << "\" in EinsplineSetBuilder::ReadOrbitalInfo.  Aborting.\n";
-    APP_ABORT("EinsplineSetBuilder::ReadOrbitalInfo");
+    app_error() << "Could not open HDF5 file \"" << H5FileName << "\" in EinsplineSetBuilder::ReadOrbitalInfo.\n";
+    return false;
   }
+
   // Read format
   std::string format;
   H5File.read(format, "/format");
@@ -47,119 +47,10 @@ bool EinsplineSetBuilder::ReadOrbitalInfo(bool skipChecks)
     Format = ESHDF;
     return ReadOrbitalInfo_ESHDF(skipChecks);
   }
-  //////////////////////////////////////////////////
-  // Read basic parameters from the orbital file. //
-  //////////////////////////////////////////////////
-  // Check the version
-  if (Version[0] == 0 && Version[1] == 11)
-  {
-    parameterGroup   = "/parameters_0";
-    ionsGroup        = "/ions_2";
-    eigenstatesGroup = "/eigenstates_3";
-  }
-  else if (Version[0] == 0 && Version[1] == 20)
-  {
-    parameterGroup   = "/parameters";
-    ionsGroup        = "/ions";
-    eigenstatesGroup = "/eigenstates";
-  }
-  else
-  {
-    std::ostringstream o;
-    o << "Unknown HDF5 orbital file version " << Version[0] << "." << Version[1] << "." << Version[2] << "\n";
-    APP_ABORT(o.str());
-  }
-  H5File.read(Lattice, parameterGroup + "/lattice");
-  H5File.read(RecipLattice, parameterGroup + "/reciprocal_lattice");
-  SuperLattice = dot(TileMatrix, Lattice);
-  std::array<char, 1000> buff;
-  int length = std::snprintf(buff.data(), buff.size(),
-                             "  Lattice = \n    [ %8.5f %8.5f %8.5f\n"
-                             "      %8.5f %8.5f %8.5f\n"
-                             "      %8.5f %8.5f %8.5f ]\n",
-                             Lattice(0, 0), Lattice(0, 1), Lattice(0, 2), Lattice(1, 0), Lattice(1, 1), Lattice(1, 2),
-                             Lattice(2, 0), Lattice(2, 1), Lattice(2, 2));
-  if (length < 0)
-    throw std::runtime_error("Error generating Lattice string");
-  app_log() << std::string_view(buff.data(), length);
-  length =
-      std::snprintf(buff.data(), buff.size(),
-                    "  SuperLattice = \n    [ %13.12f %13.12f %13.12f\n"
-                    "      %13.12f %13.12f %13.12f\n"
-                    "      %13.12f %13.12f %13.12f ]\n",
-                    SuperLattice(0, 0), SuperLattice(0, 1), SuperLattice(0, 2), SuperLattice(1, 0), SuperLattice(1, 1),
-                    SuperLattice(1, 2), SuperLattice(2, 0), SuperLattice(2, 1), SuperLattice(2, 2));
-  if (length < 0)
-    throw std::runtime_error("Error generating SuperLattice string");
-  if (!CheckLattice())
-    APP_ABORT("CheckLattice failed");
-  app_log() << std::string_view(buff.data(), length);
-  for (int i = 0; i < 3; i++)
-    for (int j = 0; j < 3; j++)
-      LatticeInv(i, j) = RecipLattice(i, j) / (2.0 * M_PI);
-  H5File.read(NumBands, parameterGroup + "/num_bands");
-  H5File.read(NumElectrons, parameterGroup + "/num_electrons");
-  H5File.read(NumSpins, parameterGroup + "/num_spins");
-  H5File.read(NumTwists, parameterGroup + "/num_twists");
-  app_log() << "bands=" << NumBands << ", elecs=" << NumElectrons << ", spins=" << NumSpins << ", twists=" << NumTwists
-            << std::endl;
 
-  //////////////////////////////////
-  // Read ion types and locations //
-  //////////////////////////////////
-  H5File.read(IonTypes, ionsGroup + "/atom_types");
-  H5File.read(IonPos, ionsGroup + "/pos");
-  ///////////////////////////
-  // Read the twist angles //
-  ///////////////////////////
-  primcell_kpoints.resize(NumTwists);
-  for (int ti = 0; ti < NumTwists; ti++)
-  {
-    std::ostringstream path;
-    if ((Version[0] == 0 && Version[1] == 11) || NumTwists > 1)
-      path << eigenstatesGroup << "/twist_" << ti << "/twist_angle";
-    else
-      path << eigenstatesGroup << "/twist/twist_angle";
-    TinyVector<double, OHMMS_DIM> primcell_kpoints_DP;
-    H5File.read(primcell_kpoints_DP, path.str());
-    primcell_kpoints[ti] = primcell_kpoints_DP;
-    int length      = std::snprintf(buff.data(), buff.size(), "  Found twist angle (%6.3f, %6.3f, %6.3f)\n",
-                               primcell_kpoints[ti][0], primcell_kpoints[ti][1], primcell_kpoints[ti][2]);
-    if (length < 0)
-      throw std::runtime_error("Error converting twist angle to string");
-    app_log() << std::string_view(buff.data(), length);
-  }
-  //////////////////////////////////////////////////////////
-  // If the density has not been set in TargetPtcl, and   //
-  // the density is available, read it in and save it     //
-  // in TargetPtcl.                                       //
-  //////////////////////////////////////////////////////////
-  if (TargetPtcl.Density_G.empty())
-  {
-    Array<double, OHMMS_DIM> Density_r_DP;
-    H5File.read(TargetPtcl.DensityReducedGvecs, "/density/reduced_gvecs");
-    H5File.read(Density_r_DP, "/density/rho_r");
-    TargetPtcl.Density_r = Density_r_DP;
-    int numG             = TargetPtcl.DensityReducedGvecs.size();
-    // Convert primitive G-vectors to supercell G-vectors
-    for (int iG = 0; iG < numG; iG++)
-      TargetPtcl.DensityReducedGvecs[iG] = dot(TileMatrix, TargetPtcl.DensityReducedGvecs[iG]);
-    app_log() << "  Read " << numG << " density G-vectors.\n";
-    if (TargetPtcl.DensityReducedGvecs.size())
-    {
-      app_log() << "  EinsplineSetBuilder found density in the HDF5 file.\n";
-      std::vector<std::complex<double>> Density_G_DP;
-      H5File.read(Density_G_DP, "/density/rho_G");
-      TargetPtcl.Density_G.assign(Density_G_DP.begin(), Density_G_DP.end());
-      if (!TargetPtcl.Density_G.size())
-      {
-        app_error() << "  Density reduced G-vectors defined, but not the"
-                    << " density.\n";
-        abort();
-      }
-    }
-  }
-  return true;
+  app_error()
+      << "EinsplineSetBuilder::ReadOrbitalInfo too old h5 file which is not in ESHDF format! Regenerate the h5 file";
+  return false;
 }
 
 } // namespace qmcplusplus

From 8de1b42b4a646a61a3d04983ae7d8b829938d548 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 27 Jul 2023 23:35:41 -0500
Subject: [PATCH 07/65] Formatting.

---
 src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp    | 4 ++--
 src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
index 101b587317..04f2492455 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderCommon.cpp
@@ -123,8 +123,8 @@ void EinsplineSetBuilder::BroadcastOrbitalInfo()
 {
   if (myComm->size() == 1)
     return;
-  int numIons           = IonTypes.size();
-  int numDensityGvecs   = TargetPtcl.DensityReducedGvecs.size();
+  int numIons         = IonTypes.size();
+  int numDensityGvecs = TargetPtcl.DensityReducedGvecs.size();
   PooledData<double> abuffer;
   PooledData<int> aibuffer;
   aibuffer.add(Version.begin(), Version.end()); //myComm->bcast(Version);
diff --git a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
index 0c8d9095a7..82c4507df6 100644
--- a/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
+++ b/src/QMCWaveFunctions/EinsplineSetBuilderESHDF.fft.cpp
@@ -75,9 +75,9 @@ bool EinsplineSetBuilder::ReadOrbitalInfo_ESHDF(bool skipChecks)
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 3; j++)
       LatticeInv(i, j) = RecipLattice(i, j) / (2.0 * M_PI);
-  int have_dpsi         = false;
+  int have_dpsi = false;
   NumTwists = NumSpins = NumBands = 0;
-  NumElectrons = TargetPtcl.getTotalNum();
+  NumElectrons                    = TargetPtcl.getTotalNum();
   H5File.read(NumBands, "/electrons/kpoint_0/spin_0/number_of_states");
   H5File.readEntry(NumSpins, "/electrons/number_of_spins");
   H5File.read(NumTwists, "/electrons/number_of_kpoints");
@@ -411,7 +411,8 @@ void EinsplineSetBuilder::OccupyBands_ESHDF(int spin, int sortBands, int numOrbs
   }
   if (occ_format == "energy")
   {
-    app_log() << "  Occupying bands based on energy in mode " << (Occ.size() > 0? "\"excited\"" : "\"ground\"") << std::endl;
+    app_log() << "  Occupying bands based on energy in mode " << (Occ.size() > 0 ? "\"excited\"" : "\"ground\"")
+              << std::endl;
     // To get the occupations right.
     std::vector<int> Removed(0, 0);
     std::vector<int> Added(0, 0);

From 6910bd7c92f953b8a665732fe28424e8e4f470a8 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Fri, 28 Jul 2023 13:38:21 -0500
Subject: [PATCH 08/65] Fix complex build.

---
 src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp b/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp
index c028ccc630..a1a1b1b046 100644
--- a/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp
+++ b/src/QMCWaveFunctions/EinsplineSpinorSetBuilder.cpp
@@ -55,8 +55,9 @@ std::unique_ptr<SPOSet> EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt
 
   {
     OhmmsAttributeSet a;
+    TinyVector<int, OHMMS_DIM> TileFactor_do_not_use;
     a.add(H5FileName, "href");
-    a.add(TileFactor, "tile");
+    a.add(TileFactor_do_not_use, "tile", {}, TagStatus::DELETED);
     a.add(sortBands, "sort");
     a.add(TileMatrix, "tilematrix");
     a.add(twist_num_inp, "twistnum");

From 4cf0bbce218dd9863f4895b93f0a24b58b0751ee Mon Sep 17 00:00:00 2001
From: Mark Dewing <markdewing@gmail.com>
Date: Mon, 31 Jul 2023 17:12:27 -0500
Subject: [PATCH 09/65] Add tests for orbital rotation for Be

Test case is a Be atom with an STO basis set.

Python scripts compute reference values for
 1. single determinant
 2. multi-determinant code path with one determinant
 3. multi-determinant code path with two determinants
---
 src/QMCWaveFunctions/tests/CMakeLists.txt     |   5 +-
 src/QMCWaveFunctions/tests/construct_rot.py   | 103 +++++++
 src/QMCWaveFunctions/tests/read_qmcpack.py    |  30 +-
 .../tests/rot_Be_STO.wfnoj.xml                |  51 ++++
 src/QMCWaveFunctions/tests/rot_be_sto_wf.py   | 181 ++++++++++++
 .../tests/rot_multi_1det_Be_STO.wfnoj.xml     |  52 ++++
 .../tests/rot_multi_2det_Be_STO.wfnoj.xml     |  54 ++++
 .../tests/rot_multi_be_sto_wf.py              | 194 +++++++++++++
 src/QMCWaveFunctions/tests/slater_orbitals.py |  69 +++++
 .../tests/test_RotatedSPOs_LCAO.cpp           | 265 ++++++++++++++++++
 10 files changed, 998 insertions(+), 6 deletions(-)
 create mode 100644 src/QMCWaveFunctions/tests/construct_rot.py
 create mode 100644 src/QMCWaveFunctions/tests/rot_Be_STO.wfnoj.xml
 create mode 100644 src/QMCWaveFunctions/tests/rot_be_sto_wf.py
 create mode 100644 src/QMCWaveFunctions/tests/rot_multi_1det_Be_STO.wfnoj.xml
 create mode 100644 src/QMCWaveFunctions/tests/rot_multi_2det_Be_STO.wfnoj.xml
 create mode 100644 src/QMCWaveFunctions/tests/rot_multi_be_sto_wf.py
 create mode 100644 src/QMCWaveFunctions/tests/slater_orbitals.py

diff --git a/src/QMCWaveFunctions/tests/CMakeLists.txt b/src/QMCWaveFunctions/tests/CMakeLists.txt
index de46dd437b..ec066f8735 100644
--- a/src/QMCWaveFunctions/tests/CMakeLists.txt
+++ b/src/QMCWaveFunctions/tests/CMakeLists.txt
@@ -71,7 +71,10 @@ set(FILES_TO_COPY
     C_diamond-twist-third.structure.xml
     C_diamond-twist-third.wfj.xml
     cartesian_order.wfnoj.xml
-    dirac_order.wfnoj.xml)
+    dirac_order.wfnoj.xml
+    rot_Be_STO.wfnoj.xml
+    rot_multi_1det_Be_STO.wfnoj.xml
+    rot_multi_2det_Be_STO.wfnoj.xml)
 
 foreach(fname ${FILES_TO_COPY})
   execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${fname}" ${UTEST_DIR})
diff --git a/src/QMCWaveFunctions/tests/construct_rot.py b/src/QMCWaveFunctions/tests/construct_rot.py
new file mode 100644
index 0000000000..f805561944
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/construct_rot.py
@@ -0,0 +1,103 @@
+
+# Generate a constructor for a skew symmetric matrix from a list of parameters
+# Output is Python code to be used in further scripts
+
+# Autodiff expects matrices to be constructed and not updated.  This makes it
+# difficult to use autodiff with programmatically constructed rotation matrices.
+
+import numpy as np
+
+
+# Rotation indices for a ground state
+# This should produce the same list of indices as
+# QMCWaveFunctions/RotatedSPOs.cpp::createRotationIndices
+def get_rotation_indices(nel, nmo):
+    rot_ind = list()
+    for i in range(nel):
+        for j in range(nel,nmo):
+            rot_ind.append( (i,j) )
+
+    return rot_ind
+
+# Rotation indices including one excited state.
+# This implementation is specific to the first excited state being
+# included in one of the determinants.
+# Should produce the same list of indices as
+# QMCWaveFunctions/Fermion/MultiDiracDeterminant.cpp::buildOptVariables
+# for this problem.
+def get_rotation_indices_ex(nel, nmo):
+    rot_ind = list()
+    for i in range(nel+1):
+        for j in range(i+1,nmo):
+            rot_ind.append( (i,j) )
+
+    return rot_ind
+
+# Full rotation indices (for global rotation)
+# This should produce the same list of indices as
+# QMCWaveFunctions/RotatedSPOs.cpp::createRotationIndicesFull
+def get_full_rotation_indices(nel, nmo):
+    rot_ind = list()
+    for i in range(nel):
+        for j in range(nel,nmo):
+            rot_ind.append( (i,j) )
+
+    for i in range(nel):
+        for j in range(i+1,nel):
+            rot_ind.append( (i,j) )
+
+    for i in range(nel,nmo):
+        for j in range(i+1,nmo):
+            rot_ind.append( (i,j) )
+
+    return rot_ind
+
+
+# Populate a skew symmetric matrix with corresponding indices
+def construct_antisym(nmo,rot_ind):
+   rot = np.zeros((nmo,nmo),dtype=np.int64)
+   for idx in range(len(rot_ind)):
+        p,q = rot_ind[idx]
+        rot[q,p] = idx+1
+        rot[p,q] = -(idx+1)
+
+   return rot
+
+# Convert the output of construct_antisym to a numpy array constructor
+def print_anti(rot_mat):
+    print("np.array([")
+    for i in range(rot_mat.shape[0]):
+        print("[ ",end='')
+        for j in range(rot_mat.shape[1]):
+            idx = np.abs(rot_mat[i,j])
+            sign = ""
+            if rot_mat[i,j] < 0:
+                sign = "-"
+            if idx == 0:
+                print(" 0",end='')
+            else:
+                print(" {}p[{}]".format(sign,idx-1),end='')
+            if j != rot_mat.shape[1]-1:
+                print(",",end='')
+        print("]",end='')
+        if i != rot_mat.shape[0]-1:
+            print(",")
+    print("])")
+
+
+if __name__ == "__main__":
+    # For the Be problem
+    nel = 2
+    nmo = 7
+
+    # For rot_be_sto_wf.py
+    #rot_ind = get_rotation_indices(nel,nmo)
+
+    # For rot_multi_be_sto_wf.py
+    rot_ind = get_rotation_indices_ex(nel, nmo)
+
+    #rot_ind = get_full_rotation_indices(nel,nmo)
+    rot_mat = construct_antisym(nmo, rot_ind)
+    print_anti(rot_mat)
+
+
diff --git a/src/QMCWaveFunctions/tests/read_qmcpack.py b/src/QMCWaveFunctions/tests/read_qmcpack.py
index fbd5db1375..f54f296375 100644
--- a/src/QMCWaveFunctions/tests/read_qmcpack.py
+++ b/src/QMCWaveFunctions/tests/read_qmcpack.py
@@ -5,6 +5,7 @@
 from collections import namedtuple, defaultdict
 import math
 import gaussian_orbitals
+import slater_orbitals
 import numpy as np
 
 
@@ -25,24 +26,40 @@ def read_basis_groups(atomic_basis_set):
     basis_groups =  atomic_basis_set.findall('basisGroup')
     #print basis_groups
     basis_set = []
+    is_gaussian = False
+    is_slater = False
     for basis_group in basis_groups:
-        if basis_group.attrib['type'] != 'Gaussian':
-            print('Expecting Gaussian type basisGroup')
+        if basis_group.attrib['type'] == 'Gaussian':
+            is_gaussian = True
+        if basis_group.attrib['type'] == 'Slater':
+            is_slater = True
         #print basis_group.attrib['n']
-        n = int(basis_group.attrib['n'])
+        if is_gaussian:
+            n = int(basis_group.attrib['n'])
         #print basis_group.attrib['l']
         ang_mom_l = int(basis_group.attrib['l'])
         #print basis_group.attrib['type']
         zeta_list = []
         coef_list = []
+        n_list = []
         radfuncs = basis_group.findall('radfunc')
         for radfunc in radfuncs:
             zeta = float(radfunc.attrib['exponent'])
-            contraction_coef =  float(radfunc.attrib['contraction'])
+            if is_slater:
+                n = int(radfunc.attrib['n'])
+            contraction_coef = 1.0
+            if "contraction" in radfunc.attrib:
+                contraction_coef =  float(radfunc.attrib['contraction'])
+
+            if is_slater:
+                n_list.append(n)
             zeta_list.append(zeta)
             coef_list.append(contraction_coef)
 
-        cg = gaussian_orbitals.CG_basis(ang_mom_l, len(zeta_list), zeta_list, coef_list)
+        if is_gaussian:
+            cg = gaussian_orbitals.CG_basis(ang_mom_l, len(zeta_list), zeta_list, coef_list)
+        if is_slater:
+            cg = slater_orbitals.CG_basis(ang_mom_l, len(zeta_list), n_list, zeta_list, coef_list)
         basis_set.append(cg)
 
     return basis_set
@@ -68,6 +85,9 @@ def parse_qmc_wf(fname, element_list):
 
     #  Just use the first one for now - assume up and down MO's are the same
     MO_coeff_node = tree.find('.//determinant/coefficient')
+    if MO_coeff_node is None:
+        MO_coeff_node = tree.find('.//sposet/coefficient')
+
     MO_matrix = None
     if MO_coeff_node is None:
         print('Molecular orbital coefficients not found')
diff --git a/src/QMCWaveFunctions/tests/rot_Be_STO.wfnoj.xml b/src/QMCWaveFunctions/tests/rot_Be_STO.wfnoj.xml
new file mode 100644
index 0000000000..8fec7e7a45
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/rot_Be_STO.wfnoj.xml
@@ -0,0 +1,51 @@
+<qmcsystem>
+  <wavefunction name="psi0" target="e">
+    <!-- From Bunge, Barrientos, and Bunge, Atomic Data and Nuclear Data Tables 53, 113-162(1993) -->
+    <sposet_collection type="MolecularOrbital">
+      <basisset keyword="STO" transform="no">
+        <atomicBasisSet elementType="Be" normalized="no" type="STO">
+          <basisGroup l="0" m="0" rid="C0" type="Slater">
+             <radfunc n="1" exponent= "5.7531" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C1" type="Slater">
+             <radfunc n="1" exponent= "3.7156" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C2" type="Slater">
+             <radfunc n="3" exponent= "9.9670" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C3" type="Slater">
+             <radfunc n="3" exponent= "3.7128" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C4" type="Slater">
+             <radfunc n="2" exponent= "4.4661" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C5" type="Slater">
+             <radfunc n="2" exponent= "1.2919" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C6" type="Slater">
+             <radfunc n="2" exponent= "0.8555" />
+          </basisGroup>
+        </atomicBasisSet>
+      </basisset>
+      <rotated_sposet name="rot-spo-up" method="global">
+      <sposet basisset="LCAOBSet" name="spo-up">
+          <coefficient id="updetC" size="7" type="Array">
+-0.3203716272  -0.3404403028  -0.09736649354 -0.4377945266   0.02422331125 -0.7356371679   0.1963907521 
+ 0.6670416431   0.05589542384 -0.008824049451 0.008662699137 0.6847608591  -0.2754997442   0.08355057464 
+-0.07564611156 -0.02925862449  0.9855001816  -0.1207470799   0.08664604529 -0.01033460848 -0.004097813485 
+ 0.1073578588   0.7549795709  -0.0300084091  -0.6086077017  -0.1715415418  -0.06645371134 -0.1154716512 
+ 0.6371603738  -0.4486557886   0.04882715285 -0.2885587972  -0.5478377162   0.06267642488 -0.05504428678 
+-0.1551054775  -0.2897122992  -0.1210613174  -0.5804289448   0.4111220408   0.6019922521   0.09507611705 
+ 0.07168243573  0.1577993958   0.03592848464  0.05580124477 -0.1563146302   0.1099633686   0.963862095 
+          </coefficient>
+      </sposet>
+    </rotated_sposet>
+    </sposet_collection>
+    <determinantset key="STO" source="ion0" transform="no" type="MO">
+      <slaterdeterminant batch="no">
+        <determinant id="rot-spo-up"/>
+        <determinant id="rot-spo-up"/>
+      </slaterdeterminant>
+    </determinantset>
+ </wavefunction>
+</qmcsystem>
diff --git a/src/QMCWaveFunctions/tests/rot_be_sto_wf.py b/src/QMCWaveFunctions/tests/rot_be_sto_wf.py
new file mode 100644
index 0000000000..7fdcc70ad5
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/rot_be_sto_wf.py
@@ -0,0 +1,181 @@
+import autograd.numpy as np
+from autograd import hessian, grad
+from run_qmc import run_qmc
+import read_qmcpack
+from slater_orbitals import STO
+
+import scipy.linalg
+
+# From construct_rot.py
+def construct_antisym(p):
+    return np.array(
+        [
+            [0, 0, -p[0], -p[1], -p[2], -p[3], -p[4]],
+            [0, 0, -p[5], -p[6], -p[7], -p[8], -p[9]],
+            [p[0], p[5], 0, 0, 0, 0, 0],
+            [p[1], p[6], 0, 0, 0, 0, 0],
+            [p[2], p[7], 0, 0, 0, 0, 0],
+            [p[3], p[8], 0, 0, 0, 0, 0],
+            [p[4], p[9], 0, 0, 0, 0, 0],
+        ]
+    )
+
+
+def det2(phi):
+    return phi[0, 0] * phi[1, 1] - phi[1, 0] * phi[0, 1]
+
+
+def mat_exp(m):
+    # Simple approximation good enough for derivatives at zero rotation
+    # Might only need to go up to the linear term
+    return np.eye(m.shape[0]) + m + np.dot(m, m) / 2
+
+
+class Wavefunction_Be_STO:
+    def __init__(self, basis, mo_coeff):
+        self.sto = STO()
+        self.sto.set_basis(basis)
+        self.mo_coeff = mo_coeff
+        self.hess = list()
+        for i in range(4):
+            self.hess.append(hessian(self.psi_internal, i))
+
+        self.nmo = 7
+
+        self.param_size = 10
+
+        self.dpsi = grad(self.psi, 1)
+
+        self.dlocal_energy = grad(self.local_energy, 1)
+
+    def mag(self, r):
+        return np.sqrt(r[0] * r[0] + r[1] * r[1] + r[2] * r[2])
+
+    def psi_internal(self, r1, r2, r3, r4, VP):
+
+        param = VP[0 : self.param_size]
+        rot = construct_antisym(param)
+        # Could use this if not doing autodiff
+        # rot_mat = scipy.linalg.expm(-rot)
+        rot_mat = mat_exp(-rot)
+        rot_mo = np.dot(rot_mat, self.mo_coeff)
+
+        mo_size = self.mo_coeff.shape[0]
+        phi0_1 = [self.sto.eval_v2(i, r1) for i in range(mo_size)]
+        phi0_2 = [self.sto.eval_v2(i, r2) for i in range(mo_size)]
+        phi0_a = np.array([phi0_1, phi0_2])
+        phi0 = np.dot(rot_mo, phi0_a.T)
+
+        phi1_1 = [self.sto.eval_v2(i, r3) for i in range(mo_size)]
+        phi1_2 = [self.sto.eval_v2(i, r4) for i in range(mo_size)]
+        phi1_a = np.array([phi1_1, phi1_2])
+        phi1 = np.dot(rot_mo, phi1_a.T)
+
+        d1 = det2(phi0)
+        d2 = det2(phi1)
+        return d1 * d2
+
+    def psi(self, r, VP):
+        r1 = r[0, :]
+        r2 = r[1, :]
+        r3 = r[2, :]
+        r4 = r[3, :]
+        return self.psi_internal(r1, r2, r3, r4, VP)
+
+    def dpsi(self, r, VP):
+        r1 = r[0, :]
+        r2 = r[1, :]
+        r3 = r[2, :]
+        r4 = r[3, :]
+        return self.dpsi_internal(r1, r2, r3, r4, VP)
+
+    def en_pot(self, r):
+        Z = 4.0
+        total = 0.0
+        for i in range(r.shape[0]):
+            r_mag = self.mag(r[i, :])
+            total += -Z / r_mag
+        return total
+
+    def ee_pot(self, r):
+        total = 0.0
+        for i in range(r.shape[0]):
+            for j in range(i):
+                rij = r[j, :] - r[i, :]
+                rij_mag = self.mag(rij)
+                total += 1.0 / rij_mag
+        return total
+
+    def lap(self, r1, r2, r3, r4, VP):
+        h = 0.0
+        for i in range(4):
+            h += np.sum(np.diag(self.hess[i](r1, r2, r3, r4, VP)))
+        return h
+
+    def local_energy(self, r, VP):
+        r1 = r[0, :]
+        r2 = r[1, :]
+        r3 = r[2, :]
+        r4 = r[3, :]
+        pot = self.en_pot(r) + self.ee_pot(r)
+        psi_val = self.psi_internal(r1, r2, r3, r4, VP)
+        lapl = self.lap(r1, r2, r3, r4, VP)
+        h = -0.5 * lapl / psi_val + pot
+        return h
+
+    def dlocal_energy(self, r, VP):
+        r1 = r[0, :]
+        r2 = r[1, :]
+        r3 = r[2, :]
+        r4 = r[3, :]
+        pot = self.en_pot(r) + self.ee_pot(r)
+        psi_val = self.psi_internal(r1, r2, r3, r4, VP)
+        lapl = self.lap(r1, r2, r3, r4, VP)
+        h = -0.5 * lapl / psi_val + pot
+        return h
+
+# Create reference values for
+# "Rotated LCAO Be single determinant" and "Rotated LCAO Be multi determinant with one determinant"
+# in test_RotatedSPOs_LCAO.cpp
+def generate_point_values():
+    fname = "rot_Be_STO.wfnoj.xml"
+    basis, mo = read_qmcpack.parse_qmc_wf(fname, ["Be"])
+    wf = Wavefunction_Be_STO(basis["Be"], mo)
+
+    r = np.array([[0.7, 2.0, 3.0], [1.2, 1.5, 0.5], [1.5, 1.6, 1.5], [0.7, 1.0, 1.2]])
+
+    VP = np.zeros(wf.param_size)
+    p = wf.psi(r, VP)
+    print("psi = ", p)
+    print("log psi = ", np.log(abs(p)))
+
+    dp = wf.dpsi(r, VP)
+    print("dpsi = ", dp)
+
+    print("dlogpsi = ", dp / p)
+    dlogpsi = dp / p
+    for i in range(dlogpsi.shape[0]):
+        print("    CHECK(dlogpsi[{}] == ValueApprox({}));".format(i, dlogpsi[i]))
+
+    en = wf.local_energy(r, VP)
+    print("en = ", en)
+
+    den = wf.dlocal_energy(r, VP)
+    print("den = ", den)
+
+    for i in range(den.shape[0]):
+        print("    CHECK(dhpsioverpsi[{}] == ValueApprox({}));".format(i, den[i]))
+
+
+def run():
+    fname = "rot_Be_STO.wfnoj.xml"
+    basis, mo = read_qmcpack.parse_qmc_wf(fname, ["Be"])
+    wf = Wavefunction_Be_STO(basis["Be"], mo)
+    VP = np.zeros(wf.param_size)
+    r = np.array([[0.7, 2.0, 3.0], [1.2, 1.5, 0.5], [1.5, 1.6, 1.5], [0.7, 1.0, 1.2]])
+    run_qmc(r, wf, VP, nstep=100, nblock=10)
+
+
+if __name__ == "__main__":
+    generate_point_values()
+    #run()
diff --git a/src/QMCWaveFunctions/tests/rot_multi_1det_Be_STO.wfnoj.xml b/src/QMCWaveFunctions/tests/rot_multi_1det_Be_STO.wfnoj.xml
new file mode 100644
index 0000000000..43e6f8ac9c
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/rot_multi_1det_Be_STO.wfnoj.xml
@@ -0,0 +1,52 @@
+<qmcsystem>
+  <wavefunction name="psi0" target="e">
+    <!-- From Bunge, Barrientos, and Bunge, Atomic Data and Nuclear Data Tables 53, 113-162(1993) -->
+    <sposet_collection type="MolecularOrbital">
+      <basisset keyword="STO" transform="no">
+        <atomicBasisSet elementType="Be" normalized="no" type="STO">
+          <basisGroup l="0" m="0" rid="C0" type="Slater">
+             <radfunc n="1" exponent= "5.7531" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C1" type="Slater">
+             <radfunc n="1" exponent= "3.7156" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C2" type="Slater">
+             <radfunc n="3" exponent= "9.9670" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C3" type="Slater">
+             <radfunc n="3" exponent= "3.7128" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C4" type="Slater">
+             <radfunc n="2" exponent= "4.4661" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C5" type="Slater">
+             <radfunc n="2" exponent= "1.2919" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C6" type="Slater">
+             <radfunc n="2" exponent= "0.8555" />
+          </basisGroup>
+        </atomicBasisSet>
+      </basisset>
+      <rotated_sposet name="rot-spo-up" method="global">
+      <sposet basisset="LCAOBSet" name="spo-up">
+          <coefficient id="updetC" size="7" type="Array">
+-0.3203716272  -0.3404403028  -0.09736649354 -0.4377945266   0.02422331125 -0.7356371679   0.1963907521 
+ 0.6670416431   0.05589542384 -0.008824049451 0.008662699137 0.6847608591  -0.2754997442   0.08355057464 
+-0.07564611156 -0.02925862449  0.9855001816  -0.1207470799   0.08664604529 -0.01033460848 -0.004097813485 
+ 0.1073578588   0.7549795709  -0.0300084091  -0.6086077017  -0.1715415418  -0.06645371134 -0.1154716512 
+ 0.6371603738  -0.4486557886   0.04882715285 -0.2885587972  -0.5478377162   0.06267642488 -0.05504428678 
+-0.1551054775  -0.2897122992  -0.1210613174  -0.5804289448   0.4111220408   0.6019922521   0.09507611705 
+ 0.07168243573  0.1577993958   0.03592848464  0.05580124477 -0.1563146302   0.1099633686   0.963862095 
+          </coefficient>
+      </sposet>
+    </rotated_sposet>
+    </sposet_collection>
+    <determinantset>
+      <multideterminant optimize="no" spo_0="rot-spo-up" spo_1="rot-spo-up" algorithm="precomputed_table_method">
+        <detlist size="1" type="DETS" nc0="0" ne0="2" nc1="0" ne1="2" nstates="4" cutoff="1e-20">
+          <ci id="CI0" coeff="1.0" occ0="1100" occ1="1100"/>
+        </detlist>
+       </multideterminant>
+    </determinantset>
+ </wavefunction>
+</qmcsystem>
diff --git a/src/QMCWaveFunctions/tests/rot_multi_2det_Be_STO.wfnoj.xml b/src/QMCWaveFunctions/tests/rot_multi_2det_Be_STO.wfnoj.xml
new file mode 100644
index 0000000000..a131605524
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/rot_multi_2det_Be_STO.wfnoj.xml
@@ -0,0 +1,54 @@
+<qmcsystem>
+  <wavefunction name="psi0" target="e">
+    <!-- From Bunge, Barrientos, and Bunge, Atomic Data and Nuclear Data Tables 53, 113-162(1993) -->
+    <sposet_collection type="MolecularOrbital">
+      <basisset keyword="STO" transform="no">
+        <atomicBasisSet elementType="Be" normalized="no" type="STO">
+          <basisGroup l="0" m="0" rid="C0" type="Slater">
+             <radfunc n="1" exponent= "5.7531" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C1" type="Slater">
+             <radfunc n="1" exponent= "3.7156" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C2" type="Slater">
+             <radfunc n="3" exponent= "9.9670" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C3" type="Slater">
+             <radfunc n="3" exponent= "3.7128" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C4" type="Slater">
+             <radfunc n="2" exponent= "4.4661" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C5" type="Slater">
+             <radfunc n="2" exponent= "1.2919" />
+          </basisGroup>
+          <basisGroup l="0" m="0" rid="C6" type="Slater">
+             <radfunc n="2" exponent= "0.8555" />
+          </basisGroup>
+        </atomicBasisSet>
+      </basisset>
+      <rotated_sposet name="rot-spo-up" method="global">
+      <sposet basisset="LCAOBSet" name="spo-up">
+          <coefficient id="updetC" size="7" type="Array">
+-0.3203716272  -0.3404403028  -0.09736649354 -0.4377945266   0.02422331125 -0.7356371679   0.1963907521 
+ 0.6670416431   0.05589542384 -0.008824049451 0.008662699137 0.6847608591  -0.2754997442   0.08355057464 
+-0.07564611156 -0.02925862449  0.9855001816  -0.1207470799   0.08664604529 -0.01033460848 -0.004097813485 
+ 0.1073578588   0.7549795709  -0.0300084091  -0.6086077017  -0.1715415418  -0.06645371134 -0.1154716512 
+ 0.6371603738  -0.4486557886   0.04882715285 -0.2885587972  -0.5478377162   0.06267642488 -0.05504428678 
+-0.1551054775  -0.2897122992  -0.1210613174  -0.5804289448   0.4111220408   0.6019922521   0.09507611705 
+ 0.07168243573  0.1577993958   0.03592848464  0.05580124477 -0.1563146302   0.1099633686   0.963862095 
+          </coefficient>
+      </sposet>
+    </rotated_sposet>
+    </sposet_collection>
+    <determinantset>
+      <multideterminant optimize="yes" spo_0="rot-spo-up" spo_1="rot-spo-up" algorithm="precomputed_table_method">
+        <detlist size="2" type="DETS" nc0="0" ne0="2" nc1="0" ne1="2" nstates="4" cutoff="1e-20">
+          <ci id="CI0" coeff="1.0" occ0="1100" occ1="1100"/>
+          <ci id="CI1" coeff="0.1" occ0="1010" occ1="1010"/>
+        </detlist>
+       </multideterminant>
+    </determinantset>
+
+ </wavefunction>
+</qmcsystem>
diff --git a/src/QMCWaveFunctions/tests/rot_multi_be_sto_wf.py b/src/QMCWaveFunctions/tests/rot_multi_be_sto_wf.py
new file mode 100644
index 0000000000..fb8ba622b1
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/rot_multi_be_sto_wf.py
@@ -0,0 +1,194 @@
+import autograd.numpy as np
+from autograd import hessian, grad
+
+from run_qmc import run_qmc
+import read_qmcpack
+from slater_orbitals import STO
+
+import scipy.linalg
+
+# From construct_rot.py
+def construct_antisym_ex(p):
+    return np.array(
+        [
+            [0, -p[0], -p[1], -p[2], -p[3], -p[4], -p[5]],
+            [p[0], 0, -p[6], -p[7], -p[8], -p[9], -p[10]],
+            [p[1], p[6], 0, -p[11], -p[12], -p[13], -p[14]],
+            [p[2], p[7], p[11], 0, 0, 0, 0],
+            [p[3], p[8], p[12], 0, 0, 0, 0],
+            [p[4], p[9], p[13], 0, 0, 0, 0],
+            [p[5], p[10], p[14], 0, 0, 0, 0],
+        ]
+    )
+
+
+# 2x2 determinant between two states
+def det2_ex(phi, i, j):
+    return phi[i, 0] * phi[j, 1] - phi[j, 0] * phi[i, 1]
+
+
+def mat_exp(m):
+    # Second-order Taylor expansion of exp(m). Adequate here because only first
+    # derivatives at zero rotation are needed; the linear term alone may suffice.
+    return np.eye(m.shape[0]) + m + np.dot(m, m) / 2
+
+
+class Wavefunction_Be_STO:
+    def __init__(self, basis, mo_coeff):
+        self.sto = STO()
+        self.sto.set_basis(basis)
+        self.mo_coeff = mo_coeff
+        self.hess = list()
+        for i in range(4):
+            self.hess.append(hessian(self.psi_internal, i))
+
+        self.nmo = 7
+
+        self.rot_param_size = 15  # size of p in construct_antisym_ex
+
+        self.dpsi = grad(self.psi, 1)
+
+        self.dlocal_energy = grad(self.local_energy, 1)
+
+    def mag(self, r):
+        return np.sqrt(r[0] * r[0] + r[1] * r[1] + r[2] * r[2])
+
+    def psi_internal(self, r1, r2, r3, r4, VP):
+
+        param = VP[1 : self.rot_param_size + 1]
+
+        rot = construct_antisym_ex(param)
+        # Can use this line if not doing autodiff
+        # rot_mat = scipy.linalg.expm(-rot)
+        rot_mat = mat_exp(-rot)
+        rot_mo = np.dot(rot_mat, self.mo_coeff)
+
+        mo_size = self.mo_coeff.shape[0]
+        phi0_1 = [self.sto.eval_v2(i, r1) for i in range(mo_size)]
+        phi0_2 = [self.sto.eval_v2(i, r2) for i in range(mo_size)]
+        phi0_a = np.array([phi0_1, phi0_2])
+        phi0 = np.dot(rot_mo, phi0_a.T)
+
+        phi1_1 = [self.sto.eval_v2(i, r3) for i in range(mo_size)]
+        phi1_2 = [self.sto.eval_v2(i, r4) for i in range(mo_size)]
+        phi1_a = np.array([phi1_1, phi1_2])
+        phi1 = np.dot(rot_mo, phi1_a.T)
+
+        d1 = det2_ex(phi0, 0, 1)
+        d2 = det2_ex(phi1, 0, 1)
+        c1 = 1.0
+
+        d3 = det2_ex(phi0, 0, 2)
+        d4 = det2_ex(phi1, 0, 2)
+        c2 = VP[0]
+
+        return c1 * d1 * d2 + c2 * d3 * d4
+
+    def psi(self, r, VP):
+        r1 = r[0, :]
+        r2 = r[1, :]
+        r3 = r[2, :]
+        r4 = r[3, :]
+        return self.psi_internal(r1, r2, r3, r4, VP)
+
+    def dpsi(self, r, VP):
+        # Gradient of psi with respect to the variational parameters VP.
+        # NOTE: __init__ stores an equivalent autograd closure on the
+        # instance as self.dpsi, which shadows this method for instances;
+        # this definition is kept consistent with that closure.
+        return grad(self.psi, 1)(r, VP)
+
+    def en_pot(self, r):
+        Z = 4.0
+        total = 0.0
+        for i in range(r.shape[0]):
+            r_mag = self.mag(r[i, :])
+            total += -Z / r_mag
+        return total
+
+    def ee_pot(self, r):
+        total = 0.0
+        for i in range(r.shape[0]):
+            for j in range(i):
+                rij = r[j, :] - r[i, :]
+                rij_mag = self.mag(rij)
+                total += 1.0 / rij_mag
+        return total
+
+    def lap(self, r1, r2, r3, r4, VP):
+        h = 0.0
+        for i in range(4):
+            h += np.sum(np.diag(self.hess[i](r1, r2, r3, r4, VP)))
+        return h
+
+    def local_energy(self, r, VP):
+        r1 = r[0, :]
+        r2 = r[1, :]
+        r3 = r[2, :]
+        r4 = r[3, :]
+        pot = self.en_pot(r) + self.ee_pot(r)
+        psi_val = self.psi_internal(r1, r2, r3, r4, VP)
+        lapl = self.lap(r1, r2, r3, r4, VP)
+        h = -0.5 * lapl / psi_val + pot
+        return h
+
+    def dlocal_energy(self, r, VP):
+        # Derivative of the local energy with respect to the variational
+        # parameters VP, evaluated by autograd at electron positions r
+        # (one row per electron).
+        #
+        # NOTE: __init__ stores an equivalent autograd closure on the
+        # instance as self.dlocal_energy, which shadows this method for
+        # instances. This definition is kept consistent with that closure
+        # so the name never yields the undifferentiated local energy.
+        return grad(self.local_energy, 1)(r, VP)
+
+
+# Create reference values for
+# "Rotated LCAO Be two determinant" in test_RotatedSPOs_LCAO.cpp
+def gen_point_derivatives():
+    # Reads the basis set and MO coefficients from the file; the determinant expansion is defined in psi_internal
+    fname = "rot_multi_2det_Be_STO.wfnoj.xml"
+    basis, mo = read_qmcpack.parse_qmc_wf(fname, ["Be"])
+    wf = Wavefunction_Be_STO(basis["Be"], mo)
+
+    r = np.array([[0.7, 2.0, 3.0], [1.2, 1.5, 0.5], [1.5, 1.6, 1.5], [0.7, 1.0, 1.2]])
+
+    VP = np.zeros(wf.rot_param_size + 1)
+    VP[0] = 0.1
+    p = wf.psi(r, VP)
+    print("psi = ", p)
+    print("log psi = ", np.log(abs(p)))
+
+    dp = wf.dpsi(r, VP)
+    print("dpsi = ", dp)
+
+    print("dlogpsi = ", dp / p)
+    dlogpsi = dp / p
+    for i in range(dlogpsi.shape[0]):
+        print("    CHECK(dlogpsi[{}] == ValueApprox({}));".format(i, dlogpsi[i]))
+
+    en = wf.local_energy(r, VP)
+    print("en = ", en)
+
+    den = wf.dlocal_energy(r, VP)
+    print("den = ", den)
+
+    for i in range(den.shape[0]):
+        print("   CHECK(dhpsioverpsi[{}] == ValueApprox({}));".format(i, den[i]))
+
+
+def run():
+    fname = "rot_multi_2det_Be_STO.wfnoj.xml"
+    basis, mo = read_qmcpack.parse_qmc_wf(fname, ["Be"])
+    wf = Wavefunction_Be_STO(basis["Be"], mo)
+
+    r = np.array([[0.7, 2.0, 3.0], [1.2, 1.5, 0.5], [1.5, 1.6, 1.5], [0.7, 1.0, 1.2]])
+    VP = np.zeros(wf.rot_param_size + 1)
+    VP[0] = 0.1
+    run_qmc(r, wf, VP, nstep=10, nblock=10)
+
+
+if __name__ == "__main__":
+    gen_point_derivatives()
+    #run()
diff --git a/src/QMCWaveFunctions/tests/slater_orbitals.py b/src/QMCWaveFunctions/tests/slater_orbitals.py
new file mode 100644
index 0000000000..cf4a46c461
--- /dev/null
+++ b/src/QMCWaveFunctions/tests/slater_orbitals.py
@@ -0,0 +1,69 @@
+from sympy import *
+from sympy.utilities.lambdify import lambdastr
+from collections import namedtuple, defaultdict
+
+# import numpy as np
+from autograd import grad
+import autograd.numpy as np
+import math
+
+
+# n, zeta, and contraction_coeff are lists of size nbasis
+CG_basis = namedtuple(
+    "CG_basis", ["orbtype", "nbasis", "n", "zeta", "contraction_coeff"]
+)
+
+
+class STO:
+    def __init__(self):
+        x, y, z = symbols("x y z", real=True)
+        zeta = Symbol("zeta", positive=True, real=True)
+        r = Symbol("r", real=True, nonnegative=True)
+        N = Symbol("N")
+        self.N = N
+        n = Symbol("n", integer=True, positive=True)
+        norm = (2 * zeta) ** n * sqrt(2 * zeta / factorial(2 * n))
+
+        sto_sym_raw = N * r ** (n - 1) * exp(-zeta * r)
+
+        stosym = sto_sym_raw.subs(N, norm)
+
+        nmax = 3
+        self.sto = dict()
+        self.sto_dr = dict()
+        for nval in range(1, nmax + 1):
+            subs_list = {n: nval}
+            csto = stosym.subs(subs_list).evalf()
+
+            sto_str = lambdastr((r, zeta), csto).replace("math", "np")
+            self.sto[nval] = eval(sto_str)
+            # print('s',sto_str)
+
+    def set_basis(self, basis):
+        self.basis = basis
+
+    def eval_v(self, x, y, z):
+        r = np.sqrt(x * x + y * y + z * z)
+        ang_norm = 1 / np.sqrt(4 * np.pi)
+        v = 0.0
+        for basis in self.basis:
+            for i in range(basis.nbasis):
+                v += (
+                    ang_norm
+                    * basis.contraction_coeff[i]
+                    * self.sto[basis.n[i]](r, basis.zeta[i])
+                )
+        return v
+
+    def eval_v2(self, basis_idx, r):
+        r = np.sqrt(r[0] * r[0] + r[1] * r[1] + r[2] * r[2])
+        ang_norm = 1 / np.sqrt(4 * np.pi)
+        v = 0.0
+        basis = self.basis[basis_idx]
+        for i in range(basis.nbasis):
+            v += (
+                ang_norm
+                * basis.contraction_coeff[i]
+                * self.sto[basis.n[i]](r, basis.zeta[i])
+            )
+        return v
diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOs_LCAO.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOs_LCAO.cpp
index c9909c17e9..05c436933a 100644
--- a/src/QMCWaveFunctions/tests/test_RotatedSPOs_LCAO.cpp
+++ b/src/QMCWaveFunctions/tests/test_RotatedSPOs_LCAO.cpp
@@ -74,6 +74,56 @@ void setupParticleSetPool(ParticleSetPool& pp)
   pp.put(part_elec);
 }
 
+// Set particles for Be atom
+void setupParticleSetPoolBe(ParticleSetPool& pp)
+{
+  // See ParticleIO/tests/test_xml_io.cpp for particle parsing
+  const char* particles = R"(<qmcsystem>
+  <particleset name="ion0" size="1">
+    <group name="Be">
+      <parameter name="charge">4</parameter>
+      <parameter name="valence">4</parameter>
+      <parameter name="atomicnumber">4</parameter>
+    </group>
+    <attrib name="position" datatype="posArray">
+  0.0000000000e+00  0.0000000000e+00  0.0000000000e+00
+</attrib>
+    <attrib name="ionid" datatype="stringArray">
+ Be
+</attrib>
+  </particleset>
+
+  <particleset name="e" random="no">
+    <group name="u" size="2">
+      <parameter name="charge">-1</parameter>
+    <attrib name="position" datatype="posArray">
+      0.7 2.0 3.0
+      1.2 1.5 0.5
+    </attrib>
+    </group>
+    <group name="d" size="2">
+      <parameter name="charge">-1</parameter>
+    <attrib name="position" datatype="posArray">
+      1.5 1.6 1.5
+      0.7 1.0 1.2
+    </attrib>
+    </group>
+  </particleset>
+</qmcsystem>)";
+
+  Libxml2Document doc;
+
+  bool okay = doc.parseFromString(particles);
+  REQUIRE(okay);
+
+  xmlNodePtr root = doc.getRoot();
+
+  xmlNodePtr part_ion = xmlFirstElementChild(root);
+  pp.put(part_ion);
+  xmlNodePtr part_elec = xmlNextElementSibling(part_ion);
+  pp.put(part_elec);
+}
+
 std::string setupRotationXML(const std::string& rot_angle_up,
                              const std::string& rot_angle_down,
                              const std::string& coeff_up,
@@ -690,4 +740,219 @@ TEST_CASE("Rotated LCAO rotation consistency", "[qmcapp]")
     CHECK(new_params2[i] == Approx(expected_param[i]));
 }
 
+// Reference values from rot_be_sto_wf.py
+// Uses single determinant code path
+TEST_CASE("Rotated LCAO Be single determinant", "[qmcapp]")
+{
+  ProjectData test_project("test", ProjectData::DriverVersion::BATCH);
+  Communicate* c;
+  c = OHMMS::Controller;
+
+  ParticleSetPool pp(c);
+  setupParticleSetPoolBe(pp);
+
+  WaveFunctionPool wp(test_project.getRuntimeOptions(), pp, c);
+
+  REQUIRE(wp.empty() == true);
+
+  Libxml2Document doc;
+  bool okay = doc.parse("rot_Be_STO.wfnoj.xml");
+  REQUIRE(okay);
+  xmlNodePtr root = doc.getRoot();
+
+  wp.put(xmlFirstElementChild(root));
+
+
+  TrialWaveFunction* psi = wp.getWaveFunction("psi0");
+  REQUIRE(psi != nullptr);
+  REQUIRE(psi->getOrbitals().size() == 1);
+
+  opt_variables_type opt_vars;
+  psi->checkInVariables(opt_vars);
+  opt_vars.resetIndex();
+  psi->checkOutVariables(opt_vars);
+  psi->resetParameters(opt_vars);
+
+  ParticleSet* elec = pp.getParticleSet("e");
+  elec->update();
+
+
+  double logval = psi->evaluateLog(*elec);
+  CHECK(logval == Approx(-17.768474132175342));
+
+  using ValueType = QMCTraits::ValueType;
+  Vector<ValueType> dlogpsi(10);
+  Vector<ValueType> dhpsioverpsi(10);
+  psi->evaluateDerivatives(*elec, opt_vars, dlogpsi, dhpsioverpsi);
+
+  CHECK(dlogpsi[0] == ValueApprox(0.24797938203759148));
+  CHECK(dlogpsi[1] == ValueApprox(0.41454059122930453));
+  CHECK(dlogpsi[2] == ValueApprox(0.7539626161586822));
+  CHECK(dlogpsi[3] == ValueApprox(3.13489394217799));
+  CHECK(dlogpsi[4] == ValueApprox(8.47176722646749));
+  CHECK(dlogpsi[5] == ValueApprox(-0.48182453464906033));
+  CHECK(dlogpsi[6] == ValueApprox(2.269206401396164));
+  CHECK(dlogpsi[7] == ValueApprox(-1.883221269688377));
+  CHECK(dlogpsi[8] == ValueApprox(-19.450964163527598));
+  CHECK(dlogpsi[9] == ValueApprox(-47.28198556252034));
+
+  CHECK(dhpsioverpsi[0] == ValueApprox(0.3662586398420111));
+  CHECK(dhpsioverpsi[1] == ValueApprox(-5.544323554018982));
+  CHECK(dhpsioverpsi[2] == ValueApprox(-0.7790656028274846));
+  CHECK(dhpsioverpsi[3] == ValueApprox(24.930187483208087));
+  CHECK(dhpsioverpsi[4] == ValueApprox(71.30301022344871));
+  CHECK(dhpsioverpsi[5] == ValueApprox(-1.1614358798793771));
+  CHECK(dhpsioverpsi[6] == ValueApprox(17.678711245652913));
+  CHECK(dhpsioverpsi[7] == ValueApprox(2.491238469662668));
+  CHECK(dhpsioverpsi[8] == ValueApprox(-79.37464297365679));
+  CHECK(dhpsioverpsi[9] == ValueApprox(-227.0976672502695));
+}
+
+// Reference values from rot_be_sto_wf.py
+// Uses multi-determinant code path with one determinant
+TEST_CASE("Rotated LCAO Be multi determinant with one determinant", "[qmcapp]")
+{
+  ProjectData test_project("test", ProjectData::DriverVersion::BATCH);
+  Communicate* c;
+  c = OHMMS::Controller;
+
+  ParticleSetPool pp(c);
+  setupParticleSetPoolBe(pp);
+
+  WaveFunctionPool wp(test_project.getRuntimeOptions(), pp, c);
+
+  REQUIRE(wp.empty() == true);
+
+  Libxml2Document doc;
+  bool okay = doc.parse("rot_multi_1det_Be_STO.wfnoj.xml");
+  REQUIRE(okay);
+  xmlNodePtr root = doc.getRoot();
+
+  wp.put(xmlFirstElementChild(root));
+
+  TrialWaveFunction* psi = wp.getWaveFunction("psi0");
+  REQUIRE(psi != nullptr);
+  REQUIRE(psi->getOrbitals().size() == 1);
+
+  opt_variables_type opt_vars;
+  psi->checkInVariables(opt_vars);
+  opt_vars.resetIndex();
+  psi->checkOutVariables(opt_vars);
+  psi->resetParameters(opt_vars);
+
+  ParticleSet* elec = pp.getParticleSet("e");
+  elec->update();
+
+
+  double logval = psi->evaluateLog(*elec);
+  CHECK(logval == Approx(-17.768474132175342));
+
+  using ValueType = QMCTraits::ValueType;
+  Vector<ValueType> dlogpsi(10);
+  Vector<ValueType> dhpsioverpsi(10);
+  psi->evaluateDerivatives(*elec, opt_vars, dlogpsi, dhpsioverpsi);
+
+  CHECK(dlogpsi[0] == ValueApprox(0.24797938203759148));
+  CHECK(dlogpsi[1] == ValueApprox(0.41454059122930453));
+  CHECK(dlogpsi[2] == ValueApprox(0.7539626161586822));
+  CHECK(dlogpsi[3] == ValueApprox(3.13489394217799));
+  CHECK(dlogpsi[4] == ValueApprox(8.47176722646749));
+  CHECK(dlogpsi[5] == ValueApprox(-0.48182453464906033));
+  CHECK(dlogpsi[6] == ValueApprox(2.269206401396164));
+  CHECK(dlogpsi[7] == ValueApprox(-1.883221269688377));
+  CHECK(dlogpsi[8] == ValueApprox(-19.450964163527598));
+  CHECK(dlogpsi[9] == ValueApprox(-47.28198556252034));
+
+  CHECK(dhpsioverpsi[0] == ValueApprox(0.3662586398420111));
+  CHECK(dhpsioverpsi[1] == ValueApprox(-5.544323554018982));
+  CHECK(dhpsioverpsi[2] == ValueApprox(-0.7790656028274846));
+  CHECK(dhpsioverpsi[3] == ValueApprox(24.930187483208087));
+  CHECK(dhpsioverpsi[4] == ValueApprox(71.30301022344871));
+  CHECK(dhpsioverpsi[5] == ValueApprox(-1.1614358798793771));
+  CHECK(dhpsioverpsi[6] == ValueApprox(17.678711245652913));
+  CHECK(dhpsioverpsi[7] == ValueApprox(2.491238469662668));
+  CHECK(dhpsioverpsi[8] == ValueApprox(-79.37464297365679));
+  CHECK(dhpsioverpsi[9] == ValueApprox(-227.0976672502695));
+}
+
+// Reference values from rot_multi_be_sto_wf.py
+// Uses multi-determinant code path with two determinants
+TEST_CASE("Rotated LCAO Be two determinant", "[qmcapp]")
+{
+  ProjectData test_project("test", ProjectData::DriverVersion::BATCH);
+  Communicate* c;
+  c = OHMMS::Controller;
+
+  ParticleSetPool pp(c);
+  setupParticleSetPoolBe(pp);
+
+  WaveFunctionPool wp(test_project.getRuntimeOptions(), pp, c);
+
+  REQUIRE(wp.empty() == true);
+
+  Libxml2Document doc;
+  bool okay = doc.parse("rot_multi_2det_Be_STO.wfnoj.xml");
+  REQUIRE(okay);
+  xmlNodePtr root = doc.getRoot();
+
+  wp.put(xmlFirstElementChild(root));
+
+  TrialWaveFunction* psi = wp.getWaveFunction("psi0");
+  REQUIRE(psi != nullptr);
+  REQUIRE(psi->getOrbitals().size() == 1);
+
+  opt_variables_type opt_vars;
+  psi->checkInVariables(opt_vars);
+  opt_vars.resetIndex();
+  psi->checkOutVariables(opt_vars);
+  psi->resetParameters(opt_vars);
+
+  ParticleSet* elec = pp.getParticleSet("e");
+  elec->update();
+
+
+  double logval = psi->evaluateLog(*elec);
+  CHECK(logval == Approx(-17.762687110866413));
+
+  using ValueType = QMCTraits::ValueType;
+  Vector<ValueType> dlogpsi(16);
+  Vector<ValueType> dhpsioverpsi(16);
+  psi->evaluateDerivatives(*elec, opt_vars, dlogpsi, dhpsioverpsi);
+
+  CHECK(dlogpsi[0] == ValueApprox(0.05770308755290168));
+  CHECK(dlogpsi[1] == ValueApprox(0.00593995768443123));
+  CHECK(dlogpsi[2] == ValueApprox(0.24654846443828843));
+  CHECK(dlogpsi[3] == ValueApprox(0.4214539468865001));
+  CHECK(dlogpsi[4] == ValueApprox(0.7484015451192123));
+  CHECK(dlogpsi[5] == ValueApprox(3.076586144487743));
+  CHECK(dlogpsi[6] == ValueApprox(8.329621106110908));
+  CHECK(dlogpsi[7] == ValueApprox(-0.4311398324864351));
+  CHECK(dlogpsi[8] == ValueApprox(2.2561123798306273));
+  CHECK(dlogpsi[9] == ValueApprox(-1.8723545015077454));
+  CHECK(dlogpsi[10] == ValueApprox(-19.33872609471596));
+  CHECK(dlogpsi[11] == ValueApprox(-47.00915390726143));
+  CHECK(dlogpsi[12] == ValueApprox(-0.05463186141658209));
+  CHECK(dlogpsi[13] == ValueApprox(0.045055811131004785));
+  CHECK(dlogpsi[14] == ValueApprox(0.46675941272234));
+  CHECK(dlogpsi[15] == ValueApprox(1.1352711502777513));
+
+
+  CHECK(dhpsioverpsi[0] == ValueApprox(0.2761674423047662));
+  CHECK(dhpsioverpsi[1] == ValueApprox(0.022999975062422046));
+  CHECK(dhpsioverpsi[2] == ValueApprox(0.3572968312376671));
+  CHECK(dhpsioverpsi[3] == ValueApprox(-5.459873357259045));
+  CHECK(dhpsioverpsi[4] == ValueApprox(-0.792225084691375));
+  CHECK(dhpsioverpsi[5] == ValueApprox(24.453138754349123));
+  CHECK(dhpsioverpsi[6] == ValueApprox(70.0280297306038));
+  CHECK(dhpsioverpsi[7] == ValueApprox(-1.0272848501840672));
+  CHECK(dhpsioverpsi[8] == ValueApprox(17.514031530576368));
+  CHECK(dhpsioverpsi[9] == ValueApprox(2.52887169464403));
+  CHECK(dhpsioverpsi[10] == ValueApprox(-78.37945447401765));
+  CHECK(dhpsioverpsi[11] == ValueApprox(-224.4814690906403));
+  CHECK(dhpsioverpsi[12] == ValueApprox(-0.6346957697642424));
+  CHECK(dhpsioverpsi[13] == ValueApprox(0.03270289146243591));
+  CHECK(dhpsioverpsi[14] == ValueApprox(3.263830358386392));
+  CHECK(dhpsioverpsi[15] == ValueApprox(8.944714289946793));
+}
+
 } // namespace qmcplusplus

From 2998b67d173254b21897f83618541690d75f7cc0 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 3 Aug 2023 18:11:59 -0500
Subject: [PATCH 10/65] Add SYCL memory printing.

---
 src/Platforms/MemoryUsage.cpp      | 10 ++++++++++
 src/Platforms/SYCL/SYCLruntime.cpp |  4 ++++
 src/Platforms/SYCL/SYCLruntime.hpp |  3 +++
 3 files changed, 17 insertions(+)

diff --git a/src/Platforms/MemoryUsage.cpp b/src/Platforms/MemoryUsage.cpp
index 7635015efb..756693fb89 100644
--- a/src/Platforms/MemoryUsage.cpp
+++ b/src/Platforms/MemoryUsage.cpp
@@ -20,6 +20,10 @@
 #include "CUDA/CUDAallocator.hpp"
 #include "CUDA/CUDAruntime.hpp"
 #endif
+#ifdef ENABLE_SYCL
+#include "SYCL/SYCLallocator.hpp"
+#include "SYCL/SYCLruntime.hpp"
+#endif
 
 namespace qmcplusplus
 {
@@ -40,6 +44,12 @@ void print_mem(const std::string& title, std::ostream& log)
   log << "Free memory available on default device    : " << std::setw(7) << (getCUDAdeviceFreeMem() >> 20) << " MiB"
       << std::endl;
 #endif
+#ifdef ENABLE_SYCL
+  log << "Device memory allocated via SYCL allocator : " << std::setw(7) << (getSYCLdeviceMemAllocated() >> 20)
+      << " MiB" << std::endl;
+  log << "Free memory available on default device    : " << std::setw(7) << (getSYCLdeviceFreeMem() >> 20) << " MiB"
+      << std::endl;
+#endif
 #ifdef ENABLE_OFFLOAD
   log << "Device memory allocated via OpenMP offload : " << std::setw(7) << (getOMPdeviceMemAllocated() >> 20) << " MiB"
       << std::endl;
diff --git a/src/Platforms/SYCL/SYCLruntime.cpp b/src/Platforms/SYCL/SYCLruntime.cpp
index aae8d4c990..535969d6c0 100644
--- a/src/Platforms/SYCL/SYCLruntime.cpp
+++ b/src/Platforms/SYCL/SYCLruntime.cpp
@@ -16,4 +16,17 @@
 namespace qmcplusplus
 {
 sycl::queue& getSYCLDefaultDeviceDefaultQueue() { return SYCLDeviceManager::getDefaultDeviceDefaultQueue(); }
+/** Query the free global memory of the default SYCL device in bytes.
+ *
+ *  Relies on the Intel device-info extension; returns 0 when the device does not
+ *  report the ext_intel_free_memory aspect (info::device::global_mem_size is the
+ *  total capacity, not the free amount).
+ */
+size_t getSYCLdeviceFreeMem()
+{
+  const auto device = getSYCLDefaultDeviceDefaultQueue().get_device();
+  if (device.has(sycl::aspect::ext_intel_free_memory))
+    return device.get_info<sycl::ext::intel::info::device::free_memory>();
+  return 0;
+}
 } // namespace qmcplusplus
diff --git a/src/Platforms/SYCL/SYCLruntime.hpp b/src/Platforms/SYCL/SYCLruntime.hpp
index 46b29fa0d8..b6e6a575dd 100644
--- a/src/Platforms/SYCL/SYCLruntime.hpp
+++ b/src/Platforms/SYCL/SYCLruntime.hpp
@@ -17,6 +17,9 @@
 namespace qmcplusplus
 {
 sycl::queue& getSYCLDefaultDeviceDefaultQueue();
+
+size_t getSYCLdeviceFreeMem();
+
 } // namespace qmcplusplus
 
 #endif

From f2a3fb87175eb43bc713a72016df7827b9444bde Mon Sep 17 00:00:00 2001
From: Mark Dewing <markdewing@gmail.com>
Date: Fri, 4 Aug 2023 10:04:22 -0500
Subject: [PATCH 11/65] Guard batch size computation against zero

Situation can arise when walkers_per_rank is not commensurate
with the number of processors and there are zero walkers on some
ranks.

Fixes #4690
---
 src/QMCDrivers/WFOpt/QMCCostFunctionBatched.cpp      | 8 ++++++--
 src/QMCDrivers/tests/test_QMCCostFunctionBatched.cpp | 5 +++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.cpp b/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.cpp
index f3a00c656c..a4fb4b03bf 100644
--- a/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.cpp
+++ b/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.cpp
@@ -198,9 +198,13 @@ void QMCCostFunctionBatched::getConfigurations(const std::string& aroot)
 //                             if the number of samples is not a multiple of the batch size
 void compute_batch_parameters(int sample_size, int batch_size, int& num_batches, int& final_batch_size)
 {
-  num_batches      = sample_size / batch_size;
+  if (batch_size == 0)
+    num_batches = 0;
+  else
+    num_batches = sample_size / batch_size;
+
   final_batch_size = batch_size;
-  if (sample_size % batch_size != 0)
+  if (batch_size != 0 && sample_size % batch_size != 0)
   {
     num_batches += 1;
     final_batch_size = sample_size % batch_size;
diff --git a/src/QMCDrivers/tests/test_QMCCostFunctionBatched.cpp b/src/QMCDrivers/tests/test_QMCCostFunctionBatched.cpp
index db31f83756..65864e5f43 100644
--- a/src/QMCDrivers/tests/test_QMCCostFunctionBatched.cpp
+++ b/src/QMCDrivers/tests/test_QMCCostFunctionBatched.cpp
@@ -40,6 +40,11 @@ TEST_CASE("compute_batch_parameters", "[drivers]")
   compute_batch_parameters(sample_size, batch_size, num_batches, final_batch_size);
   CHECK(num_batches == 3);
   CHECK(final_batch_size == 3);
+
+  batch_size = 0;
+  compute_batch_parameters(sample_size, batch_size, num_batches, final_batch_size);
+  CHECK(num_batches == 0);
+  CHECK(final_batch_size == 0);
 }
 
 namespace testing

From eefc5eec7a932034cccb8a3db2b8a6e54a366fbb Mon Sep 17 00:00:00 2001
From: Mark Dewing <markdewing@gmail.com>
Date: Fri, 4 Aug 2023 15:31:47 -0500
Subject: [PATCH 12/65] Add comments to top of some Python files

---
 src/QMCWaveFunctions/tests/rot_be_sto_wf.py       | 4 ++++
 src/QMCWaveFunctions/tests/rot_multi_be_sto_wf.py | 4 ++++
 src/QMCWaveFunctions/tests/slater_orbitals.py     | 3 +++
 3 files changed, 11 insertions(+)

diff --git a/src/QMCWaveFunctions/tests/rot_be_sto_wf.py b/src/QMCWaveFunctions/tests/rot_be_sto_wf.py
index 7fdcc70ad5..ee6ffe25f4 100644
--- a/src/QMCWaveFunctions/tests/rot_be_sto_wf.py
+++ b/src/QMCWaveFunctions/tests/rot_be_sto_wf.py
@@ -1,3 +1,7 @@
+
+# Compute wavefunction values and parameter derivatives
+# for a wavefunction with STO Be orbitals, single determinant, and orbital rotation
+
 import autograd.numpy as np
 from autograd import hessian, grad
 from run_qmc import run_qmc
diff --git a/src/QMCWaveFunctions/tests/rot_multi_be_sto_wf.py b/src/QMCWaveFunctions/tests/rot_multi_be_sto_wf.py
index fb8ba622b1..1256f4a982 100644
--- a/src/QMCWaveFunctions/tests/rot_multi_be_sto_wf.py
+++ b/src/QMCWaveFunctions/tests/rot_multi_be_sto_wf.py
@@ -1,3 +1,7 @@
+
+# Compute wavefunction values and parameter derivatives
+# for a wavefunction with STO Be orbitals, two determinants, and orbital rotation
+
 import autograd.numpy as np
 from autograd import hessian, grad
 
diff --git a/src/QMCWaveFunctions/tests/slater_orbitals.py b/src/QMCWaveFunctions/tests/slater_orbitals.py
index cf4a46c461..40e7cc7864 100644
--- a/src/QMCWaveFunctions/tests/slater_orbitals.py
+++ b/src/QMCWaveFunctions/tests/slater_orbitals.py
@@ -1,3 +1,6 @@
+
+# Evaluate STO's starting from a symbolic representation
+
 from sympy import *
 from sympy.utilities.lambdify import lambdastr
 from collections import namedtuple, defaultdict

From 0ff5bf90294e61ec3b6621537418c8fe339b6355 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Fri, 4 Aug 2023 17:29:49 +0000
Subject: [PATCH 13/65] Split CUDADeviceManager.h and OMPDeviceManager.h into
 cpp files.

---
 src/Platforms/CUDA/CMakeLists.txt            |  4 +-
 src/Platforms/CUDA/CUDADeviceManager.cpp     | 50 ++++++++++++++++++++
 src/Platforms/CUDA/CUDADeviceManager.h       | 34 +------------
 src/Platforms/OMPTarget/CMakeLists.txt       |  5 +-
 src/Platforms/OMPTarget/OMPDeviceManager.cpp | 44 +++++++++++++++++
 src/Platforms/OMPTarget/OMPDeviceManager.h   | 29 +-----------
 6 files changed, 103 insertions(+), 63 deletions(-)
 create mode 100644 src/Platforms/CUDA/CUDADeviceManager.cpp
 create mode 100644 src/Platforms/OMPTarget/OMPDeviceManager.cpp

diff --git a/src/Platforms/CUDA/CMakeLists.txt b/src/Platforms/CUDA/CMakeLists.txt
index 7199c49e07..a37b0ecc9e 100644
--- a/src/Platforms/CUDA/CMakeLists.txt
+++ b/src/Platforms/CUDA/CMakeLists.txt
@@ -9,12 +9,14 @@
 #// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 #//////////////////////////////////////////////////////////////////////////////////////
 
-set(CUDA_RT_SRCS CUDAfill.cpp CUDAallocator.cpp CUDAruntime.cpp)
+set(CUDA_RT_SRCS CUDAfill.cpp CUDAallocator.cpp CUDAruntime.cpp CUDADeviceManager.cpp)
 set(CUDA_LA_SRCS cuBLAS_missing_functions.cu)
 
 add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
 add_library(platform_cuda_LA ${CUDA_LA_SRCS})
 
+target_link_libraries(platform_cuda_runtime PRIVATE platform_host_runtime)
+
 if(NOT QMC_CUDA2HIP)
   target_link_libraries(platform_cuda_runtime PUBLIC CUDA::cudart)
   target_link_libraries(platform_cuda_LA PUBLIC CUDA::cublas CUDA::cusolver)
diff --git a/src/Platforms/CUDA/CUDADeviceManager.cpp b/src/Platforms/CUDA/CUDADeviceManager.cpp
new file mode 100644
index 0000000000..50dbd50dd1
--- /dev/null
+++ b/src/Platforms/CUDA/CUDADeviceManager.cpp
@@ -0,0 +1,50 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2023 QMCPACK developers.
+//
+// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "CUDADeviceManager.h"
+#include <stdexcept>
+#include "CUDAruntime.hpp"
+#include "OutputManager.h"
+#include "determineDefaultDeviceNum.h"
+
+namespace qmcplusplus
+{
+CUDADeviceManager::CUDADeviceManager(int& default_device_num, int& num_devices, int local_rank, int local_size)
+    : cuda_default_device_num(-1), cuda_device_count(0)
+{
+  cudaErrorCheck(cudaGetDeviceCount(&cuda_device_count), "cudaGetDeviceCount failed!");
+  if (num_devices == 0)
+    num_devices = cuda_device_count;
+  else if (num_devices != cuda_device_count)
+    throw std::runtime_error("Inconsistent number of CUDA devices with the previous record!");
+  if (cuda_device_count > local_size)
+    app_warning() << "More CUDA devices than the number of MPI ranks. "
+                  << "Some devices will be left idle.\n"
+                  << "There is potential performance issue with the GPU affinity. "
+                  << "Use CUDA_VISIBLE_DEVICE or MPI launcher to expose desired devices.\n";
+  if (num_devices > 0)
+  {
+    cuda_default_device_num = determineDefaultDeviceNum(cuda_device_count, local_rank, local_size);
+    if (default_device_num < 0)
+      default_device_num = cuda_default_device_num;
+    else if (default_device_num != cuda_default_device_num)
+      throw std::runtime_error("Inconsistent assigned CUDA devices with the previous record!");
+
+#pragma omp parallel
+    {
+      cudaErrorCheck(cudaSetDevice(cuda_default_device_num), "cudaSetDevice failed!");
+      cudaErrorCheck(cudaFree(0), "cudaFree failed!");
+    }
+  }
+}
+} // namespace qmcplusplus
diff --git a/src/Platforms/CUDA/CUDADeviceManager.h b/src/Platforms/CUDA/CUDADeviceManager.h
index 003e0c0326..6acb97436e 100644
--- a/src/Platforms/CUDA/CUDADeviceManager.h
+++ b/src/Platforms/CUDA/CUDADeviceManager.h
@@ -14,11 +14,6 @@
 #ifndef QMCPLUSPLUS_CUDADEVICEMANAGER_H
 #define QMCPLUSPLUS_CUDADEVICEMANAGER_H
 
-#include <stdexcept>
-#include "CUDAruntime.hpp"
-#include "Host/OutputManager.h"
-#include "determineDefaultDeviceNum.h"
-
 namespace qmcplusplus
 {
 
@@ -30,34 +25,7 @@ class CUDADeviceManager
   int cuda_device_count;
 
 public:
-  CUDADeviceManager(int& default_device_num, int& num_devices, int local_rank, int local_size)
-      : cuda_default_device_num(-1), cuda_device_count(0)
-  {
-    cudaErrorCheck(cudaGetDeviceCount(&cuda_device_count), "cudaGetDeviceCount failed!");
-    if (num_devices == 0)
-      num_devices = cuda_device_count;
-    else if (num_devices != cuda_device_count)
-      throw std::runtime_error("Inconsistent number of CUDA devices with the previous record!");
-    if (cuda_device_count > local_size)
-      app_warning() << "More CUDA devices than the number of MPI ranks. "
-                    << "Some devices will be left idle.\n"
-                    << "There is potential performance issue with the GPU affinity. "
-                    << "Use CUDA_VISIBLE_DEVICE or MPI launcher to expose desired devices.\n";
-    if (num_devices > 0)
-    {
-      cuda_default_device_num = determineDefaultDeviceNum(cuda_device_count, local_rank, local_size);
-      if (default_device_num < 0)
-        default_device_num = cuda_default_device_num;
-      else if (default_device_num != cuda_default_device_num)
-        throw std::runtime_error("Inconsistent assigned CUDA devices with the previous record!");
-
-#pragma omp parallel
-      {
-        cudaErrorCheck(cudaSetDevice(cuda_default_device_num), "cudaSetDevice failed!");
-        cudaErrorCheck(cudaFree(0), "cudaFree failed!");
-      }
-    }
-  }
+  CUDADeviceManager(int& default_device_num, int& num_devices, int local_rank, int local_size);
 };
 } // namespace qmcplusplus
 
diff --git a/src/Platforms/OMPTarget/CMakeLists.txt b/src/Platforms/OMPTarget/CMakeLists.txt
index 55f187599a..bf7b56bb25 100644
--- a/src/Platforms/OMPTarget/CMakeLists.txt
+++ b/src/Platforms/OMPTarget/CMakeLists.txt
@@ -2,14 +2,14 @@
 #// This file is distributed under the University of Illinois/NCSA Open Source License.
 #// See LICENSE file in top directory for details.
 #//
-#// Copyright (c) 2021 QMCPACK developers.
+#// Copyright (c) 2023 QMCPACK developers.
 #//
 #// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 #//
 #// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 #//////////////////////////////////////////////////////////////////////////////////////
 
-set(OMP_RT_SRCS OMPallocator.cpp)
+set(OMP_RT_SRCS OMPallocator.cpp OMPDeviceManager.cpp)
 set(OMP_LA_SRCS ompBLAS.cpp)
 
 add_library(platform_omptarget_runtime ${OMP_RT_SRCS})
@@ -18,6 +18,7 @@ if(USE_OBJECT_TARGET)
 else()
   add_library(platform_omptarget_LA ${OMP_LA_SRCS})
 endif()
+target_link_libraries(platform_omptarget_runtime PRIVATE platform_host_runtime)
 target_link_libraries(platform_omptarget_LA PUBLIC platform_omptarget_runtime)
 
 target_compile_options(platform_omptarget_LA PRIVATE "$<$<BOOL:${ENABLE_OFFLOAD_CLANG_DEBUG_O3}>:$<$<CONFIG:DEBUG>:-O3>>")
diff --git a/src/Platforms/OMPTarget/OMPDeviceManager.cpp b/src/Platforms/OMPTarget/OMPDeviceManager.cpp
new file mode 100644
index 0000000000..c6b5701d83
--- /dev/null
+++ b/src/Platforms/OMPTarget/OMPDeviceManager.cpp
@@ -0,0 +1,44 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2023 QMCPACK developers.
+//
+// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
+//
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "OMPDeviceManager.h"
+#include <stdexcept>
+#include <omp.h>
+#include "OutputManager.h"
+#include "determineDefaultDeviceNum.h"
+
+namespace qmcplusplus
+{
+
+OMPDeviceManager::OMPDeviceManager(int& default_device_num, int& num_devices, int local_rank, int local_size)
+    : omp_default_device_num(-1), omp_device_count(omp_get_num_devices())
+{
+  if (num_devices == 0)
+    num_devices = omp_device_count;
+  else if (num_devices != omp_device_count)
+    throw std::runtime_error("Inconsistent number of OpenMP devices with the previous record!");
+  if (omp_device_count > local_size)
+    app_warning() << "More OpenMP devices than the number of MPI ranks. "
+                  << "Some devices will be left idle.\n"
+                  << "There is potential performance issue with the GPU affinity.\n";
+  if (num_devices > 0)
+  {
+    omp_default_device_num = determineDefaultDeviceNum(omp_device_count, local_rank, local_size);
+    if (default_device_num < 0)
+      default_device_num = omp_default_device_num;
+    else if (default_device_num != omp_default_device_num)
+      throw std::runtime_error("Inconsistent assigned OpenMP devices with the previous record!");
+    omp_set_default_device(omp_default_device_num);
+  }
+}
+} // namespace qmcplusplus
diff --git a/src/Platforms/OMPTarget/OMPDeviceManager.h b/src/Platforms/OMPTarget/OMPDeviceManager.h
index 3d88e4651c..528e14fb75 100644
--- a/src/Platforms/OMPTarget/OMPDeviceManager.h
+++ b/src/Platforms/OMPTarget/OMPDeviceManager.h
@@ -2,7 +2,7 @@
 // This file is distributed under the University of Illinois/NCSA Open Source License.
 // See LICENSE file in top directory for details.
 //
-// Copyright (c) 2021 QMCPACK developers.
+// Copyright (c) 2023 QMCPACK developers.
 //
 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 //
@@ -14,11 +14,6 @@
 #ifndef QMCPLUSPLUS_OMPDEVICEMANAGER_H
 #define QMCPLUSPLUS_OMPDEVICEMANAGER_H
 
-#include <stdexcept>
-#include <omp.h>
-#include "Host/OutputManager.h"
-#include "determineDefaultDeviceNum.h"
-
 namespace qmcplusplus
 {
 
@@ -30,27 +25,7 @@ class OMPDeviceManager
   const int omp_device_count;
 
 public:
-  OMPDeviceManager(int& default_device_num, int& num_devices, int local_rank, int local_size)
-      : omp_default_device_num(-1), omp_device_count(omp_get_num_devices())
-  {
-    if (num_devices == 0)
-      num_devices = omp_device_count;
-    else if (num_devices != omp_device_count)
-      throw std::runtime_error("Inconsistent number of OpenMP devices with the previous record!");
-    if (omp_device_count > local_size)
-      app_warning() << "More OpenMP devices than the number of MPI ranks. "
-                    << "Some devices will be left idle.\n"
-                    << "There is potential performance issue with the GPU affinity.\n";
-    if (num_devices > 0)
-    {
-      omp_default_device_num = determineDefaultDeviceNum(omp_device_count, local_rank, local_size);
-      if (default_device_num < 0)
-        default_device_num = omp_default_device_num;
-      else if (default_device_num != omp_default_device_num)
-        throw std::runtime_error("Inconsistent assigned OpenMP devices with the previous record!");
-      omp_set_default_device(omp_default_device_num);
-    }
-  }
+  OMPDeviceManager(int& default_device_num, int& num_devices, int local_rank, int local_size);
 };
 } // namespace qmcplusplus
 

From bf2443448ad4dd994b0d29053dcada5bb72646b5 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Sat, 5 Aug 2023 04:03:44 +0000
Subject: [PATCH 14/65] Correct SYCL free memory query.

---
 src/Platforms/MemoryUsage.cpp            | 12 ++++++------
 src/Platforms/SYCL/SYCLDeviceManager.cpp |  3 +++
 src/Platforms/SYCL/SYCLruntime.cpp       |  6 +++++-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/Platforms/MemoryUsage.cpp b/src/Platforms/MemoryUsage.cpp
index 756693fb89..34842a32dc 100644
--- a/src/Platforms/MemoryUsage.cpp
+++ b/src/Platforms/MemoryUsage.cpp
@@ -38,20 +38,20 @@ void print_mem(const std::string& title, std::ostream& log)
   log << std::right;
   log << "Available memory on node 0, free + buffers : " << std::setw(7) << (freemem() >> 20) << " MiB" << std::endl;
   log << "Memory footprint by rank 0 on node 0       : " << std::setw(7) << (memusage() >> 10) << " MiB" << std::endl;
+#ifdef ENABLE_OFFLOAD
+  log << "Device memory allocated via OpenMP offload : " << std::setw(7) << (getOMPdeviceMemAllocated() >> 20) << " MiB"
+      << std::endl;
+#endif
 #ifdef ENABLE_CUDA
   log << "Device memory allocated via CUDA allocator : " << std::setw(7) << (getCUDAdeviceMemAllocated() >> 20)
       << " MiB" << std::endl;
-  log << "Free memory available on default device    : " << std::setw(7) << (getCUDAdeviceFreeMem() >> 20) << " MiB"
+  log << "Free memory on the default device          : " << std::setw(7) << (getCUDAdeviceFreeMem() >> 20) << " MiB"
       << std::endl;
 #endif
 #ifdef ENABLE_SYCL
   log << "Device memory allocated via SYCL allocator : " << std::setw(7) << (getSYCLdeviceMemAllocated() >> 20)
       << " MiB" << std::endl;
-  log << "Free memory available on default device    : " << std::setw(7) << (getSYCLdeviceFreeMem() >> 20) << " MiB"
-      << std::endl;
-#endif
-#ifdef ENABLE_OFFLOAD
-  log << "Device memory allocated via OpenMP offload : " << std::setw(7) << (getOMPdeviceMemAllocated() >> 20) << " MiB"
+  log << "Free memory on the default device          : " << std::setw(7) << (getSYCLdeviceFreeMem() >> 20) << " MiB"
       << std::endl;
 #endif
   log << line_separator << std::endl;
diff --git a/src/Platforms/SYCL/SYCLDeviceManager.cpp b/src/Platforms/SYCL/SYCLDeviceManager.cpp
index cb8cfd57ed..2c32e1d6f7 100644
--- a/src/Platforms/SYCL/SYCLDeviceManager.cpp
+++ b/src/Platforms/SYCL/SYCLDeviceManager.cpp
@@ -130,6 +130,9 @@ SYCLDeviceManager::SYCLDeviceManager(int& default_device_num, int& num_devices,
     default_device_queue = std::make_unique<sycl::queue>(visible_devices[sycl_default_device_num].get_context(),
                                                          visible_devices[sycl_default_device_num].get_device(),
                                                          sycl::property::queue::in_order());
+    if(!visible_devices[sycl_default_device_num].get_device().has(sycl::aspect::ext_intel_free_memory))
+      app_warning() << "Free memory queries always return 0 due to inactive 'oneAPI' System Resource Management (sysman). "
+	            << "Set environment variable ZES_ENABLE_SYSMAN to 1 to activate the query feature." << std::endl;
   }
 }
 
diff --git a/src/Platforms/SYCL/SYCLruntime.cpp b/src/Platforms/SYCL/SYCLruntime.cpp
index 535969d6c0..b782cb8e1e 100644
--- a/src/Platforms/SYCL/SYCLruntime.cpp
+++ b/src/Platforms/SYCL/SYCLruntime.cpp
@@ -18,6 +18,10 @@ namespace qmcplusplus
 sycl::queue& getSYCLDefaultDeviceDefaultQueue() { return SYCLDeviceManager::getDefaultDeviceDefaultQueue(); }
 size_t getSYCLdeviceFreeMem()
 {
-  return getSYCLDefaultDeviceDefaultQueue().get_device().get_info<sycl::info::device::global_mem_size>();
+	auto device = getSYCLDefaultDeviceDefaultQueue().get_device();
+	if(device.has(sycl::aspect::ext_intel_free_memory))
+  return getSYCLDefaultDeviceDefaultQueue().get_device().get_info<sycl::ext::intel::info::device::free_memory>();
+	else
+		return 0;
 }
 } // namespace qmcplusplus

From b08f1368bed66a4681b95ef7ffed94b342a75103 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Fri, 4 Aug 2023 23:04:36 -0500
Subject: [PATCH 15/65] Formatting.

---
 src/Platforms/OMPTarget/CMakeLists.txt   |  3 ++-
 src/Platforms/SYCL/CMakeLists.txt        | 13 ++++++++-----
 src/Platforms/SYCL/SYCLDeviceManager.cpp |  7 ++++---
 src/Platforms/SYCL/SYCLruntime.cpp       | 10 +++++-----
 4 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/Platforms/OMPTarget/CMakeLists.txt b/src/Platforms/OMPTarget/CMakeLists.txt
index bf7b56bb25..131b7452b6 100644
--- a/src/Platforms/OMPTarget/CMakeLists.txt
+++ b/src/Platforms/OMPTarget/CMakeLists.txt
@@ -21,7 +21,8 @@ endif()
 target_link_libraries(platform_omptarget_runtime PRIVATE platform_host_runtime)
 target_link_libraries(platform_omptarget_LA PUBLIC platform_omptarget_runtime)
 
-target_compile_options(platform_omptarget_LA PRIVATE "$<$<BOOL:${ENABLE_OFFLOAD_CLANG_DEBUG_O3}>:$<$<CONFIG:DEBUG>:-O3>>")
+target_compile_options(platform_omptarget_LA
+                       PRIVATE "$<$<BOOL:${ENABLE_OFFLOAD_CLANG_DEBUG_O3}>:$<$<CONFIG:DEBUG>:-O3>>")
 
 if(ENABLE_CUDA AND QMC_OFFLOAD_MEM_ASSOCIATED)
   target_link_libraries(platform_omptarget_runtime PUBLIC platform_cuda_runtime)
diff --git a/src/Platforms/SYCL/CMakeLists.txt b/src/Platforms/SYCL/CMakeLists.txt
index a1a73c5d24..f114ca2aae 100644
--- a/src/Platforms/SYCL/CMakeLists.txt
+++ b/src/Platforms/SYCL/CMakeLists.txt
@@ -9,14 +9,17 @@
 #// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
 #//////////////////////////////////////////////////////////////////////////////////////
 
-
 set(SYCL_RT_SRCS SYCLDeviceManager.cpp SYCLallocator.cpp SYCLruntime.cpp)
 set(SYCL_LA_SRCS syclBLAS.cpp)
 
 add_library(platform_sycl_runtime ${SYCL_RT_SRCS})
-target_link_libraries(platform_sycl_runtime PUBLIC SYCL::host
-                                            PRIVATE platform_host_runtime)
+target_link_libraries(
+  platform_sycl_runtime
+  PUBLIC SYCL::host
+  PRIVATE platform_host_runtime)
 
 add_library(platform_sycl_LA ${SYCL_LA_SRCS})
-target_link_libraries(platform_sycl_LA PUBLIC platform_sycl_runtime MKL::sycl
-                                       PRIVATE SYCL::device platform_sycl_runtime)
+target_link_libraries(
+  platform_sycl_LA
+  PUBLIC platform_sycl_runtime MKL::sycl
+  PRIVATE SYCL::device platform_sycl_runtime)
diff --git a/src/Platforms/SYCL/SYCLDeviceManager.cpp b/src/Platforms/SYCL/SYCLDeviceManager.cpp
index 2c32e1d6f7..5cf1b24e55 100644
--- a/src/Platforms/SYCL/SYCLDeviceManager.cpp
+++ b/src/Platforms/SYCL/SYCLDeviceManager.cpp
@@ -130,9 +130,10 @@ SYCLDeviceManager::SYCLDeviceManager(int& default_device_num, int& num_devices,
     default_device_queue = std::make_unique<sycl::queue>(visible_devices[sycl_default_device_num].get_context(),
                                                          visible_devices[sycl_default_device_num].get_device(),
                                                          sycl::property::queue::in_order());
-    if(!visible_devices[sycl_default_device_num].get_device().has(sycl::aspect::ext_intel_free_memory))
-      app_warning() << "Free memory queries always return 0 due to inactive 'oneAPI' System Resource Management (sysman). "
-	            << "Set environment variable ZES_ENABLE_SYSMAN to 1 to activate the query feature." << std::endl;
+    if (!visible_devices[sycl_default_device_num].get_device().has(sycl::aspect::ext_intel_free_memory))
+      app_warning()
+          << "Free memory queries always return 0 due to inactive 'oneAPI' System Resource Management (sysman). "
+          << "Set environment variable ZES_ENABLE_SYSMAN to 1 to activate the query feature." << std::endl;
   }
 }
 
diff --git a/src/Platforms/SYCL/SYCLruntime.cpp b/src/Platforms/SYCL/SYCLruntime.cpp
index b782cb8e1e..4052860a2a 100644
--- a/src/Platforms/SYCL/SYCLruntime.cpp
+++ b/src/Platforms/SYCL/SYCLruntime.cpp
@@ -18,10 +18,10 @@ namespace qmcplusplus
 sycl::queue& getSYCLDefaultDeviceDefaultQueue() { return SYCLDeviceManager::getDefaultDeviceDefaultQueue(); }
 size_t getSYCLdeviceFreeMem()
 {
-	auto device = getSYCLDefaultDeviceDefaultQueue().get_device();
-	if(device.has(sycl::aspect::ext_intel_free_memory))
-  return getSYCLDefaultDeviceDefaultQueue().get_device().get_info<sycl::ext::intel::info::device::free_memory>();
-	else
-		return 0;
+  auto device = getSYCLDefaultDeviceDefaultQueue().get_device();
+  if (device.has(sycl::aspect::ext_intel_free_memory))
+    return getSYCLDefaultDeviceDefaultQueue().get_device().get_info<sycl::ext::intel::info::device::free_memory>();
+  else
+    return 0;
 }
 } // namespace qmcplusplus

From 6fde9ec6f1064ef2393e1a6659c466e73a7b16dc Mon Sep 17 00:00:00 2001
From: Mark Dewing <markdewing@gmail.com>
Date: Mon, 7 Aug 2023 10:07:41 -0500
Subject: [PATCH 16/65] Enable orbital rotation for J3 and PP

Implement evaluateDerivativesWF, which is evaluateDerivatives with
code for dhpsioverpsi removed.
---
 src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h | 84 +++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h b/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h
index 3548c7ca0f..3f39c98d4d 100644
--- a/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h
+++ b/src/QMCWaveFunctions/Jastrow/JeeIOrbitalSoA.h
@@ -947,6 +947,90 @@ class JeeIOrbitalSoA : public WaveFunctionComponent
     }
   }
 
+  void evaluateDerivativesWF(ParticleSet& P, const opt_variables_type& optvars, Vector<ValueType>& dlogpsi) override
+  {
+    resizeWFOptVectors();
+
+    bool recalculate(false);
+    for (int k = 0; k < myVars.size(); ++k)
+    {
+      int kk = myVars.where(k);
+      if (kk < 0)
+        continue;
+      if (optvars.recompute(kk))
+        recalculate = true;
+    }
+
+    if (recalculate)
+    {
+      constexpr valT czero(0);
+      constexpr valT cone(1);
+      constexpr valT cminus(-1);
+      constexpr valT ctwo(2);
+      constexpr valT lapfac = OHMMS_DIM - cone;
+
+      const auto& ee_table  = P.getDistTableAA(ee_Table_ID_);
+      const auto& ee_dists  = ee_table.getDistances();
+      const auto& ee_displs = ee_table.getDisplacements();
+
+      build_compact_list(P);
+
+      dLogPsi    = czero;
+      gradLogPsi = PosType();
+      lapLogPsi  = czero;
+
+      for (int iat = 0; iat < Nion; ++iat)
+      {
+        const int ig = Ions.GroupID[iat];
+        for (int jg = 0; jg < eGroups; ++jg)
+          for (int jind = 0; jind < elecs_inside(jg, iat).size(); jind++)
+          {
+            const int jel       = elecs_inside(jg, iat)[jind];
+            const valT r_Ij     = elecs_inside_dist(jg, iat)[jind];
+            const posT disp_Ij  = cminus * elecs_inside_displ(jg, iat)[jind];
+            const valT r_Ij_inv = cone / r_Ij;
+
+            for (int kg = 0; kg < eGroups; ++kg)
+              for (int kind = 0; kind < elecs_inside(kg, iat).size(); kind++)
+              {
+                const int kel = elecs_inside(kg, iat)[kind];
+                if (kel < jel)
+                {
+                  const valT r_Ik     = elecs_inside_dist(kg, iat)[kind];
+                  const posT disp_Ik  = cminus * elecs_inside_displ(kg, iat)[kind];
+                  const valT r_Ik_inv = cone / r_Ik;
+
+                  const valT r_jk     = ee_dists[jel][kel];
+                  const posT disp_jk  = ee_displs[jel][kel];
+                  const valT r_jk_inv = cone / r_jk;
+
+                  FT& func = *F(ig, jg, kg);
+                  int idx  = J3UniqueIndex[F(ig, jg, kg)];
+                  func.evaluateDerivatives(r_jk, r_Ij, r_Ik, du_dalpha[idx], dgrad_dalpha[idx], dhess_dalpha[idx]);
+                  int first                   = VarOffset(ig, jg, kg).first;
+                  int last                    = VarOffset(ig, jg, kg).second;
+                  std::vector<RealType>& dlog = du_dalpha[idx];
+
+                  for (int p = first, ip = 0; p < last; p++, ip++)
+                  {
+                    RealType& dval = dlog[ip];
+                    dLogPsi[p] -= dval;
+                  }
+                }
+              }
+          }
+      }
+
+      for (int k = 0; k < myVars.size(); ++k)
+      {
+        int kk = myVars.where(k);
+        if (kk < 0)
+          continue;
+        dlogpsi[kk] = (ValueType)dLogPsi[k];
+      }
+    }
+  }
+
   void evaluateDerivRatios(const VirtualParticleSet& VP,
                            const opt_variables_type& optvars,
                            std::vector<ValueType>& ratios,

From 00fabe59befd05cf0b23158dac73b7e835ed2a29 Mon Sep 17 00:00:00 2001
From: Mark Dewing <markdewing@gmail.com>
Date: Tue, 8 Aug 2023 10:50:52 -0500
Subject: [PATCH 17/65] Update function doc to doxygen format

Also add a comment about the possibility of zero batch size.
---
 src/QMCDrivers/WFOpt/QMCCostFunctionBatched.cpp | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.cpp b/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.cpp
index a4fb4b03bf..56bdce8f98 100644
--- a/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.cpp
+++ b/src/QMCDrivers/WFOpt/QMCCostFunctionBatched.cpp
@@ -191,11 +191,17 @@ void QMCCostFunctionBatched::getConfigurations(const std::string& aroot)
   }
 }
 
-// Input - sample_size  - number of samples to process
-//       - batch_size  -  process samples in batch_size at a time
-// Output - num_batches - number of batches to use
-//        - final_batch_size - the last batch size.  May be smaller than batch_size
-//                             if the number of samples is not a multiple of the batch size
+  /** Compute number of batches and final batch size given the number of samples
+   *   and a batch size.
+   * \param[in] sample_size number of samples to process.
+   * \param[in] batch_size process samples in batch_size at a time (typically the number of walkers in a crowd).
+   * \param[out] num_batches number of batches to use.
+   * \param[out] final_batch_size the last batch size.  May be smaller than batch_size
+   *             if the number of samples is not a multiple of the batch size.
+   *
+   * There may be cases where the batch size is zero. One cause is when the number of walkers per
+   *  rank is less than the number of crowds.
+   */
 void compute_batch_parameters(int sample_size, int batch_size, int& num_batches, int& final_batch_size)
 {
   if (batch_size == 0)

From 15c73d2f8c05012e5f5fa6becca9b139c25a6cd7 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Tue, 8 Aug 2023 16:20:07 -0500
Subject: [PATCH 18/65] Capture all non-zero info in getrf batched.

---
 src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.cu | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.cu b/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.cu
index 6ac999ca95..af7b3032c1 100644
--- a/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.cu
+++ b/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.cu
@@ -158,14 +158,14 @@ void computeGetrf_batched(cublasHandle_t& h_cublas,
                  "cudaMemcpyAsync failed copying cuBLAS::getrf_batched infos from device");
   cudaErrorCheck(cudaStreamSynchronize(hstream), "cudaStreamSynchronize failed!");
 
-  for (int iw = 0; iw < batch_size; ++iw)
+  if (std::all_of(host_infos, host_infos + batch_size, [](int i) { return i != 0; }))
   {
-    if (*(host_infos + iw) != 0)
-    {
-      std::ostringstream err_msg;
-      err_msg << "cuBLAS::getrf_batched failed with return code " << *(host_infos + iw);
-      throw std::runtime_error(err_msg.str());
-    }
+    std::ostringstream err_msg;
+    err_msg << "cuBLAS::getrf_batched failed! Non-zero infos:" << std::endl;
+    for (int iw = 0; iw < batch_size; ++iw)
+      if (*(host_infos + iw) != 0)
+        err_msg << "infos[" << iw << "] = " << *(host_infos + iw) << std::endl;
+    throw std::runtime_error(err_msg.str());
   }
 }
 

From 95c3701df73928a0c94587bbebff790ba604ff7e Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Tue, 8 Aug 2023 17:05:07 -0500
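Subject: [PATCH 19/65] Connect computeGetri_batched

Also fix the info check in computeGetrf_batched: use std::any_of instead of
std::all_of so that a failure in any batch entry is reported, and include
<algorithm> for it.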

---
 src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.cu | 49 +++++++++++++++---
 .../detail/CUDA/cuBLAS_LU.hpp                 | 51 ++++++++++---------
 src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp |  2 +-
 3 files changed, 70 insertions(+), 32 deletions(-)

diff --git a/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.cu b/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.cu
index af7b3032c1..2b31866af1 100644
--- a/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.cu
+++ b/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.cu
@@ -10,6 +10,7 @@
 //////////////////////////////////////////////////////////////////////////////////////
 
 #include "cuBLAS_LU.hpp"
+#include <algorithm>
 #include "Platforms/CUDA/CUDAruntime.hpp"
 #include "Platforms/CUDA/cuBLAS.hpp"
 #include "Platforms/CUDA/CUDATypeMapping.hpp"
@@ -158,7 +159,7 @@ void computeGetrf_batched(cublasHandle_t& h_cublas,
                  "cudaMemcpyAsync failed copying cuBLAS::getrf_batched infos from device");
   cudaErrorCheck(cudaStreamSynchronize(hstream), "cudaStreamSynchronize failed!");
 
-  if (std::all_of(host_infos, host_infos + batch_size, [](int i) { return i != 0; }))
+  if (std::any_of(host_infos, host_infos + batch_size, [](int i) { return i != 0; }))
   {
     std::ostringstream err_msg;
     err_msg << "cuBLAS::getrf_batched failed! Non-zero infos:" << std::endl;
@@ -186,24 +187,37 @@ void computeInverseAndDetLog_batched(cublasHandle_t& h_cublas,
   computeGetrf_batched(h_cublas, hstream, n, lda, Ms, pivots, host_infos, infos, batch_size);
   cudaErrorCheck(computeLogDet_batched_impl(hstream, n, lda, Ms, pivots, log_dets, batch_size),
                  "failed to calculate log determinant values in computeLogDet_batched_impl");
-  cublasErrorCheck(cuBLAS::getri_batched(h_cublas, n, Ms, lda, pivots, Cs, lda, infos, batch_size),
-                   "cuBLAS::getri_batched failed in computeInverseAndDetLog_batched");
-  //FIXME replace getri_batched with computeGetri_batched and computeGetri_batched should sync and check infos
-  cudaErrorCheck(cudaStreamSynchronize(hstream), "cudaStreamSynchronize failed!");
+  computeGetri_batched(h_cublas, hstream, n, lda, Ms, Cs, pivots, host_infos, infos, batch_size);
 }
 
 
+template<typename T>
 void computeGetri_batched(cublasHandle_t& h_cublas,
+                          cudaStream_t& hstream,
                           const int n,
                           const int lda,
-                          double* Ms[],
-                          double* Cs[],
+                          T* Ms[],
+                          T* Cs[],
                           int* pivots,
+                          int* host_infos,
                           int* infos,
                           const int batch_size)
 {
   cublasErrorCheck(cuBLAS::getri_batched(h_cublas, n, Ms, lda, pivots, Cs, lda, infos, batch_size),
                    "cuBLAS::getri_batched failed in computeInverseAndDetLog_batched");
+  cudaErrorCheck(cudaMemcpyAsync(host_infos, infos, sizeof(int) * batch_size, cudaMemcpyDeviceToHost, hstream),
+                 "cudaMemcpyAsync failed copying cuBLAS::getri_batched infos from device");
+  cudaErrorCheck(cudaStreamSynchronize(hstream), "cudaStreamSynchronize failed!");
+
+  if (std::any_of(host_infos, host_infos + batch_size, [](int i) { return i != 0; }))
+  {
+    std::ostringstream err_msg;
+    err_msg << "cuBLAS::getri_batched failed! Non-zero infos:" << std::endl;
+    for (int iw = 0; iw < batch_size; ++iw)
+      if (*(host_infos + iw) != 0)
+        err_msg << "infos[" << iw << "] = " << *(host_infos + iw) << std::endl;
+    throw std::runtime_error(err_msg.str());
+  }
 }
 
 template void computeGetrf_batched<double>(cublasHandle_t& h_cublas,
@@ -226,6 +240,27 @@ template void computeGetrf_batched<std::complex<double>>(cublasHandle_t& h_cubla
                                                          int* infos,
                                                          const int batch_size);
 
+template void computeGetri_batched<double>(cublasHandle_t& h_cublas,
+                                           cudaStream_t& hstream,
+                                           const int n,
+                                           const int lda,
+                                           double* Ms[],
+                                           double* Cs[],
+                                           int* pivots,
+                                           int* host_infos,
+                                           int* infos,
+                                           const int batch_size);
+
+template void computeGetri_batched<std::complex<double>>(cublasHandle_t& h_cublas,
+                                                         cudaStream_t& hstream,
+                                                         const int n,
+                                                         const int lda,
+                                                         std::complex<double>* Ms[],
+                                                         std::complex<double>* Cs[],
+                                                         int* pivots,
+                                                         int* host_infos,
+                                                         int* infos,
+                                                         const int batch_size);
 
 template void computeLogDet_batched<std::complex<double>>(cudaStream_t& hstream,
                                                           const int n,
diff --git a/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.hpp b/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.hpp
index c2157bd2fb..f22e13196d 100644
--- a/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.hpp
+++ b/src/QMCWaveFunctions/detail/CUDA/cuBLAS_LU.hpp
@@ -81,40 +81,43 @@ void computeLogDet_batched(cudaStream_t& hstream,
                            std::complex<double>* logdets,
                            const int batch_size);
 
+template<typename T>
 void computeGetri_batched(cublasHandle_t& h_cublas,
+                          cudaStream_t& hstream,
                           const int n,
                           const int lda,
-                          double* Ms[],
-                          double* Cs[],
+                          T* Ms[],
+                          T* Cs[],
                           int* pivots,
+                          int* host_infos,
                           int* infos,
                           const int batch_size);
 
 extern template void computeInverseAndDetLog_batched<double>(cublasHandle_t& h_cublas,
-                                     cudaStream_t& hstream,
-                                     const int n,
-                                     const int lda,
-                                     double* Ms[],
-                                     double* Cs[],
-                                     double* LU_diags,
-                                     int* pivots,
-                                     int* host_infos,
-                                     int* infos,
-                                     std::complex<double>* log_dets,
-                                     const int batch_size);
+                                                             cudaStream_t& hstream,
+                                                             const int n,
+                                                             const int lda,
+                                                             double* Ms[],
+                                                             double* Cs[],
+                                                             double* LU_diags,
+                                                             int* pivots,
+                                                             int* host_infos,
+                                                             int* infos,
+                                                             std::complex<double>* log_dets,
+                                                             const int batch_size);
 
 extern template void computeInverseAndDetLog_batched<std::complex<double>>(cublasHandle_t& h_cublas,
-                                     cudaStream_t& hstream,
-                                     const int n,
-                                     const int lda,
-                                     std::complex<double>* Ms[],
-                                     std::complex<double>* Cs[],
-                                     std::complex<double>* LU_diags,
-                                     int* pivots,
-                                     int* host_infos,
-                                     int* infos,
-                                     std::complex<double>* log_dets,
-                                     const int batch_size);
+                                                                           cudaStream_t& hstream,
+                                                                           const int n,
+                                                                           const int lda,
+                                                                           std::complex<double>* Ms[],
+                                                                           std::complex<double>* Cs[],
+                                                                           std::complex<double>* LU_diags,
+                                                                           int* pivots,
+                                                                           int* host_infos,
+                                                                           int* infos,
+                                                                           std::complex<double>* log_dets,
+                                                                           const int batch_size);
 
 } // namespace cuBLAS_LU
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp b/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp
index 3f038b7611..f70bc2579d 100644
--- a/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp
+++ b/src/QMCWaveFunctions/tests/test_cuBLAS_LU.cpp
@@ -495,7 +495,7 @@ TEST_CASE("cuBLAS_LU::getri_batched", "[wavefunction][CUDA]")
                  "cudaMemcpyAsync failed copying invMs to device");
   cudaErrorCheck(cudaMemcpyAsync(dev_pivots.data(), pivots.data(), sizeof(int) * 4, cudaMemcpyHostToDevice, hstream),
                  "cudaMemcpyAsync failed copying pivots to device");
-  cuBLAS_LU::computeGetri_batched(cuda_handles->h_cublas, n, lda, devMs.data(), invMs.data(), dev_pivots.data(), dev_infos.data(), batch_size);
+  cuBLAS_LU::computeGetri_batched(cuda_handles->h_cublas, cuda_handles->hstream, n, lda, devMs.data(), invMs.data(), dev_pivots.data(), infos.data(), dev_infos.data(), batch_size);
 
   cudaErrorCheck(cudaMemcpyAsync(invM_vec.data(), dev_invM_vec.data(), sizeof(double) * 16, cudaMemcpyDeviceToHost, hstream),
                  "cudaMemcpyAsync failed copying invM from device");

From aa30754c06d49b6b2c5e36a010f828dc4e1c6a4f Mon Sep 17 00:00:00 2001
From: Mark Dewing <markdewing@gmail.com>
Date: Wed, 9 Aug 2023 10:08:55 -0500
Subject: [PATCH 20/65] Add minimal test for J3 evaluateDerivativesWF

---
 src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp b/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp
index 40b6a2d0da..b2edd16b29 100644
--- a/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp
+++ b/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp
@@ -151,6 +151,7 @@ void test_J3_polynomial3D(const DynamicCoordinateKind kind_selected)
 
   opt_variables_type optvars;
   Vector<WaveFunctionComponent::ValueType> dlogpsi;
+  Vector<WaveFunctionComponent::ValueType> dlogpsiWF;
   Vector<WaveFunctionComponent::ValueType> dhpsioverpsi;
 
   for (OptimizableObject& obj : opt_obj_refs)
@@ -159,6 +160,7 @@ void test_J3_polynomial3D(const DynamicCoordinateKind kind_selected)
   const int NumOptimizables(optvars.size());
   j3->checkOutVariables(optvars);
   dlogpsi.resize(NumOptimizables);
+  dlogpsiWF.resize(NumOptimizables);
   dhpsioverpsi.resize(NumOptimizables);
   j3->evaluateDerivatives(elec_, optvars, dlogpsi, dhpsioverpsi);
 
@@ -170,6 +172,10 @@ void test_J3_polynomial3D(const DynamicCoordinateKind kind_selected)
   CHECK(std::real(dlogpsi[43]) == Approx(1.3358726814e+05));
   CHECK(std::real(dhpsioverpsi[43]) == Approx(-2.3246270644e+05));
 
+  j3->evaluateDerivativesWF(elec_, optvars, dlogpsiWF);
+  for (int i = 0; i < NumOptimizables; i++)
+    CHECK(dlogpsi[i] == Approx(dlogpsiWF[i]));
+
   VirtualParticleSet VP(elec_, 2);
   std::vector<PosType> newpos2(2);
   std::vector<ValueType> ratios2(2);

From e4b42d478b99cc62b9ce79ad4e29913109a01a74 Mon Sep 17 00:00:00 2001
From: Mark Dewing <markdewing@gmail.com>
Date: Wed, 9 Aug 2023 10:56:56 -0500
Subject: [PATCH 21/65] Move variable declarations to right before use

---
 src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp b/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp
index b2edd16b29..a51abe179b 100644
--- a/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp
+++ b/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp
@@ -151,7 +151,6 @@ void test_J3_polynomial3D(const DynamicCoordinateKind kind_selected)
 
   opt_variables_type optvars;
   Vector<WaveFunctionComponent::ValueType> dlogpsi;
-  Vector<WaveFunctionComponent::ValueType> dlogpsiWF;
   Vector<WaveFunctionComponent::ValueType> dhpsioverpsi;
 
   for (OptimizableObject& obj : opt_obj_refs)
@@ -160,7 +159,6 @@ void test_J3_polynomial3D(const DynamicCoordinateKind kind_selected)
   const int NumOptimizables(optvars.size());
   j3->checkOutVariables(optvars);
   dlogpsi.resize(NumOptimizables);
-  dlogpsiWF.resize(NumOptimizables);
   dhpsioverpsi.resize(NumOptimizables);
   j3->evaluateDerivatives(elec_, optvars, dlogpsi, dhpsioverpsi);
 
@@ -172,6 +170,8 @@ void test_J3_polynomial3D(const DynamicCoordinateKind kind_selected)
   CHECK(std::real(dlogpsi[43]) == Approx(1.3358726814e+05));
   CHECK(std::real(dhpsioverpsi[43]) == Approx(-2.3246270644e+05));
 
+  Vector<WaveFunctionComponent::ValueType> dlogpsiWF;
+  dlogpsiWF.resize(NumOptimizables);
   j3->evaluateDerivativesWF(elec_, optvars, dlogpsiWF);
   for (int i = 0; i < NumOptimizables; i++)
     CHECK(dlogpsi[i] == Approx(dlogpsiWF[i]));

From 365c095a3925be269a45850f1d839f0dbeeef9de Mon Sep 17 00:00:00 2001
From: Mark Dewing <markdewing@gmail.com>
Date: Wed, 9 Aug 2023 11:44:06 -0500
Subject: [PATCH 22/65] Fix complex build

---
 src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp b/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp
index a51abe179b..353a3b3f6f 100644
--- a/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp
+++ b/src/QMCWaveFunctions/tests/test_polynomial_eeI_jastrow.cpp
@@ -174,7 +174,7 @@ void test_J3_polynomial3D(const DynamicCoordinateKind kind_selected)
   dlogpsiWF.resize(NumOptimizables);
   j3->evaluateDerivativesWF(elec_, optvars, dlogpsiWF);
   for (int i = 0; i < NumOptimizables; i++)
-    CHECK(dlogpsi[i] == Approx(dlogpsiWF[i]));
+    CHECK(dlogpsi[i] == ValueApprox(dlogpsiWF[i]));
 
   VirtualParticleSet VP(elec_, 2);
   std::vector<PosType> newpos2(2);

From 85b741ba3af714f7a088360d2526b0781400c361 Mon Sep 17 00:00:00 2001
From: walshmm <walshmm@ornl.gov>
Date: Thu, 10 Aug 2023 12:37:17 -0400
Subject: [PATCH 23/65] pin clang and gcc versions for ubuntu22-openmpi

---
 .../docker/dependencies/ubuntu22/openmpi/Dockerfile   | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/config/docker/dependencies/ubuntu22/openmpi/Dockerfile b/config/docker/dependencies/ubuntu22/openmpi/Dockerfile
index 0c292e0fc7..2afbc574f3 100644
--- a/config/docker/dependencies/ubuntu22/openmpi/Dockerfile
+++ b/config/docker/dependencies/ubuntu22/openmpi/Dockerfile
@@ -11,8 +11,8 @@ RUN wget https://apt.kitware.com/kitware-archive.sh &&\
     sh kitware-archive.sh
 
 RUN export DEBIAN_FRONTEND=noninteractive &&\
-    apt-get install gcc g++ \ 
-    clang \
+    apt-get install gcc-9 g++-9 \ 
+    clang-14 \
     clang-format \
     clang-tidy \
     libomp-dev \
@@ -49,6 +49,13 @@ RUN export DEBIAN_FRONTEND=noninteractive &&\
 RUN export DEBIAN_FRONTEND=noninteractive &&\
     pip3 install cif2cell
 
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 && \
+    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 100
+
+# add clang-14 as clang
+RUN update-alternatives --install /usr/bin/clang clang /usr/bin/clang-14 100 && \
+    update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-14 100
+
 # must add a user different from root 
 # to run MPI executables
 RUN useradd -ms /bin/bash user

From 7801ac4ead0261cd36dec09f5f995876f7310f1f Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 10 Aug 2023 15:00:24 -0500
Subject: [PATCH 24/65] House keeping.

---
 src/QMCDrivers/QMCDriverNew.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/QMCDrivers/QMCDriverNew.h b/src/QMCDrivers/QMCDriverNew.h
index 0875802c46..777f7f095c 100644
--- a/src/QMCDrivers/QMCDriverNew.h
+++ b/src/QMCDrivers/QMCDriverNew.h
@@ -433,12 +433,6 @@ class QMCDriverNew : public QMCDriverInterface, public MPIObjectBase
   ///a list of mcwalkerset element
   std::vector<xmlNodePtr> mcwalkerNodePtr;
 
-  ///temporary storage for drift
-  ParticleSet::ParticlePos drift;
-
-  ///temporary storage for random displacement
-  ParticleSet::ParticlePos deltaR;
-
   // ///alternate method of setting QMC run parameters
   // IndexType nStepsBetweenSamples;
   // ///samples per thread

From 555fbf4a710098cfa84dac43fddeb016d0bcbdda Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 10 Aug 2023 15:00:40 -0500
Subject: [PATCH 25/65] Add TWF::checkOneParticleGradientsNaN

---
 src/QMCWaveFunctions/TrialWaveFunction.cpp | 24 ++++++++++++++++++++++
 src/QMCWaveFunctions/TrialWaveFunction.h   |  2 ++
 2 files changed, 26 insertions(+)

diff --git a/src/QMCWaveFunctions/TrialWaveFunction.cpp b/src/QMCWaveFunctions/TrialWaveFunction.cpp
index 5a5e34bc70..4a4336c1d9 100644
--- a/src/QMCWaveFunctions/TrialWaveFunction.cpp
+++ b/src/QMCWaveFunctions/TrialWaveFunction.cpp
@@ -514,6 +514,7 @@ TrialWaveFunction::GradType TrialWaveFunction::evalGrad(ParticleSet& P, int iat)
     ScopedTimer z_timer(WFC_timers_[VGL_TIMER + TIMER_SKIP * i]);
     grad_iat += Z[i]->evalGrad(P, iat);
   }
+  checkOneParticleGradientsNaN(iat, grad_iat, "TWF::evalGrad");
   return grad_iat;
 }
 
@@ -527,6 +528,7 @@ TrialWaveFunction::GradType TrialWaveFunction::evalGradWithSpin(ParticleSet& P,
     ScopedTimer z_timer(WFC_timers_[VGL_TIMER + TIMER_SKIP * i]);
     grad_iat += Z[i]->evalGradWithSpin(P, iat, spingrad);
   }
+  checkOneParticleGradientsNaN(iat, grad_iat, "TWF::evalGradWithSpin");
   return grad_iat;
 }
 
@@ -553,6 +555,9 @@ void TrialWaveFunction::mw_evalGrad(const RefVectorWithLeader<TrialWaveFunction>
     wavefunction_components[i]->mw_evalGrad(wfc_list, p_list, iat, grads_z);
     grads += grads_z;
   }
+
+  for (const GradType& grad : grads.grads_positions) // NaN-check every accumulated position gradient
+    checkOneParticleGradientsNaN(iat, grad, "TWF::mw_evalGrad");
 }
 
 // Evaluates the gradient w.r.t. to the source of the Laplacian
@@ -612,6 +617,8 @@ TrialWaveFunction::ValueType TrialWaveFunction::calcRatioGrad(ParticleSet& P, in
       ScopedTimer z_timer(WFC_timers_[VGL_TIMER + TIMER_SKIP * i]);
       r *= Z[i]->ratioGrad(P, iat, grad_iat);
     }
+
+  checkOneParticleGradientsNaN(iat, grad_iat, "TWF::calcRatioGrad");
   LogValueType logratio = convertValueToLog(r);
   PhaseDiff             = std::imag(logratio);
   return static_cast<ValueType>(r);
@@ -632,6 +639,7 @@ TrialWaveFunction::ValueType TrialWaveFunction::calcRatioGradWithSpin(ParticleSe
     r *= Z[i]->ratioGradWithSpin(P, iat, grad_iat, spingrad_iat);
   }
 
+  checkOneParticleGradientsNaN(iat, grad_iat, "TWF::calcRatioGradWithSpin");
   LogValueType logratio = convertValueToLog(r);
   PhaseDiff             = std::imag(logratio);
   return static_cast<ValueType>(r);
@@ -687,6 +695,9 @@ void TrialWaveFunction::mw_calcRatioGrad(const RefVectorWithLeader<TrialWaveFunc
   }
   for (int iw = 0; iw < wf_list.size(); iw++)
     wf_list[iw].PhaseDiff = std::imag(std::arg(ratios[iw]));
+
+  for (const GradType& grads : grad_new.grads_positions)
+    checkOneParticleGradientsNaN(iat, grads, "TWF::mw_calcRatioGrad");
 }
 
 void TrialWaveFunction::printGL(ParticleSet::ParticleGradient& G, ParticleSet::ParticleLaplacian& L, std::string tag)
@@ -1183,6 +1194,19 @@ void TrialWaveFunction::releaseResource(ResourceCollection& collection,
   }
 }
 
+void TrialWaveFunction::checkOneParticleGradientsNaN(int iel, const GradType& grads, const std::string_view location)
+{
+  if (qmcplusplus::isnan(std::norm(dot(grads, grads)))) // single screening test over all components at once
+  {
+    std::ostringstream error_message;
+    error_message << "NaN check in " << location << " found" << std::endl;
+    for (int i = 0; i < grads.size(); ++i) // only reached on failure: report which components are NaN
+      if (qmcplusplus::isnan(std::norm(grads[i])))
+        error_message << "  particle " << iel << " grads[" << i << "] is NaN." << std::endl;
+    throw std::runtime_error(error_message.str());
+  }
+}
+
 RefVectorWithLeader<WaveFunctionComponent> TrialWaveFunction::extractWFCRefList(
     const RefVectorWithLeader<TrialWaveFunction>& wf_list,
     int id)
diff --git a/src/QMCWaveFunctions/TrialWaveFunction.h b/src/QMCWaveFunctions/TrialWaveFunction.h
index 3bf96c7af3..aa31fd8374 100644
--- a/src/QMCWaveFunctions/TrialWaveFunction.h
+++ b/src/QMCWaveFunctions/TrialWaveFunction.h
@@ -551,6 +551,8 @@ class TrialWaveFunction
   std::vector<std::reference_wrapper<NewTimer>> WFC_timers_;
   std::vector<RealType> myTwist;
 
+  static void checkOneParticleGradientsNaN(int iel, const GradType& grads, const std::string_view location);
+
   /** @{
    *  @brief helper function for extracting a list of WaveFunctionComponent from a list of TrialWaveFunction
    */

From 6421417f54d6998b253fe354b1a61ebea5ce2ebd Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Thu, 10 Aug 2023 16:05:33 -0500
Subject: [PATCH 26/65] Remove NaN checks in DriftModifierUNR

---
 .../DriftModifierUNR.cpp                      | 33 -------------------
 1 file changed, 33 deletions(-)

diff --git a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp
index 7c3fab8a32..de30678f15 100644
--- a/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp
+++ b/src/QMCDrivers/GreenFunctionModifiers/DriftModifierUNR.cpp
@@ -21,28 +21,12 @@ void DriftModifierUNR::getDrift(RealType tau, const GradType& qf, PosType& drift
 {
   // convert the complex WF gradient to real
   convertToReal(qf, drift);
-#ifndef NDEBUG
-  PosType debug_drift = drift;
-#endif
   RealType vsq = dot(drift, drift);
   RealType sc  = vsq < std::numeric_limits<RealType>::epsilon()
       ? tau
       : ((-1.0 + std::sqrt(1.0 + 2.0 * a_ * tau * vsq)) / (a_ * vsq));
   //Apply the umrigar scaling to drift.
   drift *= sc;
-  if (qmcplusplus::isnan(vsq))
-  {
-    std::ostringstream error_message;
-    for (int i = 0; i < drift.size(); ++i)
-    {
-      if (qmcplusplus::isnan(drift[i]))
-      {
-        error_message << "drift[" << i << "] is nan, vsq (" << vsq << ") sc (" << sc << ")\n";
-        break;
-      }
-    }
-    throw std::runtime_error(error_message.str());
-  }
 }
 
 void DriftModifierUNR::getDrift(RealType tau, const ComplexType& qf, ParticleSet::Scalar_t& drift) const
@@ -55,27 +39,12 @@ void DriftModifierUNR::getDrift(RealType tau, const ComplexType& qf, ParticleSet
       : ((-1.0 + std::sqrt(1.0 + 2.0 * a_ * tau * vsq)) / (a_ * vsq));
   //Apply the umrigar scaling to drift.
   drift *= sc;
-  if (qmcplusplus::isnan(vsq))
-  {
-    std::ostringstream error_message;
-    if (qmcplusplus::isnan(drift))
-    {
-      error_message << "drift is nan, vsq (" << vsq << ") sc (" << sc << ")\n";
-    }
-    else
-    {
-      error_message << "vsq is nan but drift is " << drift << ", unexpected, investigate.\n";
-    }
-    throw std::runtime_error(error_message.str());
-  }
 }
 
 void DriftModifierUNR::getDrifts(RealType tau, const std::vector<GradType>& qf, std::vector<PosType>& drift) const
 {
   for (int i = 0; i < qf.size(); ++i)
-  {
     getDrift(tau, qf[i], drift[i]);
-  }
 }
 
 void DriftModifierUNR::getDrifts(RealType tau,
@@ -83,9 +52,7 @@ void DriftModifierUNR::getDrifts(RealType tau,
                                  std::vector<ParticleSet::Scalar_t>& drift) const
 {
   for (int i = 0; i < qf.size(); ++i)
-  {
     getDrift(tau, qf[i], drift[i]);
-  }
 }
 
 bool DriftModifierUNR::parseXML(xmlNodePtr cur)

From abb9e3b786fdd521613d8a85de657b62ad0d2187 Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Fri, 11 Aug 2023 09:05:56 -0500
Subject: [PATCH 27/65] Documentation.

---
 src/QMCWaveFunctions/TrialWaveFunction.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/QMCWaveFunctions/TrialWaveFunction.h b/src/QMCWaveFunctions/TrialWaveFunction.h
index aa31fd8374..33c7c1db5c 100644
--- a/src/QMCWaveFunctions/TrialWaveFunction.h
+++ b/src/QMCWaveFunctions/TrialWaveFunction.h
@@ -551,6 +551,11 @@ class TrialWaveFunction
   std::vector<std::reference_wrapper<NewTimer>> WFC_timers_;
   std::vector<RealType> myTwist;
 
+  /** check if any gradient component (x,y,z) is NaN and throw an error if yes.
+   * @param iel particle index
+   * @param grads gradients to be checked
+   * @param location usually put function name to indicate where the check is being called.
+   */
   static void checkOneParticleGradientsNaN(int iel, const GradType& grads, const std::string_view location);
 
   /** @{

From bd35ee7f08358ec7d6ecbf49edcc0c4d9759e313 Mon Sep 17 00:00:00 2001
From: walshmm <walshmm@ornl.gov>
Date: Fri, 11 Aug 2023 10:21:11 -0400
Subject: [PATCH 28/65] pin clang-tidy/format and libomp to 14

---
 .../docker/dependencies/ubuntu22/openmpi/Dockerfile   | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/config/docker/dependencies/ubuntu22/openmpi/Dockerfile b/config/docker/dependencies/ubuntu22/openmpi/Dockerfile
index 2afbc574f3..819d88b46b 100644
--- a/config/docker/dependencies/ubuntu22/openmpi/Dockerfile
+++ b/config/docker/dependencies/ubuntu22/openmpi/Dockerfile
@@ -13,9 +13,9 @@ RUN wget https://apt.kitware.com/kitware-archive.sh &&\
 RUN export DEBIAN_FRONTEND=noninteractive &&\
     apt-get install gcc-9 g++-9 \ 
     clang-14 \
-    clang-format \
-    clang-tidy \
-    libomp-dev \
+    clang-format-14 \
+    clang-tidy-14 \
+    libomp-14-dev \
     gcovr \
     python3 \
     cmake \
@@ -56,6 +56,11 @@ RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 && \
 RUN update-alternatives --install /usr/bin/clang clang /usr/bin/clang-14 100 && \
     update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-14 100
 
+# add clang-format-14, clang-tidy-14 and clang-tidy-diff-14.py under their unversioned names
+RUN update-alternatives --install /usr/bin/clang-format clang-format /usr/bin/clang-format-14 100 && \
+    update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-14 100 && \
+    update-alternatives --install /usr/bin/clang-tidy-diff.py clang-tidy-diff.py /usr/bin/clang-tidy-diff-14.py 100
+
 # must add a user different from root 
 # to run MPI executables
 RUN useradd -ms /bin/bash user

From ee62b3770d4b8d606033126812ce68da4dd95deb Mon Sep 17 00:00:00 2001
From: Ye Luo <yeluo@anl.gov>
Date: Sat, 12 Aug 2023 14:32:21 -0700
Subject: [PATCH 29/65] Add NERSC perlmutter recipe.

---
 config/build_alcf_polaris_Clang.sh     |  8 ++-
 config/build_nersc_perlmutter_Clang.sh | 99 ++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 2 deletions(-)
 create mode 100755 config/build_nersc_perlmutter_Clang.sh

diff --git a/config/build_alcf_polaris_Clang.sh b/config/build_alcf_polaris_Clang.sh
index 6dcdeeda55..7b3e34e000 100755
--- a/config/build_alcf_polaris_Clang.sh
+++ b/config/build_alcf_polaris_Clang.sh
@@ -57,12 +57,16 @@ if [[ $name == *"_MP"* ]]; then
   CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_MIXED_PRECISION=ON"
 fi
 
+if [[ $name == *"offload"* || $name == *"cuda"* ]]; then
+  CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_GPU_ARCHS=sm_80"
+fi
+
 if [[ $name == *"offload"* ]]; then
-  CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON -DUSE_OBJECT_TARGET=ON -DOFFLOAD_ARCH=sm_80"
+  CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON"
 fi
 
 if [[ $name == *"cuda"* ]]; then
-  CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=80"
+  CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_CUDA=ON"
 fi
 
 folder=build_${Machine}_${Compiler}_${name}
diff --git a/config/build_nersc_perlmutter_Clang.sh b/config/build_nersc_perlmutter_Clang.sh
new file mode 100755
index 0000000000..595155a22c
--- /dev/null
+++ b/config/build_nersc_perlmutter_Clang.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# This recipe is intended for NERSC Perlmutter https://docs.nersc.gov/systems/perlmutter
+# It builds all the variants of QMCPACK in the current directory
+# last revision: Aug 12th 2023
+#
+# How to invoke this script?
+# build_alcf_polaris_Clang.sh # build all the variants assuming the current directory is the source directory.
+# build_alcf_polaris_Clang.sh <source_dir> # build all the variants with a given source directory <source_dir>
+# build_alcf_polaris_Clang.sh <source_dir> <install_dir> # build all the variants with a given source directory <source_dir> and install to <install_dir>
+
+module load PrgEnv-gnu
+module load cray-libsci
+CRAY_LIBSCI_LIB=$CRAY_LIBSCI_PREFIX_DIR/lib/libsci_gnu_mp.so
+
+module load PrgEnv-llvm/0.1 llvm/16
+module load cray-fftw/3.3.10.3
+module load cray-hdf5-parallel/1.12.2.3
+module load cmake/3.24.3
+
+
+echo "**********************************"
+echo '$ clang -v'
+clang -v
+echo "**********************************"
+
+TYPE=Release
+Machine=perlmutter
+Compiler=Clang16
+
+if [[ $# -eq 0 ]]; then
+  source_folder=`pwd`
+elif [[ $# -eq 1 ]]; then
+  source_folder=$(realpath "$1") # absolute path: the loop below cd's into each build folder before calling cmake
+else
+  source_folder=$(realpath "$1")
+  install_folder=$2
+fi
+
+if [[ -f $source_folder/CMakeLists.txt ]]; then
+  echo Using QMCPACK source directory $source_folder
+else
+  echo "Source directory $source_folder doesn't contain CMakeLists.txt. Pass QMCPACK source directory as the first argument."
+  exit 1 # a bare exit would return echo's status (0) and report success
+fi
+
+for name in offload_cuda_real_MP offload_cuda_real offload_cuda_cplx_MP offload_cuda_cplx \
+            cpu_real_MP cpu_real cpu_cplx_MP cpu_cplx
+do
+
+CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=$TYPE -DBLAS_LIBRARIES=$CRAY_LIBSCI_LIB"
+
+if [[ $name == *"cplx"* ]]; then
+  CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_COMPLEX=ON"
+fi
+
+if [[ $name == *"_MP"* ]]; then
+  CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_MIXED_PRECISION=ON"
+fi
+
+if [[ $name == *"offload"* || $name == *"cuda"* ]]; then
+  CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_GPU_ARCHS=sm_80"
+fi
+
+if [[ $name == *"offload"* ]]; then
+  CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON"
+fi
+
+if [[ $name == *"cuda"* ]]; then
+  CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_CUDA=ON"
+fi
+
+folder=build_${Machine}_${Compiler}_${name}
+
+if [[ -v install_folder ]]; then
+  CMAKE_FLAGS="$CMAKE_FLAGS -DCMAKE_INSTALL_PREFIX=$install_folder/$folder"
+fi
+
+echo "**********************************"
+echo "$folder"
+echo "$CMAKE_FLAGS"
+echo "**********************************"
+
+mkdir -p "$folder" # -p: an existing build folder is reused (see the CMakeCache.txt check below)
+cd "$folder" || exit 1 # never configure/build in the wrong directory
+
+if [ ! -f CMakeCache.txt ] ; then
+cmake $CMAKE_FLAGS -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx $source_folder
+fi
+
+if [[ -v install_folder ]]; then
+  make -j16 install && chmod -R -w $install_folder/$folder
+else
+  make -j16
+fi
+
+cd ..
+
+echo
+done

From 3aeb289191f5a211a66a941e1febbffb9436387b Mon Sep 17 00:00:00 2001
From: "Paul R. C. Kent" <kentpr@ornl.gov>
Date: Sun, 13 Aug 2023 14:27:38 -0400
Subject: [PATCH 30/65] Fix comments

---
 config/build_nersc_perlmutter_Clang.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/config/build_nersc_perlmutter_Clang.sh b/config/build_nersc_perlmutter_Clang.sh
index 595155a22c..cea2d37c12 100755
--- a/config/build_nersc_perlmutter_Clang.sh
+++ b/config/build_nersc_perlmutter_Clang.sh
@@ -4,9 +4,9 @@
 # last revision: Aug 12th 2023
 #
 # How to invoke this script?
-# build_alcf_polaris_Clang.sh # build all the variants assuming the current directory is the source directory.
-# build_alcf_polaris_Clang.sh <source_dir> # build all the variants with a given source directory <source_dir>
-# build_alcf_polaris_Clang.sh <source_dir> <install_dir> # build all the variants with a given source directory <source_dir> and install to <install_dir>
+# build_nersc_perlmutter_Clang.sh # build all the variants assuming the current directory is the source directory.
+# build_nersc_perlmutter_Clang.sh <source_dir> # build all the variants with a given source directory <source_dir>
+# build_nersc_perlmutter_Clang.sh <source_dir> <install_dir> # build all the variants with a given source directory <source_dir> and install to <install_dir>
 
 module load PrgEnv-gnu
 module load cray-libsci

From 146e863d3bc61b5ea78be8cdf5d8211a9fe27c69 Mon Sep 17 00:00:00 2001
From: Gani Annaberdiyev <annaberdiyev@gmail.com>
Date: Mon, 14 Aug 2023 12:24:29 -0700
Subject: [PATCH 31/65] fix gpu jobs for perlmutter

---
 nexus/lib/machines.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nexus/lib/machines.py b/nexus/lib/machines.py
index 4983d47a77..2f04ffffb8 100644
--- a/nexus/lib/machines.py
+++ b/nexus/lib/machines.py
@@ -2314,7 +2314,7 @@ def write_job_header(self,job):
 echo $SLURM_SUBMIT_DIR
 cd $SLURM_SUBMIT_DIR
 '''
-        if job.threads>1:
+        if (job.threads>1) and ('cpu' in job.constraint):
             c+='''
 export OMP_PROC_BIND=true
 export OMP_PLACES=threads

From 02b0696ed4a3a0eaf9fee4b6aebdd4d5113fc759 Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Thu, 17 Aug 2023 13:11:32 -0600
Subject: [PATCH 32/65] timer for applyRotation

---
 src/QMCWaveFunctions/RotatedSPOs.cpp | 12 ++++++++++--
 src/QMCWaveFunctions/RotatedSPOs.h   |  3 +++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/QMCWaveFunctions/RotatedSPOs.cpp b/src/QMCWaveFunctions/RotatedSPOs.cpp
index ad68f59a72..1b74679de6 100644
--- a/src/QMCWaveFunctions/RotatedSPOs.cpp
+++ b/src/QMCWaveFunctions/RotatedSPOs.cpp
@@ -20,7 +20,12 @@
 namespace qmcplusplus
 {
 RotatedSPOs::RotatedSPOs(const std::string& my_name, std::unique_ptr<SPOSet>&& spos)
-    : SPOSet(my_name), OptimizableObject(my_name), Phi(std::move(spos)), nel_major_(0), params_supplied(false)
+    : SPOSet(my_name),
+      OptimizableObject(my_name),
+      Phi(std::move(spos)),
+      nel_major_(0),
+      params_supplied(false),
+      rotation_timer_(createGlobalTimer("RotatedSPOs::apply_rotation", timer_level_fine))
 {
   OrbitalSetSize = Phi->getOrbitalSetSize();
 }
@@ -408,7 +413,10 @@ void RotatedSPOs::apply_rotation(const std::vector<RealType>& param, bool use_st
     Finally, apply unitary matrix to orbs.
   */
   exponentiate_antisym_matrix(rot_mat);
-  Phi->applyRotation(rot_mat, use_stored_copy);
+  {
+    ScopedTimer local(rotation_timer_);
+    Phi->applyRotation(rot_mat, use_stored_copy);
+  }
 }
 
 void RotatedSPOs::applyDeltaRotation(const std::vector<RealType>& delta_param,
diff --git a/src/QMCWaveFunctions/RotatedSPOs.h b/src/QMCWaveFunctions/RotatedSPOs.h
index b4f1b0e908..f1d89220bd 100644
--- a/src/QMCWaveFunctions/RotatedSPOs.h
+++ b/src/QMCWaveFunctions/RotatedSPOs.h
@@ -380,6 +380,9 @@ class RotatedSPOs : public SPOSet, public OptimizableObject
   /// Full set of rotation matrix parameters for use in global rotation method
   opt_variables_type myVarsFull;
 
+  /// timer for apply_rotation
+  NewTimer& rotation_timer_;
+
   /// List of previously applied parameters
   std::vector<std::vector<RealType>> history_params_;
 

From 8f0d7fc5039e8ee3acb79cfa2153ec08c71a5136 Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Thu, 17 Aug 2023 13:27:29 -0600
Subject: [PATCH 33/65] add timer to delta rotation

---
 src/QMCWaveFunctions/RotatedSPOs.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/QMCWaveFunctions/RotatedSPOs.cpp b/src/QMCWaveFunctions/RotatedSPOs.cpp
index 1b74679de6..e17bae15e0 100644
--- a/src/QMCWaveFunctions/RotatedSPOs.cpp
+++ b/src/QMCWaveFunctions/RotatedSPOs.cpp
@@ -427,7 +427,10 @@ void RotatedSPOs::applyDeltaRotation(const std::vector<RealType>& delta_param,
   ValueMatrix new_rot_mat(nmo, nmo);
   constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, new_rot_mat);
 
-  Phi->applyRotation(new_rot_mat, true);
+  {
+    ScopedTimer local(rotation_timer_);
+    Phi->applyRotation(new_rot_mat, true);
+  }
 }
 
 void RotatedSPOs::constructDeltaRotation(const std::vector<RealType>& delta_param,

From 134896bc2cb6c0dc432ada7ffd4477fba9a6e9cb Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Thu, 17 Aug 2023 20:19:54 -0400
Subject: [PATCH 34/65] add mw_ APIs to RotatedSPO (#4701)

* add mw_ APIs to rotatedSPO

* adding unit test

* add resource APIs

* remove unused header in test

* fix typo

* fix typo in test

* add .get() to test

* remove .get() from evaluateValue

* extract underlying SPOs as RefVectorWithLeader

* typo

* fix -> to .

* address review

* more reviews

* forgot a std::move
---
 src/QMCWaveFunctions/RotatedSPOs.cpp          | 116 ++++++++++++++++++
 src/QMCWaveFunctions/RotatedSPOs.h            |  60 +++++++++
 .../tests/test_RotatedSPOs.cpp                | 116 ++++++++++++++++++
 3 files changed, 292 insertions(+)

diff --git a/src/QMCWaveFunctions/RotatedSPOs.cpp b/src/QMCWaveFunctions/RotatedSPOs.cpp
index ad68f59a72..e0465f24aa 100644
--- a/src/QMCWaveFunctions/RotatedSPOs.cpp
+++ b/src/QMCWaveFunctions/RotatedSPOs.cpp
@@ -1596,5 +1596,121 @@ std::unique_ptr<SPOSet> RotatedSPOs::makeClone() const
   return myclone;
 }
 
+void RotatedSPOs::mw_evaluateDetRatios(const RefVectorWithLeader<SPOSet>& spo_list,
+                                       const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                                       const RefVector<ValueVector>& psi_list,
+                                       const std::vector<const ValueType*>& invRow_ptr_list,
+                                       std::vector<std::vector<ValueType>>& ratios_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateDetRatios(phi_list, vp_list, psi_list, invRow_ptr_list, ratios_list);
+}
+
+void RotatedSPOs::mw_evaluateValue(const RefVectorWithLeader<SPOSet>& spo_list,
+                                   const RefVectorWithLeader<ParticleSet>& P_list,
+                                   int iat,
+                                   const RefVector<ValueVector>& psi_v_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateValue(phi_list, P_list, iat, psi_v_list);
+}
+
+void RotatedSPOs::mw_evaluateVGL(const RefVectorWithLeader<SPOSet>& spo_list,
+                                 const RefVectorWithLeader<ParticleSet>& P_list,
+                                 int iat,
+                                 const RefVector<ValueVector>& psi_v_list,
+                                 const RefVector<GradVector>& dpsi_v_list,
+                                 const RefVector<ValueVector>& d2psi_v_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGL(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list);
+}
+
+void RotatedSPOs::mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSet>& spo_list,
+                                         const RefVectorWithLeader<ParticleSet>& P_list,
+                                         int iat,
+                                         const RefVector<ValueVector>& psi_v_list,
+                                         const RefVector<GradVector>& dpsi_v_list,
+                                         const RefVector<ValueVector>& d2psi_v_list,
+                                         OffloadMatrix<ComplexType>& mw_dspin) const
+{
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGLWithSpin(phi_list, P_list, iat, psi_v_list, dpsi_v_list, d2psi_v_list, mw_dspin);
+}
+
+void RotatedSPOs::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSet>& spo_list,
+                                                 const RefVectorWithLeader<ParticleSet>& P_list,
+                                                 int iat,
+                                                 const std::vector<const ValueType*>& invRow_ptr_list,
+                                                 OffloadMWVGLArray& phi_vgl_v,
+                                                 std::vector<ValueType>& ratios,
+                                                 std::vector<GradType>& grads) const
+{
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGLandDetRatioGrads(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads);
+}
+
+void RotatedSPOs::mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSet>& spo_list,
+                                                         const RefVectorWithLeader<ParticleSet>& P_list,
+                                                         int iat,
+                                                         const std::vector<const ValueType*>& invRow_ptr_list,
+                                                         OffloadMWVGLArray& phi_vgl_v,
+                                                         std::vector<ValueType>& ratios,
+                                                         std::vector<GradType>& grads,
+                                                         std::vector<ValueType>& spingrads) const
+{
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluateVGLandDetRatioGradsWithSpin(phi_list, P_list, iat, invRow_ptr_list, phi_vgl_v, ratios, grads,
+                                                spingrads);
+}
+
+void RotatedSPOs::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSet>& spo_list,
+                                          const RefVectorWithLeader<ParticleSet>& P_list,
+                                          int first,
+                                          int last,
+                                          const RefVector<ValueMatrix>& logdet_list,
+                                          const RefVector<GradMatrix>& dlogdet_list,
+                                          const RefVector<ValueMatrix>& d2logdet_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.mw_evaluate_notranspose(phi_list, P_list, first, last, logdet_list, dlogdet_list, d2logdet_list);
+}
+
+void RotatedSPOs::createResource(ResourceCollection& collection) const { Phi->createResource(collection); }
+
+void RotatedSPOs::acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSet>& spo_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.acquireResource(collection, phi_list);
+}
+
+void RotatedSPOs::releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSet>& spo_list) const
+{
+  auto phi_list = extractPhiRefList(spo_list);
+  auto& leader  = phi_list.getLeader();
+  leader.releaseResource(collection, phi_list);
+}
+
+RefVectorWithLeader<SPOSet> RotatedSPOs::extractPhiRefList(const RefVectorWithLeader<SPOSet>& spo_list)
+{
+  auto& spo_leader = spo_list.getCastedLeader<RotatedSPOs>();
+  const auto nw    = spo_list.size();
+  RefVectorWithLeader<SPOSet> phi_list(*spo_leader.Phi);
+  phi_list.reserve(nw);
+  for (int iw = 0; iw < nw; iw++)
+  {
+    RotatedSPOs& rot = spo_list.getCastedElement<RotatedSPOs>(iw);
+    phi_list.emplace_back(*rot.Phi);
+  }
+  return phi_list;
+}
 
 } // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/RotatedSPOs.h b/src/QMCWaveFunctions/RotatedSPOs.h
index b4f1b0e908..5a9f6cb1c7 100644
--- a/src/QMCWaveFunctions/RotatedSPOs.h
+++ b/src/QMCWaveFunctions/RotatedSPOs.h
@@ -371,6 +371,63 @@ class RotatedSPOs : public SPOSet, public OptimizableObject
   /// Use history list (false) or global rotation (true)
   void set_use_global_rotation(bool use_global_rotation) { use_global_rot_ = use_global_rotation; }
 
+  void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSet>& spo_list,
+                            const RefVectorWithLeader<const VirtualParticleSet>& vp_list,
+                            const RefVector<ValueVector>& psi_list,
+                            const std::vector<const ValueType*>& invRow_ptr_list,
+                            std::vector<std::vector<ValueType>>& ratios_list) const override;
+
+  void mw_evaluateValue(const RefVectorWithLeader<SPOSet>& spo_list,
+                        const RefVectorWithLeader<ParticleSet>& P_list,
+                        int iat,
+                        const RefVector<ValueVector>& psi_v_list) const override;
+
+  void mw_evaluateVGL(const RefVectorWithLeader<SPOSet>& spo_list,
+                      const RefVectorWithLeader<ParticleSet>& P_list,
+                      int iat,
+                      const RefVector<ValueVector>& psi_v_list,
+                      const RefVector<GradVector>& dpsi_v_list,
+                      const RefVector<ValueVector>& d2psi_v_list) const override;
+
+  void mw_evaluateVGLWithSpin(const RefVectorWithLeader<SPOSet>& spo_list,
+                              const RefVectorWithLeader<ParticleSet>& P_list,
+                              int iat,
+                              const RefVector<ValueVector>& psi_v_list,
+                              const RefVector<GradVector>& dpsi_v_list,
+                              const RefVector<ValueVector>& d2psi_v_list,
+                              OffloadMatrix<ComplexType>& mw_dspin) const override;
+
+  void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSet>& spo_list,
+                                      const RefVectorWithLeader<ParticleSet>& P_list,
+                                      int iat,
+                                      const std::vector<const ValueType*>& invRow_ptr_list,
+                                      OffloadMWVGLArray& phi_vgl_v,
+                                      std::vector<ValueType>& ratios,
+                                      std::vector<GradType>& grads) const override;
+
+  void mw_evaluateVGLandDetRatioGradsWithSpin(const RefVectorWithLeader<SPOSet>& spo_list,
+                                              const RefVectorWithLeader<ParticleSet>& P_list,
+                                              int iat,
+                                              const std::vector<const ValueType*>& invRow_ptr_list,
+                                              OffloadMWVGLArray& phi_vgl_v,
+                                              std::vector<ValueType>& ratios,
+                                              std::vector<GradType>& grads,
+                                              std::vector<ValueType>& spingrads) const override;
+
+  void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSet>& spo_list,
+                               const RefVectorWithLeader<ParticleSet>& P_list,
+                               int first,
+                               int last,
+                               const RefVector<ValueMatrix>& logdet_list,
+                               const RefVector<GradMatrix>& dlogdet_list,
+                               const RefVector<ValueMatrix>& d2logdet_list) const override;
+
+  void createResource(ResourceCollection& collection) const override;
+
+  void acquireResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSet>& spo_list) const override;
+
+  void releaseResource(ResourceCollection& collection, const RefVectorWithLeader<SPOSet>& spo_list) const override;
+
 private:
   /// true if SPO parameters (orbital rotation parameters) have been supplied by input
   bool params_supplied;
@@ -383,6 +440,8 @@ class RotatedSPOs : public SPOSet, public OptimizableObject
   /// List of previously applied parameters
   std::vector<std::vector<RealType>> history_params_;
 
+  static RefVectorWithLeader<SPOSet> extractPhiRefList(const RefVectorWithLeader<SPOSet>& spo_list);
+
   /// Use global rotation or history list
   bool use_global_rot_ = true;
 
@@ -390,6 +449,7 @@ class RotatedSPOs : public SPOSet, public OptimizableObject
   friend std::vector<std::vector<RealType>>& testing::getHistoryParams(RotatedSPOs& rot);
 };
 
+
 } //namespace qmcplusplus
 
 #endif
diff --git a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp b/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
index 1c009357ee..39e35c9c70 100644
--- a/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
+++ b/src/QMCWaveFunctions/tests/test_RotatedSPOs.cpp
@@ -22,6 +22,7 @@
 #include "QMCWaveFunctions/RotatedSPOs.h"
 #include "checkMatrix.hpp"
 #include "FakeSPO.h"
+#include <ResourceCollection.h>
 
 #include <stdio.h>
 #include <string>
@@ -749,4 +750,119 @@ TEST_CASE("RotatedSPOs read and write parameters history", "[wavefunction]")
   REQUIRE(hist[0].size() == 4);
 }
 
+class DummySPOSetWithoutMW : public SPOSet
+{
+public:
+  DummySPOSetWithoutMW(const std::string& my_name) : SPOSet(my_name) {}
+  void setOrbitalSetSize(int norbs) override {}
+  void evaluateValue(const ParticleSet& P, int iat, SPOSet::ValueVector& psi) override
+  {
+    assert(psi.size() == 3);
+    psi[0] = 123;
+    psi[1] = 456;
+    psi[2] = 789;
+  }
+  void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override {}
+  void evaluate_notranspose(const ParticleSet& P,
+                            int first,
+                            int last,
+                            ValueMatrix& logdet,
+                            GradMatrix& dlogdet,
+                            ValueMatrix& d2logdet) override
+  {}
+  std::string getClassName() const override { return my_name_; }
+};
+
+class DummySPOSetWithMW : public DummySPOSetWithoutMW
+{
+public:
+  DummySPOSetWithMW(const std::string& my_name) : DummySPOSetWithoutMW(my_name) {}
+  void mw_evaluateValue(const RefVectorWithLeader<SPOSet>& spo_list,
+                        const RefVectorWithLeader<ParticleSet>& P_list,
+                        int iat,
+                        const RefVector<ValueVector>& psi_v_list) const override
+  {
+    for (auto& psi : psi_v_list)
+    {
+      assert(psi.get().size() == 3);
+      psi.get()[0] = 321;
+      psi.get()[1] = 654;
+      psi.get()[2] = 987;
+    }
+  }
+};
+
+TEST_CASE("RotatedSPOs mw_ APIs", "[wavefunction]")
+{
+  //checking that mw_ API works in RotatedSPOs and is not defaulting to
+  //SPOSet default implementation
+  {
+    //First check calling the mw_ APIs for RotatedSPOs, for which the
+    //underlying implementation just calls the underlying SPOSet mw_ API
+    //In the case that the underlying SPOSet doesn't specialize the mw_ API,
+    //the underlying SPOSet will fall back to the default SPOSet mw_, which is
+    //just a loop over the single walker API.
+    RotatedSPOs rot_spo0("rotated0", std::make_unique<DummySPOSetWithoutMW>("no mw 0"));
+    RotatedSPOs rot_spo1("rotated1", std::make_unique<DummySPOSetWithoutMW>("no mw 1"));
+    RefVectorWithLeader<SPOSet> spo_list(rot_spo0, {rot_spo0, rot_spo1});
+
+    ResourceCollection spo_res("test_rot_res");
+    rot_spo0.createResource(spo_res);
+    ResourceCollectionTeamLock<SPOSet> mw_sposet_lock(spo_res, spo_list);
+
+    const SimulationCell simulation_cell;
+    ParticleSet elec0(simulation_cell);
+    ParticleSet elec1(simulation_cell);
+    RefVectorWithLeader<ParticleSet> p_list(elec0, {elec0, elec1});
+
+    SPOSet::ValueVector psi0(3);
+    SPOSet::ValueVector psi1(3);
+    RefVector<SPOSet::ValueVector> psi_v_list{psi0, psi1};
+
+    rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list);
+    for (int iw = 0; iw < spo_list.size(); iw++)
+    {
+      CHECK(psi_v_list[iw].get()[0] == Approx(123));
+      CHECK(psi_v_list[iw].get()[1] == Approx(456));
+      CHECK(psi_v_list[iw].get()[2] == Approx(789));
+    }
+  }
+  {
+    //In the case that the underlying SPOSet DOES have mw_ specializations,
+    //we want to make sure that RotatedSPOs are triggering that appropriately
+    //This will mean that the underlying SPOSets will do the appropriate offloading
+    //To check this, DummySPOSetWithMW has an explicit mw_evaluateValue which sets
+    //different values than what gets set in evaluateValue. By doing this,
+    //we are ensuring that RotatedSPOs::mw_evaluateValue is calling the specialization
+    //in the underlying SPO and not using the default SPOSet implementation which
+    //loops over single walker APIs (which have different values enforced in
+    //DummySPOSetWithoutMW).
+
+    RotatedSPOs rot_spo0("rotated0", std::make_unique<DummySPOSetWithMW>("mw 0"));
+    RotatedSPOs rot_spo1("rotated1", std::make_unique<DummySPOSetWithMW>("mw 1"));
+    RefVectorWithLeader<SPOSet> spo_list(rot_spo0, {rot_spo0, rot_spo1});
+
+    ResourceCollection spo_res("test_rot_res");
+    rot_spo0.createResource(spo_res);
+    ResourceCollectionTeamLock<SPOSet> mw_sposet_lock(spo_res, spo_list);
+
+    const SimulationCell simulation_cell;
+    ParticleSet elec0(simulation_cell);
+    ParticleSet elec1(simulation_cell);
+    RefVectorWithLeader<ParticleSet> p_list(elec0, {elec0, elec1});
+
+    SPOSet::ValueVector psi0(3);
+    SPOSet::ValueVector psi1(3);
+    RefVector<SPOSet::ValueVector> psi_v_list{psi0, psi1};
+
+    rot_spo0.mw_evaluateValue(spo_list, p_list, 0, psi_v_list);
+    for (int iw = 0; iw < spo_list.size(); iw++)
+    {
+      CHECK(psi_v_list[iw].get()[0] == Approx(321));
+      CHECK(psi_v_list[iw].get()[1] == Approx(654));
+      CHECK(psi_v_list[iw].get()[2] == Approx(987));
+    }
+  }
+}
+
 } // namespace qmcplusplus

From 91a018a0d08e406b2ccdd70e5c132fae16476878 Mon Sep 17 00:00:00 2001
From: Paul Kent <kentpr@ornl.gov>
Date: Fri, 18 Aug 2023 12:55:29 -0400
Subject: [PATCH 35/65] Increase version number

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 558d5eaf19..3bdf6834a1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ endif()
 ######################################################################
 project(
   qmcpack
-  VERSION 3.16.9
+  VERSION 3.17.0
   LANGUAGES C CXX)
 
 # add the automatically determined parts of the RPATH

From 18804d643eea76d3b8e4835cf9131923508364b9 Mon Sep 17 00:00:00 2001
From: Paul Kent <kentpr@ornl.gov>
Date: Fri, 18 Aug 2023 13:35:30 -0400
Subject: [PATCH 36/65] Update changelog

---
 CHANGELOG.md | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b32a9dad3e..882a6b9e76 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,9 +2,61 @@
 
 Notable changes to QMCPACK are documented in this file.
 
-## [Unreleased]
+## [3.17.0] - 2023-08-18
+
+This is a recommended release for all users. Thanks to everyone who contributed directly, reported an issue, or suggested an
+improvement. There are many quality of life improvements, bug fixes throughout the application, and updates to the associated
+testing. As previously announced, the legacy CUDA support (QMC_CUDA=1) is removed in this version. For GPU support, users should
+transition to the offload code which is more capable and fully usable in production on NVIDIA GPUs.
+
+This version is intended for long-term support of v3 of QMCPACK. Development effort is now focused towards v4. Contributions of
+tests, fixes, and features from users and developers are still welcome to v3 for a potential future release. However, these will not
+be ported towards v4 by the core QMCPACK developers without prior arrangement. Please discuss options with QMCPACK developers.
+
+* Simplified checkpointing and enabled it in the batched drivers. Users now only need to specify checkpoint={-1,0,N} to checkpoint
+  between blocks. [#4646](https://github.com/QMCPACK/qmcpack/pull/4646)
+* NERSC Perlmutter build recipe. [#4698](https://github.com/QMCPACK/qmcpack/pull/4698)
+* qmc-fit: Now supports parameter fitting with jackknife for e.g. DFT+U, EXX scans
+  [#4475](https://github.com/QMCPACK/qmcpack/pull/4475) and for equation-of-state and Morse fits
+  [#4518](https://github.com/QMCPACK/qmcpack/pull/4518)
+* Improved error checking including NaN checks to protect against potentially unreliable compilers and libraries,
+  [#4697](https://github.com/QMCPACK/qmcpack/pull/4697), and checks on GPU matrix inversion
+  [#4693](https://github.com/QMCPACK/qmcpack/pull/4693)
+* Significant advances in orbital optimization capability, focusing on LCAO wavefunctions. Development is ongoing for
+  multideterminant support and for spline wavefunctions. See e.g. the Be atom orbital optimization test
+  [#4626](https://github.com/QMCPACK/qmcpack/pull/4626), [#4619](https://github.com/QMCPACK/qmcpack/pull/4619), reading and writing
+  of orbital rotation parameters [#4580](https://github.com/QMCPACK/qmcpack/pull/4580), support for disabled/frozen parameters
+  [#4581](https://github.com/QMCPACK/qmcpack/pull/4581). 
+* Magnetization Density Estimator for non-collinear wavefunctions [#4531](https://github.com/QMCPACK/qmcpack/pull/4531)
+* The legacy CUDA implementation, the version built with QMC_CUDA=1, has been removed from the codebase,
+  [#4431](https://github.com/QMCPACK/qmcpack/pull/4431),
+  [#4632](https://github.com/QMCPACK/qmcpack/pull/4632),[#4499](https://github.com/QMCPACK/qmcpack/pull/4499),
+  [#4442](https://github.com/QMCPACK/qmcpack/pull/4442).
+* For increased performance with current AMD GPU support, new QMC_DISABLE_HIP_HOST_REGISTER option is enabled by default for
+  ROCm/HIP builds. [#4674](https://github.com/QMCPACK/qmcpack/pull/4674)
+* Bugfix: J1Spin indexing was wrong [#4612](https://github.com/QMCPACK/qmcpack/pull/4612)
+* Bugfix: 1RDM estimator data written to stat.h5 was incorrect [#4568](https://github.com/QMCPACK/qmcpack/pull/4568)
+* Introduced ENABLE_PPCONVERT option and skip ppconvert compilation when cross compiling. [#4601](https://github.com/QMCPACK/qmcpack/pull/4601)
+* Faster builds compared to v3.16.0 due to code refactoring
+* Many refinements throughout the codebase, cleanup, improved testing.
 
-The legacy CUDA implementation, the version built with QMC_CUDA=1, has been removed from the codebase.
+### NEXUS
+
+* Nexus: Equilibration detection algorithm is now deterministic [#4557](https://github.com/QMCPACK/qmcpack/pull/4557)
+* Nexus: Support for Kagayaki cluster at JAIST [#4598](https://github.com/QMCPACK/qmcpack/pull/4598)
+* Nexus: GPU support fix for NERSC/Perlmutter [#4699](https://github.com/QMCPACK/qmcpack/pull/4699)
+* Nexus: Use simplices in convex_hull to support newer scipy versions [#4671](https://github.com/QMCPACK/qmcpack/pull/4671)
+* Nexus: Add pdos flag for Projwfc [#4655](https://github.com/QMCPACK/qmcpack/pull/4655)
+* Nexus: Adding crowds_serialize_walkers tag to dmc input list [#4651](https://github.com/QMCPACK/qmcpack/pull/4651)
+* Nexus: Qdens handles batched driver input/output [#4645](https://github.com/QMCPACK/qmcpack/pull/4645)
+* Nexus: Fix namelist read for Projwfc input [#4644](https://github.com/QMCPACK/qmcpack/pull/4644)
+
+### Known problems
+
+* When offload builds are compiled with CUDA toolkit versions above 11.2 using LLVM, multideterminant tests and functionality will
+  fail, seemingly due to an issue with the toolkit. This is discussed in https://github.com/llvm/llvm-project/issues/54633 . All
+  other functionality appears to work as expected. As a workaround, the CUDA toolkit 11.2 can be used. The actual NVIDIA drivers can
+  be more recent.
 
 ## [3.16.0] - 2023-01-31
 

From af3ac23570f80d6ce4491864c0eeb1717ee3e8f4 Mon Sep 17 00:00:00 2001
From: "Paul R. C. Kent" <kentpr@ornl.gov>
Date: Fri, 18 Aug 2023 15:12:53 -0400
Subject: [PATCH 37/65] Add link

Co-authored-by: Steven Hahn <hahnse@ornl.gov>
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 882a6b9e76..6e67ddcd64 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,7 +37,7 @@ be ported towards v4 by the core QMCPACK developers without prior arrangement. P
 * Bugfix: J1Spin indexing was wrong [#4612](https://github.com/QMCPACK/qmcpack/pull/4612)
 * Bugfix: 1RDM estimator data written to stat.h5 was incorrect [#4568](https://github.com/QMCPACK/qmcpack/pull/4568)
 * Introduced ENABLE_PPCONVERT option and skip ppconvert compilation when cross compiling. [#4601](https://github.com/QMCPACK/qmcpack/pull/4601)
-* Faster builds compared to v3.16.0 due to code refactoring
+* Faster builds compared to v3.16.0 due to code refactoring [#4682](https://github.com/QMCPACK/qmcpack/pull/4682)
 * Many refinements throughout the codebase, cleanup, improved testing.
 
 ### NEXUS

From cde07c61252238f835e6ae403e5d3051333f0f73 Mon Sep 17 00:00:00 2001
From: Paul Kent <kentpr@ornl.gov>
Date: Fri, 18 Aug 2023 15:55:05 -0400
Subject: [PATCH 38/65] Mention regularizer

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6e67ddcd64..e12e6bc7c4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,6 +28,7 @@ be ported towards v4 by the core QMCPACK developers without prior arrangement. P
   of orbital rotation parameters [#4580](https://github.com/QMCPACK/qmcpack/pull/4580), support for disabled/frozen parameters
   [#4581](https://github.com/QMCPACK/qmcpack/pull/4581). 
 * Magnetization Density Estimator for non-collinear wavefunctions [#4531](https://github.com/QMCPACK/qmcpack/pull/4531)
+* Pathak-Wagner regularizer for forces [#4477](https://github.com/QMCPACK/qmcpack/pull/4477)
 * The legacy CUDA implementation, the version built with QMC_CUDA=1, has been removed from the codebase,
   [#4431](https://github.com/QMCPACK/qmcpack/pull/4431),
   [#4632](https://github.com/QMCPACK/qmcpack/pull/4632),[#4499](https://github.com/QMCPACK/qmcpack/pull/4499),

From 08cb9a454c5f1a8527de75bf905061986156f6c3 Mon Sep 17 00:00:00 2001
From: "Paul R. C. Kent" <kentpr@ornl.gov>
Date: Fri, 18 Aug 2023 18:39:21 -0400
Subject: [PATCH 39/65] Set development version

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3bdf6834a1..23a8b5f19b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ endif()
 ######################################################################
 project(
   qmcpack
-  VERSION 3.17.0
+  VERSION 3.17.9
   LANGUAGES C CXX)
 
 # add the automatically determined parts of the RPATH

From d292b1d091dabdd979c859cd453d9ff107e68848 Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Mon, 21 Aug 2023 16:38:59 -0600
Subject: [PATCH 40/65] change name of timer

---
 src/QMCWaveFunctions/RotatedSPOs.cpp | 6 +++---
 src/QMCWaveFunctions/RotatedSPOs.h   | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/QMCWaveFunctions/RotatedSPOs.cpp b/src/QMCWaveFunctions/RotatedSPOs.cpp
index e17bae15e0..8da7b77d28 100644
--- a/src/QMCWaveFunctions/RotatedSPOs.cpp
+++ b/src/QMCWaveFunctions/RotatedSPOs.cpp
@@ -25,7 +25,7 @@ RotatedSPOs::RotatedSPOs(const std::string& my_name, std::unique_ptr<SPOSet>&& s
       Phi(std::move(spos)),
       nel_major_(0),
       params_supplied(false),
-      rotation_timer_(createGlobalTimer("RotatedSPOs::apply_rotation", timer_level_fine))
+      apply_rotation_timer_(createGlobalTimer("RotatedSPOs::apply_rotation", timer_level_fine))
 {
   OrbitalSetSize = Phi->getOrbitalSetSize();
 }
@@ -414,7 +414,7 @@ void RotatedSPOs::apply_rotation(const std::vector<RealType>& param, bool use_st
   */
   exponentiate_antisym_matrix(rot_mat);
   {
-    ScopedTimer local(rotation_timer_);
+    ScopedTimer local(apply_rotation_timer_);
     Phi->applyRotation(rot_mat, use_stored_copy);
   }
 }
@@ -428,7 +428,7 @@ void RotatedSPOs::applyDeltaRotation(const std::vector<RealType>& delta_param,
   constructDeltaRotation(delta_param, old_param, m_act_rot_inds, m_full_rot_inds, new_param, new_rot_mat);
 
   {
-    ScopedTimer local(rotation_timer_);
+    ScopedTimer local(apply_rotation_timer_);
     Phi->applyRotation(new_rot_mat, true);
   }
 }
diff --git a/src/QMCWaveFunctions/RotatedSPOs.h b/src/QMCWaveFunctions/RotatedSPOs.h
index f1d89220bd..5e9ea75122 100644
--- a/src/QMCWaveFunctions/RotatedSPOs.h
+++ b/src/QMCWaveFunctions/RotatedSPOs.h
@@ -381,7 +381,7 @@ class RotatedSPOs : public SPOSet, public OptimizableObject
   opt_variables_type myVarsFull;
 
   /// timer for apply_rotation
-  NewTimer& rotation_timer_;
+  NewTimer& apply_rotation_timer_;
 
   /// List of previously applied parameters
   std::vector<std::vector<RealType>> history_params_;

From 332ad014db7d6d2e120b142083c0b3c1168e767a Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Tue, 22 Aug 2023 16:57:14 -0600
Subject: [PATCH 41/65] initial attempt at BLAS implementation, commented out
 since it doesn't actually pass the test

---
 src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp | 9 ++++++++-
 src/QMCWaveFunctions/BsplineFactory/SplineR2R.h   | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
index ebe548dd25..918784cd35 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
@@ -17,6 +17,7 @@
 #include "SplineR2R.h"
 #include "spline2/MultiBsplineEval.hpp"
 #include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp"
+#include "Platforms/CPU/BLAS.hpp"
 
 namespace qmcplusplus
 {
@@ -56,7 +57,7 @@ void SplineR2R<ST>::storeParamsBeforeRotation()
 {
   const auto spline_ptr     = SplineInst->getSplinePtr();
   const auto coefs_tot_size = spline_ptr->coefs_size;
-  coef_copy_                = std::make_shared<std::vector<RealType>>(coefs_tot_size);
+  coef_copy_                = std::make_shared<std::vector<ST>>(coefs_tot_size);
 
   std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin());
 }
@@ -135,6 +136,12 @@ void SplineR2R<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
       spl_coefs[cur_elem] = newval;
     }
   }
+  
+  //std::vector<ST> rot_mat_padded(Nsplines * Nsplines, 0);
+  //for (auto i = 0; i < OrbitalSetSize; i++)
+  //  for (auto j = 0; j < OrbitalSetSize; j++)
+  //     rot_mat_padded[i * Nsplines + j] = rot_mat[i][j];
+  //BLAS::gemm('N', 'N', BasisSetSize, Nsplines, Nsplines, ST(1.0), (*coef_copy_).data(), BasisSetSize, rot_mat_padded.data(), Nsplines, ST(0.0), spl_coefs, BasisSetSize);
 }
 
 
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h
index a3ac0f919d..3de6fc33fc 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.h
@@ -59,7 +59,7 @@ class SplineR2R : public BsplineSet
   std::shared_ptr<MultiBspline<ST>> SplineInst;
 
   ///Copy of original splines for orbital rotation
-  std::shared_ptr<std::vector<RealType>> coef_copy_;
+  std::shared_ptr<std::vector<ST>> coef_copy_;
 
   ///thread private ratios for reduction when using nested threading, numVP x numThread
   Matrix<TT> ratios_private;

From 231d4f7915b0a7d7c02f2d18b669745e85ba9597 Mon Sep 17 00:00:00 2001
From: Jaron Krogel <krogeljt@ornl.gov>
Date: Wed, 23 Aug 2023 17:03:51 -0400
Subject: [PATCH 42/65] nexus: add spinor inputs

---
 nexus/lib/qmcpack_input.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/nexus/lib/qmcpack_input.py b/nexus/lib/qmcpack_input.py
index a11044969d..6f401765db 100644
--- a/nexus/lib/qmcpack_input.py
+++ b/nexus/lib/qmcpack_input.py
@@ -1810,10 +1810,10 @@ class simulationcell(QIxml):
 #end class simulationcell
 
 class particleset(QIxml):
-    attributes = ['name','size','random','random_source','randomsrc','charge','source']
+    attributes = ['name','size','random','random_source','randomsrc','charge','source','spinor']
     elements   = ['group','simulationcell']
     attribs    = ['ionid','position']
-    write_types= obj(random=yesno)
+    write_types= obj(random=yesno,spinor=yesno)
     identifier = 'name'
 #end class particleset
 
@@ -2319,7 +2319,7 @@ class dm1b(QIxml): # legacy
     tag         = 'estimator'
     identifier  = 'type'
     attributes  = ['type','name','reuse']#reuse is a temporary dummy keyword
-    parameters  = ['energy_matrix','basis_size','integrator','points','scale','basis','evaluator','center','check_overlap','check_derivatives','acceptance_ratio','rstats','normalized','volume_normed']
+    parameters  = ['energy_matrix','basis_size','integrator','points','scale','basis','evaluator','center','check_overlap','check_derivatives','acceptance_ratio','rstats','normalized','volume_normed','samples']
     write_types = obj(energy_matrix=yesno,check_overlap=yesno,check_derivatives=yesno,acceptance_ratio=yesno,rstats=yesno,normalized=yesno,volume_normed=yesno)
 #end class dm1b
 
@@ -2327,7 +2327,7 @@ class onebodydensitymatrices(QIxml): # batched
     tag         = 'estimator'
     identifier  = 'type'
     attributes  = ['type','name','reuse']#reuse is a temporary dummy keyword
-    parameters  = ['energy_matrix','basis_size','integrator','points','scale','basis','evaluator','center','check_overlap','check_derivatives','acceptance_ratio','rstats','normalized','volume_normed']
+    parameters  = ['energy_matrix','basis_size','integrator','points','scale','basis','evaluator','center','check_overlap','check_derivatives','acceptance_ratio','rstats','normalized','volume_normed','samples']
     write_types = obj(energy_matrix=yesno,check_overlap=yesno,check_derivatives=yesno,acceptance_ratio=yesno,rstats=yesno,normalized=yesno,volume_normed=yesno)
 #end class onebodydensitymatrices
 
@@ -2536,6 +2536,7 @@ class vmc(QIxml):
                   'blocks','steps','substeps','timestep','maxcpusecs','rewind',
                   'storeconfigs','checkproperties','recordconfigs','current',
                   'stepsbetweensamples','samplesperthread','samples','usedrift',
+                  'spinmass',
                   'walkers','nonlocalpp','tau','walkersperthread','reconfiguration', # legacy - batched
                   'dmcwalkersperthread','current','ratio','firststep',
                   'minimumtargetwalkers','max_seconds']
@@ -2558,6 +2559,7 @@ class dmc(QIxml):
                   'stepsbetweensamples','samplesperthread','samples','reconfiguration',
                   'nonlocalmoves','maxage','alpha','gamma','reserve','use_nonblocking',
                   'branching_cutoff_scheme','feedback','sigmabound',
+                  'spinmass',
                   'walkers','nonlocalmove','pop_control','targetwalkers',               # legacy - batched
                   'minimumtargetwalkers','energybound','feedback','recordwalkers',
                   'fastgrad','popcontrol','branchinterval','usedrift','storeconfigs',
@@ -2812,6 +2814,7 @@ class gen(QIxml):
     l2_diffusion     = 'L2_diffusion',
     maxage           = 'MaxAge',
     sigmabound       = 'sigmaBound',
+    spinmass         = 'spinMass',
     )
 # afqmc names
 Names.set_afqmc_expanded_names(

From 047b9016bbbc3aff1d3f619ba3840b7b917d3866 Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Wed, 23 Aug 2023 17:42:55 -0600
Subject: [PATCH 43/65] got blas call working. row major vs. column major was
 the problem

---
 .../BsplineFactory/SplineR2R.cpp              | 25 ++++---------------
 1 file changed, 5 insertions(+), 20 deletions(-)

diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
index 918784cd35..9863aefdb9 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
@@ -121,27 +121,12 @@ void SplineR2R<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
     std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin());
   }
 
-  // Apply rotation the dumb way b/c I can't get BLAS::gemm to work...
-  for (auto i = 0; i < BasisSetSize; i++)
-  {
+  std::vector<ST> rot_mat_padded(Nsplines * Nsplines, 0);
+  for (auto i = 0; i < OrbitalSetSize; i++)
     for (auto j = 0; j < OrbitalSetSize; j++)
-    {
-      const auto cur_elem = Nsplines * i + j;
-      auto newval{0.};
-      for (auto k = 0; k < OrbitalSetSize; k++)
-      {
-        const auto index = i * Nsplines + k;
-        newval += (*coef_copy_)[index] * rot_mat[k][j];
-      }
-      spl_coefs[cur_elem] = newval;
-    }
-  }
-  
-  //std::vector<ST> rot_mat_padded(Nsplines * Nsplines, 0);
-  //for (auto i = 0; i < OrbitalSetSize; i++)
-  //  for (auto j = 0; j < OrbitalSetSize; j++)
-  //     rot_mat_padded[i * Nsplines + j] = rot_mat[i][j];
-  //BLAS::gemm('N', 'N', BasisSetSize, Nsplines, Nsplines, ST(1.0), (*coef_copy_).data(), BasisSetSize, rot_mat_padded.data(), Nsplines, ST(0.0), spl_coefs, BasisSetSize);
+       rot_mat_padded[i * Nsplines + j] = rot_mat.data()[i * OrbitalSetSize + j];
+  BLAS::gemm('N', 'N', Nsplines, BasisSetSize, Nsplines, ST(1.0), rot_mat_padded.data(), Nsplines, (*coef_copy_).data(), Nsplines, ST(0.0), spl_coefs, Nsplines);
+
 }
 
 

From a1e64c2b5bf420f8219e4cab6ed46947ced14d55 Mon Sep 17 00:00:00 2001
From: Paul Kent <kentpr@ornl.gov>
Date: Wed, 23 Aug 2023 21:51:20 -0400
Subject: [PATCH 44/65] Update HDF5 detection

---
 CMakeLists.txt | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 23a8b5f19b..29a0252979 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -658,9 +658,14 @@ else()
   set(HDF5_USE_STATIC_LIBRARIES off)
 endif()
 
-find_package(HDF5 1.10 COMPONENTS C)
+find_package(HDF5 COMPONENTS C)
 
 if(HDF5_FOUND)
+  if(HDF5_VERSION)
+    if (HDF5_VERSION VERSION_LESS 1.10.0)
+      message(FATAL_ERROR "QMCPACK requires HDF5 version >= 1.10.0")
+    endif()
+  endif(HDF5_VERSION)
   if(HDF5_IS_PARALLEL)
     if(HAVE_MPI)
       message(STATUS "Parallel HDF5 library found")

From 44eee493f2fec226719bc7f68a6e4b9ca9581783 Mon Sep 17 00:00:00 2001
From: "Rohr, Stephan" <stephan.rohr@intel.com>
Date: Thu, 24 Aug 2023 04:28:55 -0700
Subject: [PATCH 45/65] Disable Estimator tests if BUILD_UNIT_TESTS is set to
 OFF.

---
 src/Estimators/CMakeLists.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Estimators/CMakeLists.txt b/src/Estimators/CMakeLists.txt
index 9f76ac4cf5..8e30d88811 100644
--- a/src/Estimators/CMakeLists.txt
+++ b/src/Estimators/CMakeLists.txt
@@ -47,4 +47,6 @@ endif()
 target_include_directories(qmcestimators PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
 target_link_libraries(qmcestimators PUBLIC containers qmcham qmcparticle qmcutil)
 
-add_subdirectory(tests)
+if(BUILD_UNIT_TESTS)
+  add_subdirectory(tests)
+endif()

From 53bee1aae202a556a13222c583a4b59db7170f83 Mon Sep 17 00:00:00 2001
From: "Paul R. C. Kent" <kentpr@ornl.gov>
Date: Thu, 24 Aug 2023 10:28:26 -0400
Subject: [PATCH 46/65] Add comment

---
 CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 29a0252979..271631d012 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -658,7 +658,8 @@ else()
   set(HDF5_USE_STATIC_LIBRARIES off)
 endif()
 
-find_package(HDF5 COMPONENTS C)
+find_package(HDF5 COMPONENTS C) # Note: minimum version check is done below to bypass find_package
+                                # and HDF5 version compatibility subtleties
 
 if(HDF5_FOUND)
   if(HDF5_VERSION)

From 610dfe3f84788cd267d8256dec5a49c0958ac531 Mon Sep 17 00:00:00 2001
From: Paul Kent <kentpr@ornl.gov>
Date: Thu, 24 Aug 2023 14:48:43 -0400
Subject: [PATCH 47/65] Update changelog and version

---
 CHANGELOG.md   | 11 +++++++++++
 CMakeLists.txt |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e12e6bc7c4..f14a24d137 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,17 @@
 
 Notable changes to QMCPACK are documented in this file.
 
+## [3.17.1] - 2023-08-25
+
+This minor release is recommended for all users and include a couple of build fixes and a NEXUS improvement.
+
+* Improved HDF5 detection. Fixes cases where HDF5 was not identified by CMake, including on FreeBSD (thanks @yurivict for the report). [#4708](https://github.com/QMCPACK/qmcpack/pull/4708)
+
+* Fix for building with BUILD_UNIT_TESTS=OFF [#4709](https://github.com/QMCPACK/qmcpack/pull/4709)
+
+### NEXUS
+
+* NEXUS: Support for spinor inputs [#4707](https://github.com/QMCPACK/qmcpack/pull/4707)
 ## [3.17.0] - 2023-08-18
 
 This is a recommended release for all users. Thanks to everyone who contributed directly, reported an issue, or suggested an
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 271631d012..cc6225cbc8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ endif()
 ######################################################################
 project(
   qmcpack
-  VERSION 3.17.9
+  VERSION 3.17.1
   LANGUAGES C CXX)
 
 # add the automatically determined parts of the RPATH

From 6fabd5de111f16d9255f5711f6b91f2db334217c Mon Sep 17 00:00:00 2001
From: Paul Kent <kentpr@ornl.gov>
Date: Thu, 24 Aug 2023 14:49:34 -0400
Subject: [PATCH 48/65] Punctuation

---
 CHANGELOG.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f14a24d137..d9bdd2b2bd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,11 +8,11 @@ This minor release is recommended for all users and include a couple of build fi
 
 * Improved HDF5 detection. Fixes cases where HDF5 was not identified by CMake, including on FreeBSD (thanks @yurivict for the report). [#4708](https://github.com/QMCPACK/qmcpack/pull/4708)
 
-* Fix for building with BUILD_UNIT_TESTS=OFF [#4709](https://github.com/QMCPACK/qmcpack/pull/4709)
+* Fix for building with BUILD_UNIT_TESTS=OFF. [#4709](https://github.com/QMCPACK/qmcpack/pull/4709)
 
 ### NEXUS
 
-* NEXUS: Support for spinor inputs [#4707](https://github.com/QMCPACK/qmcpack/pull/4707)
+* NEXUS: Support for spinor inputs. [#4707](https://github.com/QMCPACK/qmcpack/pull/4707)
 ## [3.17.0] - 2023-08-18
 
 This is a recommended release for all users. Thanks to everyone who contributed directly, reported an issue, or suggested an

From 823f8a6d2d8c5cc26f8eb403286826d08b0ea610 Mon Sep 17 00:00:00 2001
From: Paul Kent <kentpr@ornl.gov>
Date: Thu, 24 Aug 2023 14:52:56 -0400
Subject: [PATCH 49/65] Timer

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d9bdd2b2bd..1cdd8e3aa4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ This minor release is recommended for all users and include a couple of build fi
 * Improved HDF5 detection. Fixes cases where HDF5 was not identified by CMake, including on FreeBSD (thanks @yurivict for the report). [#4708](https://github.com/QMCPACK/qmcpack/pull/4708)
 
 * Fix for building with BUILD_UNIT_TESTS=OFF. [#4709](https://github.com/QMCPACK/qmcpack/pull/4709)
+* Add timer for orbital rotations. [#4706](https://github.com/QMCPACK/qmcpack/pull/4706)
 
 ### NEXUS
 

From c38b9d916eb6fe1185ecb846a9f3da0a9cfba31a Mon Sep 17 00:00:00 2001
From: Paul Kent <kentpr@ornl.gov>
Date: Thu, 24 Aug 2023 14:55:30 -0400
Subject: [PATCH 50/65] No line

---
 CHANGELOG.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1cdd8e3aa4..d27f462986 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,6 @@ Notable changes to QMCPACK are documented in this file.
 This minor release is recommended for all users and include a couple of build fixes and a NEXUS improvement.
 
 * Improved HDF5 detection. Fixes cases where HDF5 was not identified by CMake, including on FreeBSD (thanks @yurivict for the report). [#4708](https://github.com/QMCPACK/qmcpack/pull/4708)
-
 * Fix for building with BUILD_UNIT_TESTS=OFF. [#4709](https://github.com/QMCPACK/qmcpack/pull/4709)
 * Add timer for orbital rotations. [#4706](https://github.com/QMCPACK/qmcpack/pull/4706)
 

From 53f111e9ecbf98db70daef3c7a57c3024045bdc1 Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Thu, 24 Aug 2023 13:26:01 -0600
Subject: [PATCH 51/65] make split in implementation depending on float vs.
 double

---
 .../BsplineFactory/SplineR2R.cpp              | 31 ++++++++++++++++---
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
index 9863aefdb9..cf29232e51 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
@@ -121,11 +121,32 @@ void SplineR2R<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
     std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin());
   }
 
-  std::vector<ST> rot_mat_padded(Nsplines * Nsplines, 0);
-  for (auto i = 0; i < OrbitalSetSize; i++)
-    for (auto j = 0; j < OrbitalSetSize; j++)
-       rot_mat_padded[i * Nsplines + j] = rot_mat.data()[i * OrbitalSetSize + j];
-  BLAS::gemm('N', 'N', Nsplines, BasisSetSize, Nsplines, ST(1.0), rot_mat_padded.data(), Nsplines, (*coef_copy_).data(), Nsplines, ST(0.0), spl_coefs, Nsplines);
+  
+  if constexpr (std::is_same_v<ST, ValueType>)
+  {
+    //Here, ST should be equal to ValueType, which will be double for R2R. Using BLAS to make things faster
+    std::vector<ST> rot_mat_padded(Nsplines * Nsplines, 0);
+    for (auto i = 0; i < OrbitalSetSize; i++)
+      for (auto j = 0; j < OrbitalSetSize; j++)
+         rot_mat_padded[i * Nsplines + j] = rot_mat.data()[i * OrbitalSetSize + j];
+    BLAS::gemm('N', 'N', Nsplines, BasisSetSize, Nsplines, ST(1.0), rot_mat_padded.data(), Nsplines, (*coef_copy_).data(), Nsplines, ST(0.0), spl_coefs, Nsplines);
+  }
+  else
+  {
+    //Here, ST is float but ValueType is double for R2R. Due to issues with type conversions, just doing naive matrix multiplication in this case to not lose precision on rot_mat
+    for (auto i = 0; i < BasisSetSize; i++)
+      for (auto j = 0; j < OrbitalSetSize; j++)
+      {
+        const auto cur_elem = Nsplines * i + j;
+        auto newval{0.};
+        for (auto k = 0; k < OrbitalSetSize; k++)
+        {
+          const auto index = i * Nsplines + k;
+          newval += (*coef_copy_)[index] * rot_mat[k][j];
+        }
+        spl_coefs[cur_elem] = newval;
+      }
+  }
 
 }
 

From d8705ed893dddecedbc22fc49e07ee55df3a6f70 Mon Sep 17 00:00:00 2001
From: "Paul R. C. Kent" <kentpr@ornl.gov>
Date: Thu, 24 Aug 2023 16:12:12 -0400
Subject: [PATCH 52/65] Typo fix

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d27f462986..39f2e5f0a3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@ Notable changes to QMCPACK are documented in this file.
 
 ## [3.17.1] - 2023-08-25
 
-This minor release is recommended for all users and include a couple of build fixes and a NEXUS improvement.
+This minor release is recommended for all users and includes a couple of build fixes and a NEXUS improvement.
 
 * Improved HDF5 detection. Fixes cases where HDF5 was not identified by CMake, including on FreeBSD (thanks @yurivict for the report). [#4708](https://github.com/QMCPACK/qmcpack/pull/4708)
 * Fix for building with BUILD_UNIT_TESTS=OFF. [#4709](https://github.com/QMCPACK/qmcpack/pull/4709)

From c86e331c5250a97616ae443068bfa660c2c2026b Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Thu, 24 Aug 2023 14:27:53 -0600
Subject: [PATCH 53/65] removed padded rot_mat

---
 src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
index cf29232e51..9c55f27f04 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
@@ -121,15 +121,12 @@ void SplineR2R<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
     std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin());
   }
 
-  
+
   if constexpr (std::is_same_v<ST, ValueType>)
   {
     //Here, ST should be equal to ValueType, which will be double for R2R. Using BLAS to make things faster
-    std::vector<ST> rot_mat_padded(Nsplines * Nsplines, 0);
-    for (auto i = 0; i < OrbitalSetSize; i++)
-      for (auto j = 0; j < OrbitalSetSize; j++)
-         rot_mat_padded[i * Nsplines + j] = rot_mat.data()[i * OrbitalSetSize + j];
-    BLAS::gemm('N', 'N', Nsplines, BasisSetSize, Nsplines, ST(1.0), rot_mat_padded.data(), Nsplines, (*coef_copy_).data(), Nsplines, ST(0.0), spl_coefs, Nsplines);
+    BLAS::gemm('N', 'N', OrbitalSetSize, BasisSetSize, OrbitalSetSize, ST(1.0), rot_mat.data(), OrbitalSetSize,
+               (*coef_copy_).data(), Nsplines, ST(0.0), spl_coefs, Nsplines);
   }
   else
   {
@@ -147,7 +144,6 @@ void SplineR2R<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
         spl_coefs[cur_elem] = newval;
       }
   }
-
 }
 
 

From 85b18c591bca1daf167e363ddd45d6973dff75ab Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Thu, 24 Aug 2023 16:50:30 -0600
Subject: [PATCH 54/65] add blas for SplineC2C applyRotation

---
 .../BsplineFactory/SplineC2C.cpp              | 54 ++++++++++++-------
 .../BsplineFactory/SplineC2C.h                |  2 +-
 2 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
index 7f4b1d1bd5..dfa33f8da9 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
@@ -17,6 +17,7 @@
 #include "spline2/MultiBsplineEval.hpp"
 #include "QMCWaveFunctions/BsplineFactory/contraction_helper.hpp"
 #include "CPU/math.hpp"
+#include "CPU/BLAS.hpp"
 
 namespace qmcplusplus
 {
@@ -57,7 +58,7 @@ void SplineC2C<ST>::storeParamsBeforeRotation()
 {
   const auto spline_ptr     = SplineInst->getSplinePtr();
   const auto coefs_tot_size = spline_ptr->coefs_size;
-  coef_copy_                = std::make_shared<std::vector<RealType>>(coefs_tot_size);
+  coef_copy_                = std::make_shared<std::vector<ST>>(coefs_tot_size);
 
   std::copy_n(spline_ptr->coefs, coefs_tot_size, coef_copy_->begin());
 }
@@ -120,27 +121,40 @@ void SplineC2C<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
     std::copy_n(spl_coefs, coefs_tot_size, coef_copy_->begin());
   }
 
-  for (int i = 0; i < basis_set_size; i++)
-    for (int j = 0; j < OrbitalSetSize; j++)
-    {
-      // cur_elem points to the real componend of the coefficient.
-      // Imag component is adjacent in memory.
-      const auto cur_elem = Nsplines * i + 2 * j;
-      ST newval_r{0.};
-      ST newval_i{0.};
-      for (auto k = 0; k < OrbitalSetSize; k++)
+  if constexpr (std::is_same_v<ST, RealType>)
+  {
+    //if ST is double, go ahead and use blas to make things faster
+    //Note that Nsplines needs to be divided by 2 since spl_coefs and coef_copy_ are stored as reals.
+    //Also casting them as ValueType so they are complex to do the correct gemm
+    BLAS::gemm('N', 'N', OrbitalSetSize, basis_set_size, OrbitalSetSize, ST(1.0), rot_mat.data(), OrbitalSetSize,
+               (ValueType*)(*coef_copy_).data(), Nsplines / 2, ST(0.0), (ValueType*)spl_coefs, Nsplines / 2);
+  }
+  else
+  {
+    // if ST is float, RealType is double and ValueType is std::complex<double> for C2C
+    // Just use naive matrix multiplication in order to avoid losing precision on rotation matrix
+    for (int i = 0; i < basis_set_size; i++)
+      for (int j = 0; j < OrbitalSetSize; j++)
       {
-        const auto index = Nsplines * i + 2 * k;
-        ST zr            = (*coef_copy_)[index];
-        ST zi            = (*coef_copy_)[index + 1];
-        ST wr            = rot_mat[k][j].real();
-        ST wi            = rot_mat[k][j].imag();
-        newval_r += zr * wr - zi * wi;
-        newval_i += zr * wi + zi * wr;
+        // cur_elem points to the real componend of the coefficient.
+        // Imag component is adjacent in memory.
+        const auto cur_elem = Nsplines * i + 2 * j;
+        ST newval_r{0.};
+        ST newval_i{0.};
+        for (auto k = 0; k < OrbitalSetSize; k++)
+        {
+          const auto index = Nsplines * i + 2 * k;
+          ST zr            = (*coef_copy_)[index];
+          ST zi            = (*coef_copy_)[index + 1];
+          ST wr            = rot_mat[k][j].real();
+          ST wi            = rot_mat[k][j].imag();
+          newval_r += zr * wr - zi * wi;
+          newval_i += zr * wi + zi * wr;
+        }
+        spl_coefs[cur_elem]     = newval_r;
+        spl_coefs[cur_elem + 1] = newval_i;
       }
-      spl_coefs[cur_elem]     = newval_r;
-      spl_coefs[cur_elem + 1] = newval_i;
-    }
+  }
 }
 
 template<typename ST>
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h
index af082e0cea..9410e80cfb 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.h
@@ -64,7 +64,7 @@ class SplineC2C : public BsplineSet
   std::shared_ptr<MultiBspline<ST>> SplineInst;
 
   ///Copy of original splines for orbital rotation
-  std::shared_ptr<std::vector<RealType>> coef_copy_;
+  std::shared_ptr<std::vector<ST>> coef_copy_;
 
   vContainer_type mKK;
   VectorSoaContainer<ST, 3> myKcart;

From 3259ddeef6fefba3aabf7bc7ad656fef74b7cbee Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Thu, 24 Aug 2023 16:58:34 -0600
Subject: [PATCH 55/65] use valuetype in blas call

---
 src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
index dfa33f8da9..e608c272f3 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
@@ -126,8 +126,8 @@ void SplineC2C<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
     //if ST is double, go ahead and use blas to make things faster
     //Note that Nsplines needs to be divided by 2 since spl_coefs and coef_copy_ are stored as reals.
     //Also casting them as ValueType so they are complex to do the correct gemm
-    BLAS::gemm('N', 'N', OrbitalSetSize, basis_set_size, OrbitalSetSize, ST(1.0), rot_mat.data(), OrbitalSetSize,
-               (ValueType*)(*coef_copy_).data(), Nsplines / 2, ST(0.0), (ValueType*)spl_coefs, Nsplines / 2);
+    BLAS::gemm('N', 'N', OrbitalSetSize, basis_set_size, OrbitalSetSize, ValueType(1.0,0.0), rot_mat.data(), OrbitalSetSize,
+               (ValueType*)(*coef_copy_).data(), Nsplines / 2, ValueType(0.0,0.0), (ValueType*)spl_coefs, Nsplines / 2);
   }
   else
   {

From 2fb7ce3a9119d1638b97dd4fd38aaa02225c168b Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Thu, 24 Aug 2023 16:59:37 -0600
Subject: [PATCH 56/65] clang format

---
 src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
index e608c272f3..403cdaac10 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
@@ -126,8 +126,9 @@ void SplineC2C<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
     //if ST is double, go ahead and use blas to make things faster
     //Note that Nsplines needs to be divided by 2 since spl_coefs and coef_copy_ are stored as reals.
     //Also casting them as ValueType so they are complex to do the correct gemm
-    BLAS::gemm('N', 'N', OrbitalSetSize, basis_set_size, OrbitalSetSize, ValueType(1.0,0.0), rot_mat.data(), OrbitalSetSize,
-               (ValueType*)(*coef_copy_).data(), Nsplines / 2, ValueType(0.0,0.0), (ValueType*)spl_coefs, Nsplines / 2);
+    BLAS::gemm('N', 'N', OrbitalSetSize, basis_set_size, OrbitalSetSize, ValueType(1.0, 0.0), rot_mat.data(),
+               OrbitalSetSize, (ValueType*)(*coef_copy_).data(), Nsplines / 2, ValueType(0.0, 0.0),
+               (ValueType*)spl_coefs, Nsplines / 2);
   }
   else
   {

From d06fd1f81783d5f4aea038284ccc47ce3f7d2a4c Mon Sep 17 00:00:00 2001
From: "Paul R. C. Kent" <kentpr@ornl.gov>
Date: Fri, 25 Aug 2023 10:17:15 -0400
Subject: [PATCH 57/65] Set development version 3.17.9

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cc6225cbc8..271631d012 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ endif()
 ######################################################################
 project(
   qmcpack
-  VERSION 3.17.1
+  VERSION 3.17.9
   LANGUAGES C CXX)
 
 # add the automatically determined parts of the RPATH

From b1b093af8f0c0f6720be811c6ef8748a337a7f3a Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Fri, 25 Aug 2023 08:37:24 -0600
Subject: [PATCH 58/65] address comments

---
 src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp | 6 +++---
 src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
index 403cdaac10..805e8d2ef2 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
@@ -134,15 +134,15 @@ void SplineC2C<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
   {
     // if ST is float, RealType is double and ValueType is std::complex<double> for C2C
     // Just use naive matrix multiplication in order to avoid losing precision on rotation matrix
-    for (int i = 0; i < basis_set_size; i++)
-      for (int j = 0; j < OrbitalSetSize; j++)
+    for (IndexType i = 0; i < basis_set_size; i++)
+      for (IndexType j = 0; j < OrbitalSetSize; j++)
       {
         // cur_elem points to the real componend of the coefficient.
         // Imag component is adjacent in memory.
         const auto cur_elem = Nsplines * i + 2 * j;
         ST newval_r{0.};
         ST newval_i{0.};
-        for (auto k = 0; k < OrbitalSetSize; k++)
+        for (IndexType k = 0; k < OrbitalSetSize; k++)
         {
           const auto index = Nsplines * i + 2 * k;
           ST zr            = (*coef_copy_)[index];
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
index 9c55f27f04..1da6aa5023 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
@@ -131,12 +131,12 @@ void SplineR2R<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
   else
   {
     //Here, ST is float but ValueType is double for R2R. Due to issues with type conversions, just doing naive matrix multiplication in this case to not lose precision on rot_mat
-    for (auto i = 0; i < BasisSetSize; i++)
-      for (auto j = 0; j < OrbitalSetSize; j++)
+    for (IndexType i = 0; i < BasisSetSize; i++)
+      for (IndexType j = 0; j < OrbitalSetSize; j++)
       {
         const auto cur_elem = Nsplines * i + j;
-        auto newval{0.};
-        for (auto k = 0; k < OrbitalSetSize; k++)
+        FullPrecValueType newval{0.};
+        for (IndexType k = 0; k < OrbitalSetSize; k++)
         {
           const auto index = i * Nsplines + k;
           newval += (*coef_copy_)[index] * rot_mat[k][j];

From 9a80bec1d2a1e7fc94afdaabc3f4962c39814c0b Mon Sep 17 00:00:00 2001
From: "Paul R. C. Kent" <kentpr@ornl.gov>
Date: Fri, 25 Aug 2023 14:00:04 -0400
Subject: [PATCH 59/65] Mention backflow optimization removal in CHANGELOG

---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 39f2e5f0a3..89782befbe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,13 @@
 
 Notable changes to QMCPACK are documented in this file.
 
+## [Unreleased]
+
+* Support for backflow optimization has been removed as part of refactoring and cleaning the codebase. QMC runs using backflow
+  wavefunctions are still supported. Backflow optimization is expected to eventually be reimplemented in v4. Users needing
+  backflow optimization can use previously released versions of QMCPACK or work towards its reimplementation in the modern code.
+  [#4688](https://github.com/QMCPACK/qmcpack/pull/4688)
+
 ## [3.17.1] - 2023-08-25
 
 This minor release is recommended for all users and includes a couple of build fixes and a NEXUS improvement.

From 91adaa9b714834f2e9c08ffaa36e990cd66c1694 Mon Sep 17 00:00:00 2001
From: Cody Melton <cmelton@sandia.gov>
Date: Mon, 28 Aug 2023 13:45:33 -0600
Subject: [PATCH 60/65] address comments

---
 src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp | 2 +-
 src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
index 805e8d2ef2..8a3cd77c60 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineC2C.cpp
@@ -127,7 +127,7 @@ void SplineC2C<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
     //Note that Nsplines needs to be divided by 2 since spl_coefs and coef_copy_ are stored as reals.
     //Also casting them as ValueType so they are complex to do the correct gemm
     BLAS::gemm('N', 'N', OrbitalSetSize, basis_set_size, OrbitalSetSize, ValueType(1.0, 0.0), rot_mat.data(),
-               OrbitalSetSize, (ValueType*)(*coef_copy_).data(), Nsplines / 2, ValueType(0.0, 0.0),
+               OrbitalSetSize, (ValueType*)coef_copy_->data(), Nsplines / 2, ValueType(0.0, 0.0),
                (ValueType*)spl_coefs, Nsplines / 2);
   }
   else
diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
index 1da6aa5023..5b0fa59ed3 100644
--- a/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
+++ b/src/QMCWaveFunctions/BsplineFactory/SplineR2R.cpp
@@ -122,11 +122,11 @@ void SplineR2R<ST>::applyRotation(const ValueMatrix& rot_mat, bool use_stored_co
   }
 
 
-  if constexpr (std::is_same_v<ST, ValueType>)
+  if constexpr (std::is_same_v<ST, RealType>)
   {
     //Here, ST should be equal to ValueType, which will be double for R2R. Using BLAS to make things faster
     BLAS::gemm('N', 'N', OrbitalSetSize, BasisSetSize, OrbitalSetSize, ST(1.0), rot_mat.data(), OrbitalSetSize,
-               (*coef_copy_).data(), Nsplines, ST(0.0), spl_coefs, Nsplines);
+               coef_copy_->data(), Nsplines, ST(0.0), spl_coefs, Nsplines);
   }
   else
   {

From 6070bfd0d38ba5af27c136d4f4bc86201a9abd3c Mon Sep 17 00:00:00 2001
From: Alfredo Correa <alfredo.correa+work@gmail.com>
Date: Tue, 29 Aug 2023 01:33:40 -0700
Subject: [PATCH 61/65] Update Hamiltonian.hpp

fix type mismatch of multi array
---
 src/AFQMC/Hamiltonians/Hamiltonian.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/AFQMC/Hamiltonians/Hamiltonian.hpp b/src/AFQMC/Hamiltonians/Hamiltonian.hpp
index e1f5f6c63f..e1ea227817 100644
--- a/src/AFQMC/Hamiltonians/Hamiltonian.hpp
+++ b/src/AFQMC/Hamiltonians/Hamiltonian.hpp
@@ -70,7 +70,7 @@ class dummy_Hamiltonian
   boost::multi::array<SPComplexType, 1> halfRotatedHij(WALKER_TYPES type, PsiT_Matrix* Alpha, PsiT_Matrix* Beta)
   {
     throw std::runtime_error("calling visitor on dummy object");
-    return boost::multi::array<ComplexType, 1>(iextensions<1u>{1});
+    return boost::multi::array<SPComplexType, 1>(iextensions<1u>{1});
   }
 
   SpCType_shm_csr_matrix halfRotatedHijkl(WALKER_TYPES type,

From f1b95dc37915e19cbe0f4d3fb76a55caf56e9254 Mon Sep 17 00:00:00 2001
From: Paul Kent <kentpr@ornl.gov>
Date: Wed, 30 Aug 2023 14:04:33 -0400
Subject: [PATCH 62/65] Update nightly ORNL versions and README.md

---
 README.md                                     | 71 +++++++++----------
 .../nightly_test_scripts/nightly_ornl.sh      | 50 ++++---------
 .../nightly_test_scripts/ornl_setup.sh        | 44 ++++--------
 .../ornl_setup_environments.sh                | 28 ++++----
 .../nightly_test_scripts/ornl_versions.sh     | 36 +++++-----
 5 files changed, 90 insertions(+), 139 deletions(-)

diff --git a/README.md b/README.md
index a94e8489b3..c62cd13901 100644
--- a/README.md
+++ b/README.md
@@ -16,9 +16,20 @@ particular emphasis is placed on code quality and reproducibility.
 
 # Obtaining and installing QMCPACK
 
- Obtain the latest release from https://github.com/QMCPACK/qmcpack/releases or clone the development source from
- https://github.com/QMCPACK/qmcpack. A full installation guide and steps to perform an initial QMC calculation are given in the
- [extensive online documentation for QMCPACK](https://qmcpack.readthedocs.io/en/develop/index.html).
+Obtain the latest release from https://github.com/QMCPACK/qmcpack/releases or clone the development source from
+https://github.com/QMCPACK/qmcpack. A full installation guide and steps to perform an initial QMC calculation are given in the
+[extensive online documentation for QMCPACK](https://qmcpack.readthedocs.io/en/develop/index.html).
+
+The [CHANGELOG.md](CHANGELOG.md) describes key changes made in each release as well as any major changes to the development version.
+
+# Documentation and support
+
+For more information, consult QMCPACK pages at http://www.qmcpack.org, the manual at
+https://qmcpack.readthedocs.io/en/develop/index.html, or its sources in the docs directory.
+
+If you have trouble using or building QMCPACK, or have questions about its use, please post to the [Google QMCPACK
+group](https://groups.google.com/forum/#!forum/qmcpack), create a GitHub issue at https://github.com/QMCPACK/qmcpack/issues or
+contact a developer.
 
 # Prerequisites
 
@@ -36,36 +47,32 @@ particular emphasis is placed on code quality and reproducibility.
 We aim to support open source compilers and libraries released within two years of each QMCPACK release. Use of software versions
 over two years old may work but is discouraged and untested. Proprietary compilers (Intel, NVHPC) are generally supported over the
 same period but may require use of an exact version. We also aim to support the standard software environments on machines such as
-Summit at OLCF, Theta at ALCF, and Cori at NERSC. Use of the most recently released compilers and library versions is particularly
-encouraged for highest performance and easiest configuration.
+Frontier and Summit at OLCF, Aurora and Polaris at ALCF, and Perlmutter at NERSC. Use of the most recently released compilers and
+library versions is particularly encouraged for highest performance and easiest configuration.
 
-Nightly testing currently includes the following software versions on x86:
+Nightly testing currently includes at least the following software versions:
 
 * Compilers
-  * GCC 11.2.0, 9.2.0
-  * Clang/LLVM 13.0.0
-  * Intel 19.1.1.217 configured to use C++ library from GCC 9.1.0 
-  * NVIDIA HPC SDK 21.5 configured to use C++ library from GCC 9.1.0
-* Boost 1.77.0, 1.68.0
-* HDF5 1.12.1
+  * GCC 13.2.0, 11.4.0
+  * Clang/LLVM 16.0.6
+* Boost 1.83.0, 1.77.0
+* HDF5 1.14.2
 * FFTW 3.3.10, 3.3.8
-* CMake 3.21.1, 3.15.0
+* CMake 3.27.4, 3.21.3
 * MPI
-  * OpenMPI 4.1.1, 3.1.6
-  * Intel MPI 19.1.1.217
-* CUDA 11.4
+  * OpenMPI 4.1.5
+* CUDA 11.2
 
-Workflow tests are performed with Quantum Espresso v6.8.0 and PySCF v1.7.5. These check trial wavefunction generation and
-conversion through to actual QMC runs.
-
-On a developmental basis we also check the latest Clang and GCC development versions, AMD AOMP and Intel OneAPI compilers.
+GitHub Actions-based tests include additional version combinations from within our two year support window. On a developmental basis
+we also check the latest Clang and GCC development versions, AMD Clang and Intel OneAPI compilers. 
 
+Workflow tests are currently performed with Quantum Espresso v7.2.0 and PySCF v2.2.0. These check trial wavefunction generation and
+conversion through to actual QMC runs.
 # Building with CMake
 
- The build system for QMCPACK is based on CMake.  It will auto-configure based on the detected compilers and libraries. Previously
- QMCPACK made extensive use of toolchains, but the system has since been updated to eliminate the use of toolchain files for most
- cases.  Specific compile options can be specified either through specific environment or CMake variables.  When the libraries are
- installed in standard locations, e.g., /usr, /usr/local, there is no need to set environment or CMake variables for the packages.
+ The build system for QMCPACK is based on CMake.  It will auto-configure based on the detected compilers and libraries. Specific
+ compile options can be specified either through specific environment or CMake variables.  When the libraries are installed in
+ standard locations, e.g., /usr, /usr/local, there is no need to set environment or CMake variables for the packages.
 
  See the manual linked at https://qmcpack.readthedocs.io/en/develop/ and https://www.qmcpack.org/documentation or buildable using
  sphinx from the sources in docs/. A PDF version is still available at https://qmcpack.readthedocs.io/_/downloads/en/develop/pdf/
@@ -100,10 +107,10 @@ cmake ..
 make -j 8
 ```
 
- The complexities of modern computer hardware and software systems are
- such that you should check that the auto-configuration system has made
- good choices and picked optimized libraries and compiler settings
- before doing significant production. i.e. Check the details below.
+The complexities of modern computer hardware and software systems are
+such that you should check that the auto-configuration system has made
+good choices and picked optimized libraries and compiler settings
+before doing significant production. i.e. Check the details below. For MPI builds we recommend
 
 ## Set the environment
 
@@ -336,14 +343,6 @@ Individual tests can be run by specifying their name
 ctest -R name-of-test-to-run
 ```
 
-# Documentation and support
-
-For more information, consult QMCPACK pages at http://www.qmcpack.org, the manual at
-https://qmcpack.readthedocs.io/en/develop/index.html, or its sources in the docs directory.
-
-If you have trouble using or building QMCPACK, or have questions about its use, please post to the [Google QMCPACK
-group](https://groups.google.com/forum/#!forum/qmcpack), create a GitHub issue at https://github.com/QMCPACK/qmcpack/issues or
-contact a developer.
 
 # Contributing
 
diff --git a/tests/test_automation/nightly_test_scripts/nightly_ornl.sh b/tests/test_automation/nightly_test_scripts/nightly_ornl.sh
index 5b7b5f860b..e06f116933 100755
--- a/tests/test_automation/nightly_test_scripts/nightly_ornl.sh
+++ b/tests/test_automation/nightly_test_scripts/nightly_ornl.sh
@@ -71,9 +71,9 @@ echo --- Host is $ourhostname
 case "$ourhostname" in
     sulfur )
 	if [[ $jobtype == "nightly" ]]; then
-	    buildsys="clangnewmpi gccnewnompi gccnewmpi gccoldmpi clangoffloadnompi_offloadcuda clangoffloadmpi_offloadcuda clangoffloadmpi_offloadcuda_complex clangnewmpi_complex gccnewnompi_complex gccnewmpi_complex clangnewmpi_mixed gccnewnompi_mixed gccnewmpi_mixed clangnewmpi_mixed_complex gccnewnompi_mixed_complex gccnewmpi_mixed_complex"
+	    buildsys="clangoffloadnompi_offloadcuda clangoffloadmpi_offloadcuda clangoffloadmpi_offloadcuda_complex clangnewmpi gccnewnompi gccnewmpi gccoldmpi clangnewmpi_complex gccnewnompi_complex gccnewmpi_complex clangnewmpi_mixed gccnewnompi_mixed gccnewmpi_mixed clangnewmpi_mixed_complex gccnewnompi_mixed_complex gccnewmpi_mixed_complex clangoffloadmpi_offloadcuda"
 	else
-	    buildsys="clangnewmpi gccnewmpi clangoffloadmpi_offloadcuda clangnewmpi_complex clangnewmpi_mixed clangnewmpi_mixed_complex"
+	    buildsys="clangoffloadmpi_offloadcuda clangnewmpi gccnewmpi clangnewmpi_complex clangnewmpi_mixed clangnewmpi_mixed_complex"
 	fi
 	export QMC_DATA=/scratch/${USER}/QMC_DATA_FULL # Route to directory containing performance test files
 	;;
@@ -115,18 +115,14 @@ esac
 case "$jobtype" in
     weekly )
 	export GLOBALTCFG="-j 48 --timeout 7200 -VV"
-#	export QMC_OPTIONS="-DQMC_PERFORMANCE_NIO_MAX_ATOMS=256"
 	export QMC_OPTIONS="-DQMC_PERFORMANCE_NIO_MAX_ATOMS=256;-DQMC_PERFORMANCE_C_MOLECULE_MAX_ATOMS=64;-DQMC_PERFORMANCE_C_GRAPHITE_MAX_ATOMS=64"
 	export LIMITEDTESTS=""
 	export LESSLIMITEDTESTS=""
 	;;
     nightly )
-#	export GLOBALTCFG="-j 48 --timeout 900 -VV"
 	export GLOBALTCFG="-j 48 --timeout 300 -VV"
-#	export QMC_OPTIONS="-DQMC_PERFORMANCE_NIO_MAX_ATOMS=255;-DQMC_PERFORMANCE_C_MOLECULE_MAX_ATOMS=12;-DQMC_PERFORMANCE_C_GRAPHITE_MAX_ATOMS=16"
 	export QMC_OPTIONS="-DQMC_PERFORMANCE_NIO_MAX_ATOMS=128;-DQMC_PERFORMANCE_C_MOLECULE_MAX_ATOMS=12;-DQMC_PERFORMANCE_C_GRAPHITE_MAX_ATOMS=16"
-        export LIMITEDTESTS="-LE unstable --exclude-regex 'short-.*|long-.*|example.*'"
-#	export LESSLIMITEDTESTS="--exclude-regex 'long-.*'"
+        export LIMITEDTESTS="-E 'short-.*|long-.*|example.*'"
 	export LESSLIMITEDTESTS="-E long"
 	;;
     * )
@@ -260,8 +256,15 @@ case "$sys" in
     clangoffload*mpi*) echo $ourenv
 		;;
     amdclang*) echo $ourenv
-	       export ROCM_PATH=/opt/rocm
-	       export PATH=$PATH:$ROCM_PATH/bin
+	       for rocp in /opt/rocm /opt/rocm-5.6.1 /opt/rocm-5.6.0 /opt/rocm-5.5.1
+	       do
+		   if [ -e $rocp/bin/rocminfo ]; then
+		       echo Found rocminfo under $rocp
+		       export ROCM_PATH=$rocp
+		       break
+		   fi
+	       done
+	       export PATH=$PATH:$ROCM_PATH/bin:$ROCM_PATH/llvm/bin
 		;;
     
     *) echo "Problems: Unknown build environment"
@@ -288,18 +291,6 @@ CTCFG="-DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DQMC_MPI=1"
 export OMPI_CC=gcc
 export OMPI_CXX=g++
 
-if [[ $sys == *"gccnew"* ]]; then
-# Add QE to any gccnew MPI builds
-# Restrict to gccnew to avoid problems with mismatched libraries, mpi etc.
-CTCFG="$CTCFG -DQE_BIN=${QE_BIN}" 
-fi
-
-if [[ $sys == *"gcclegacycuda"* ]]; then
-# Add QE to any gcclegacycuda MPI builds
-# GCC compilers will be mismatched from QE and QMCPACK builds
-CTCFG="$CTCFG -DQE_BIN=${QE_BIN}" 
-fi
-
 fi
 fi
 
@@ -343,21 +334,6 @@ if [[ $sys == *"clang"* ]]; then
     fi
 fi
 
-#OLD#AOMP (fork of Clang/LLVM)
-#OLDif [[ $sys == *"aomp"* ]]; then
-#OLD    compilerversion=`aompversion|sed 's/-.*//g'`
-#OLD    if [[ $sys == *"nompi"* ]]; then
-#OLD	QMCPACK_TEST_SUBMIT_NAME=AOMP${compilerversion}-Offload-NoMPI
-#OLD	CTCFG="-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DQMC_MPI=0"
-#OLD	QMC_OPTIONS="${QMC_OPTIONS};-DENABLE_OFFLOAD=ON;-DOFFLOAD_TARGET=amdgcn-amd-amdhsa;-DOFFLOAD_ARCH=${amdgpuarch}"
-#OLD    else
-#OLD	QMCPACK_TEST_SUBMIT_NAME=AOMP${compilerversion}-Offload
-#OLD	CTCFG="-DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DQMC_MPI=1"
-#OLD	QMC_OPTIONS="${QMC_OPTIONS};-DENABLE_OFFLOAD=ON;-DOFFLOAD_TARGET=amdgcn-amd-amdhsa;-DOFFLOAD_ARCH=${amdgpuarch}"
-#OLD	export OMPI_CC=clang
-#OLD	export OMPI_CXX=clang++
-#OLD    fi
-#OLDfi
 
 # Intel
 if [[ $sys == *"intel"* ]]; then
@@ -504,7 +480,7 @@ fi
 
 # Adjust which tests are run to control overall runtime
 case "$sys" in
-    *intel2020*|*gccnew*|*clangnew*|*gcc*legacycuda*|*gcc*cu2hip*) echo "Running full ("less limited") test set for $sys"
+    *intel2020*|*gccnew*|*clangnew*|*gcc*legacycuda*|*gcc*cu2hip*|amdclang*) echo "Running full ("less limited") test set for $sys"
 							     THETESTS=$LESSLIMITEDTESTS
 							     ;;
     *) echo "Running limited test set for $sys"
diff --git a/tests/test_automation/nightly_test_scripts/ornl_setup.sh b/tests/test_automation/nightly_test_scripts/ornl_setup.sh
index c93f623235..bf3b414da5 100755
--- a/tests/test_automation/nightly_test_scripts/ornl_setup.sh
+++ b/tests/test_automation/nightly_test_scripts/ornl_setup.sh
@@ -13,18 +13,6 @@
 #	insteadOf = git://
 
 echo --- START initial setup `date`
-# Bug avoidance 20230404
-if [ -e /opt/rocm-*/bin/rocminfo ]; then
-    echo Spack LLVM16 installation will fail with ROCm present
-    echo Suggest temporarily: sudo chmod og-rx /opt/rocm-*
-    exit 1
-fi
-# Bug avoidance 20200902
-#if [ -e /usr/lib/aomp/bin/clang ]; then
-#    echo AOMP Clang install detected. This will break LLVM install.
-#    echo Suggest temporarily: sudo chmod og-rx /usr/lib/aomp
-#    exit 1
-#fi
 
 here=`pwd`
 if [ -e `dirname "$0"`/ornl_versions.sh ]; then
@@ -194,12 +182,14 @@ cd $HOME/apps/spack
 
 # For reproducibility, use a specific version of Spack
 # Prefer to use tagged releases https://github.com/spack/spack/releases
-git checkout 6edfc070926f7934eda5882de20ac3fb193e310a
-#commit 6edfc070926f7934eda5882de20ac3fb193e310a (HEAD -> develop, origin/develop, origin/HEAD)
-#Author: Alec Scott <hi@alecbcs.com>
-#Date:   Mon Apr 24 12:04:31 2023 -0700
+git checkout 74fba221f1803e25139440d6b8ed6e6a9364b626
+#commit 74fba221f1803e25139440d6b8ed6e6a9364b626 (grafted, HEAD -> develop, origin/develop, origin/HEAD)
+#Author: Massimiliano Culpo <massimiliano.culpo@gmail.com>
+#Date:   Mon Aug 28 16:58:16 2023 +0200
 #
-#    megadock: add v4.1.1 (#37154)
+#    GnuPG: add v2.4.3 (#39654)
+#    
+#    Also updates a few dependencies
 
 echo --- Git version and last log entry
 git log -1
@@ -211,16 +201,10 @@ cd bin
 # Consider using a GCC toolset on Red Hat systems to use
 # recent compilers with better architecture support.
 # e.g. dnf install gcc-toolset-11
-#if [ -e /opt/rh/gcc-toolset-11/root/bin/gcc ]; then
-#    echo --- Added gcc-toolset-11 to path for RHEL provided GCC11 compilers
-#    export PATH=/opt/rh/gcc-toolset-11/root/bin/:$PATH
-#fi
-#else
-#if [ -e /opt/rh/gcc-toolset-10/root/bin/gcc ]; then
-#    echo --- Added gcc-toolset-10 to path for RHEL provided GCC10 compilers
-#    export PATH=/opt/rh/gcc-toolset-10/root/bin/:$PATH
-#fi
-#fi
+if [ -e /opt/rh/gcc-toolset-12/enable ]; then
+    echo --- Using gcc-toolset-12 for newer compilers
+    source /opt/rh/gcc-toolset-12/enable 
+fi
 
 export DISPLAY="" 
 export SPACK_ROOT=$HOME/apps/spack
@@ -302,7 +286,8 @@ spack unload gcc@${gcc_vllvmoffload}
 echo --- llvm@${llvm_voffload} for offload  `date`
 spack install gcc@${gcc_vllvmoffload}
 spack install cuda@${cuda_voffload} +allow-unsupported-compilers
-spack install llvm@${llvm_voffload}%gcc@${gcc_vllvmoffload} ~libcxx +compiler-rt ~lldb ~gold ~omp_as_runtime targets=all
+#spack install llvm@${llvm_voffload}%gcc@${gcc_vllvmoffload} ~libcxx +compiler-rt ~lldb ~gold ~omp_as_runtime targets=all
+spack install llvm@${llvm_voffload}%gcc@${gcc_vllvmoffload} targets=all
 spack load llvm@${llvm_voffload}%gcc@${gcc_vllvmoffload}
 spack compiler find
 spack unload llvm@${llvm_voffload}%gcc@${gcc_vllvmoffload}
@@ -314,6 +299,3 @@ module list
 echo --- End listings
 echo --- FINISH initial setup `date`
 bash $HOME/.cron_jobs/ornl_setup_environments.sh
-
-echo --- REMEMBER REMEMBER
-echo If ROCm installed, sudo chmod og+rx /opt/rocm-*
diff --git a/tests/test_automation/nightly_test_scripts/ornl_setup_environments.sh b/tests/test_automation/nightly_test_scripts/ornl_setup_environments.sh
index 62bd0a6f8e..9e7c8fc474 100755
--- a/tests/test_automation/nightly_test_scripts/ornl_setup_environments.sh
+++ b/tests/test_automation/nightly_test_scripts/ornl_setup_environments.sh
@@ -28,8 +28,6 @@ case "$parallelmode" in
 	exit 1
 	;;
 esac
-#spack module tcl refresh -y
-#spack env loads
 }
 
 here=`pwd`
@@ -80,7 +78,7 @@ spack add python%gcc@${gcc_vnew}
 spack add openmpi@${ompi_vnew}%gcc@${gcc_vnew}
 spack add hdf5@${hdf5_vnew}%gcc@${gcc_vnew} +fortran +hl +mpi
 spack add fftw@${fftw_vnew}%gcc@${gcc_vnew} -mpi #Avoid MPI for simplicity
-spack add openblas@develop%gcc@${gcc_vnew} threads=openmp
+spack add openblas%gcc@${gcc_vnew} threads=openmp
 #spack add blis%gcc@${gcc_vnew} threads=openmp
 #spack add libflame%gcc@${gcc_vnew} threads=openmp
 
@@ -113,7 +111,7 @@ spack add python%gcc@${gcc_vnew}
 #spack add openmpi@${ompi_vnew}%gcc@${gcc_vnew}
 spack add hdf5@${hdf5_vnew}%gcc@${gcc_vnew} +fortran +hl ~mpi
 spack add fftw@${fftw_vnew}%gcc@${gcc_vnew} -mpi #Avoid MPI for simplicity
-spack add openblas@develop%gcc@${gcc_vnew} threads=openmp
+spack add openblas%gcc@${gcc_vnew} threads=openmp
 #spack add blis%gcc@${gcc_vnew} threads=openmp
 #spack add libflame%gcc@${gcc_vnew} threads=openmp
 
@@ -143,7 +141,7 @@ spack add python%gcc@${gcc_vold}
 #spack add openmpi@${ompi_vnew}%gcc@${gcc_vold}
 spack add hdf5@${hdf5_vold}%gcc@${gcc_vold} +fortran +hl ~mpi
 spack add fftw@${fftw_vold}%gcc@${gcc_vold} -mpi #Avoid MPI for simplicity
-spack add openblas@develop%gcc@${gcc_vold} threads=openmp
+spack add openblas%gcc@${gcc_vold} threads=openmp
 #spack add blis%gcc@${gcc_vold} threads=openmp
 #spack add libflame%gcc@${gcc_vold} threads=openmp
 
@@ -173,7 +171,7 @@ spack add python%gcc@${gcc_vold}
 spack add openmpi@${ompi_vnew}%gcc@${gcc_vold}
 spack add hdf5@${hdf5_vold}%gcc@${gcc_vold} +fortran +hl +mpi
 spack add fftw@${fftw_vold}%gcc@${gcc_vold} -mpi #Avoid MPI for simplicity
-spack add openblas@develop%gcc@${gcc_vold} threads=openmp
+spack add openblas%gcc@${gcc_vold} threads=openmp
 #spack add blis%gcc@${gcc_vold} threads=openmp
 #spack add libflame%gcc@${gcc_vold} threads=openmp
 
@@ -204,7 +202,7 @@ spack add python%gcc@${gcc_vnew}
 spack add openmpi@${ompi_vnew}%gcc@${gcc_vnew}
 spack add hdf5@${hdf5_vnew}%gcc@${gcc_vnew} +fortran +hl +mpi
 spack add fftw@${fftw_vnew}%gcc@${gcc_vnew} -mpi #Avoid MPI for simplicity
-spack add openblas@develop%gcc@${gcc_vnew} threads=openmp
+spack add openblas%gcc@${gcc_vnew} threads=openmp
 #spack add blis%gcc@${gcc_vnew} threads=openmp
 #spack add libflame%gcc@${gcc_vnew} threads=openmp
 
@@ -229,8 +227,7 @@ spack env activate $theenv
 
 spack add gcc@${gcc_vllvmoffload}
 spack add cuda@${cuda_voffload} +allow-unsupported-compilers
-#spack add llvm@${llvm_voffload}%gcc@${gcc_vllvmoffload} +cuda cuda_arch=70 targets=nvptx ^cuda@${cuda_voffload}
-spack add llvm@${llvm_voffload}%gcc@${gcc_vllvmoffload} ~libcxx +compiler-rt ~lldb ~gold ~omp_as_runtime targets=all
+spack add llvm@${llvm_voffload}%gcc@${gcc_vllvmoffload} targets=all
 
 spack add hwloc
 spack add git
@@ -243,7 +240,7 @@ spack add python%gcc@${gcc_vllvmoffload}
 spack add openmpi@${ompi_vnew}%gcc@${gcc_vllvmoffload}
 spack add hdf5@${hdf5_vnew}%gcc@${gcc_vllvmoffload} +fortran +hl +mpi
 spack add fftw@${fftw_vnew}%gcc@${gcc_vllvmoffload} -mpi #Avoid MPI for simplicity
-spack add openblas@develop%gcc@${gcc_vllvmoffload} threads=openmp
+spack add openblas%gcc@${gcc_vllvmoffload} threads=openmp
 #spack add blis%gcc@${gcc_vllvmoffload} threads=openmp
 #spack add libflame%gcc@${gcc_vllvmoffload} threads=openmp
 
@@ -264,8 +261,7 @@ spack env activate $theenv
 
 spack add gcc@${gcc_vllvmoffload}
 spack add cuda@${cuda_voffload} +allow-unsupported-compilers
-#OLDspack add llvm@${llvm_voffload}%gcc@${gcc_vllvmoffload} +cuda cuda_arch=70 targets=nvptx ^cuda@${cuda_voffload}
-spack add llvm@${llvm_voffload}%gcc@${gcc_vllvmoffload} ~libcxx +compiler-rt ~lldb ~gold ~omp_as_runtime targets=all
+spack add llvm@${llvm_voffload}%gcc@${gcc_vllvmoffload} targets=all
 
 spack add hwloc
 spack add git
@@ -278,7 +274,7 @@ spack add python%gcc@${gcc_vllvmoffload}
 #spack add openmpi@${ompi_vnew}%gcc@${gcc_vllvmoffload}
 spack add hdf5@${hdf5_vold}%gcc@${gcc_vllvmoffload} +fortran +hl ~mpi
 spack add fftw@${fftw_vold}%gcc@${gcc_vllvmoffload} -mpi #Avoid MPI for simplicity
-spack add openblas@develop%gcc@${gcc_vllvmoffload} threads=openmp
+spack add openblas%gcc@${gcc_vllvmoffload} threads=openmp
 #spack add blis%gcc@${gcc_vllvmoffload} threads=openmp
 #spack add libflame%gcc@${gcc_vllvmoffload} threads=openmp
 
@@ -310,7 +306,7 @@ spack add python%gcc@${gcc_vnew}
 spack add openmpi@${ompi_vnew}%gcc@${gcc_vnew}
 spack add hdf5@${hdf5_vnew}%gcc@${gcc_vnew} +fortran +hl +mpi
 spack add fftw@${fftw_vnew}%gcc@${gcc_vnew} -mpi #Avoid MPI for simplicity
-spack add openblas@develop%gcc@${gcc_vnew} threads=openmp
+spack add openblas%gcc@${gcc_vnew} threads=openmp
 #spack add blis%gcc@${gcc_vnew} threads=openmp
 #spack add libflame%gcc@${gcc_vnew} threads=openmp
 
@@ -342,7 +338,7 @@ spack add python%gcc@${gcc_vnew}
 #spack add openmpi@${ompi_vnew}%gcc@${gcc_vnew}
 spack add hdf5@${hdf5_vnew}%gcc@${gcc_vnew} +fortran +hl ~mpi
 spack add fftw@${fftw_vnew}%gcc@${gcc_vnew} -mpi #Avoid MPI for simplicity
-spack add openblas@develop%gcc@${gcc_vnew} threads=openmp
+spack add openblas%gcc@${gcc_vnew} threads=openmp
 #spack add blis%gcc@${gcc_vnew} threads=openmp
 #spack add libflame%gcc@${gcc_vnew} threads=openmp
 
@@ -409,6 +405,7 @@ install_environment
 spack env deactivate
 fi
 
+# CAUTION: Removing build deps reveals which spack packages do not have correct deps specified and may cause breakage
 #echo --- Removing build deps
 #for f in `spack env list`
 #do
@@ -420,7 +417,6 @@ fi
 #    spack env deactivate
 #done
 
-# possibly fully redundant: loads files creation now done in install_environment function
 echo --- Making loads files
 for f in `spack env list`
 do
diff --git a/tests/test_automation/nightly_test_scripts/ornl_versions.sh b/tests/test_automation/nightly_test_scripts/ornl_versions.sh
index e79f978cd3..b2d6d9f39c 100755
--- a/tests/test_automation/nightly_test_scripts/ornl_versions.sh
+++ b/tests/test_automation/nightly_test_scripts/ornl_versions.sh
@@ -4,38 +4,36 @@
 
 # GCC
 # Dates at https://gcc.gnu.org/releases.html
-gcc_vnew=12.2.0 # Released 2022-08-19
-gcc_vold=10.4.0 # Released 2022-06-28
+gcc_vnew=13.2.0 # Released 2023-07-27
+gcc_vold=11.4.0 # Released 2023-05-29
 
-#gcc_vcuda=10.2.0  # Released 2020-07-23 https://docs.nvidia.com/hpc-sdk/hpc-sdk-release-notes/index.html
-#gcc_vcuda=12.1.0
-#gcc_vintel=10.2.0 # Released 2020-07-23 Compiler for C++ library used by Intel compiler
-gcc_vintel=10.4.0
-#gcc_vnvhpc=10.2.0 # Released 2020-07-23 Use makelocalrc to configure NVHPC with this compiler 
-gcc_vnvhpc=12.2.0
+gcc_vcuda=11.4.0 # https://docs.nvidia.com/hpc-sdk/hpc-sdk-release-notes/index.html
+gcc_vintel=11.4.0 # Compiler for C++ library used by Intel compiler
+gcc_vnvhpc=11.4.0 # Use makelocalrc to configure NVHPC with this compiler 
+gcc_vllvmoffload=9.5.0 # Version for LLVM offload builds, should be compatible with CUDA version used
 
 # LLVM 
 # Dates at https://releases.llvm.org/
-llvm_vnew=15.0.2 # Released 2022-10-04
+llvm_vnew=16.0.6 # Released 2023-06-19
+llvm_voffload=16.0.6
+cuda_voffload=11.2.0 # CUDA version for offload builds
 
 # HDF5
 # Dates at https://portal.hdfgroup.org/display/support/Downloads
-#hdf5_vnew=1.13.0 # Released 2021-12-01 # odd versions are development versions
-hdf5_vnew=1.12.2 # Released 2022-04-27
+hdf5_vnew=1.14.2 # Released 2023-08-11
 hdf5_vold=${hdf5_vnew}
 
 # CMake 
 # Dates at https://cmake.org/files/
-cmake_vnew=3.24.2 # Released 2022-09-13
-cmake_vold=3.18.4 # Released 2020-19-06
+cmake_vnew=3.27.4 # Released 2023-08-23
+cmake_vold=3.21.3 # Released 2021-09-20
 
 # OpenMPI
 # Dates at https://www.open-mpi.org/software/ompi/v4.1/
-ompi_vnew=4.1.4 # Released 2022-05-26
-ompi_vold=3.1.6 # Released 2020-03-18
+ompi_vnew=4.1.5 # Released 2023-02-23
 
 # Libxml2
-libxml2_v=2.9.13 # Released 2022-02-20 See http://xmlsoft.org/sources/
+libxml2_v=2.10.3 # Released 2022-12? See https://gitlab.gnome.org/GNOME/libxml2/-/releases
 
 # FFTW
 # Dates at http://www.fftw.org/release-notes.html
@@ -44,12 +42,12 @@ fftw_vold=3.3.8 # Released 2018-05-28
 
 # BOOST
 # Dates at https://www.boost.org/users/history/
-boost_vnew=1.79.0 # Released 2022-04-13
-boost_vold=1.74.0 # Released 2020-08-14
+boost_vnew=1.83.0 # Released 2023-08-11
+boost_vold=1.77.0 # Released 2021-08-11
 
 # Python
 # Use a single version to reduce dependencies. Ideally the spack prefered version.
-python_version=3.9.13
+python_version=3.10.12
 
 
 

From cdbc1449e3051c837495654288e2ab8dfea2dcbc Mon Sep 17 00:00:00 2001
From: Paul Kent <kentpr@ornl.gov>
Date: Wed, 30 Aug 2023 14:09:20 -0400
Subject: [PATCH 63/65] Newline

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index c62cd13901..8d2b0a83df 100644
--- a/README.md
+++ b/README.md
@@ -68,6 +68,7 @@ we also check the latest Clang and GCC development versions, AMD Clang and Intel
 
 Workflow tests are currently performed with Quantum Espresso v7.2.0 and PySCF v2.2.0. These check trial wavefunction generation and
 conversion through to actual QMC runs.
+
 # Building with CMake
 
  The build system for QMCPACK is based on CMake.  It will auto-configure based on the detected compilers and libraries. Specific

From 01e0d455673a7559ac3358f0698d6ce715a2f590 Mon Sep 17 00:00:00 2001
From: "Paul R. C. Kent" <kentpr@ornl.gov>
Date: Thu, 31 Aug 2023 10:05:57 -0400
Subject: [PATCH 64/65] Remove fragment

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8d2b0a83df..35686f518d 100644
--- a/README.md
+++ b/README.md
@@ -111,7 +111,7 @@ make -j 8
 The complexities of modern computer hardware and software systems are
 such that you should check that the auto-configuration system has made
 good choices and picked optimized libraries and compiler settings
-before doing significant production. i.e. Check the details below. For MPI builds we recommend
+before doing significant production. i.e. Check the details below.
 
 ## Set the environment
 

From 37c99748384d8ec6b693d3afe9a86f01602547d2 Mon Sep 17 00:00:00 2001
From: "Paul R. C. Kent" <kentpr@ornl.gov>
Date: Thu, 31 Aug 2023 10:09:10 -0400
Subject: [PATCH 65/65] Revise

---
 README.md | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 35686f518d..bc57cd14b9 100644
--- a/README.md
+++ b/README.md
@@ -71,12 +71,11 @@ conversion through to actual QMC runs.
 
 # Building with CMake
 
- The build system for QMCPACK is based on CMake.  It will auto-configure based on the detected compilers and libraries. Specific
- compile options can be specified either through specific environment or CMake variables.  When the libraries are installed in
- standard locations, e.g., /usr, /usr/local, there is no need to set environment or CMake variables for the packages.
+The build system for QMCPACK is based on CMake.  It will auto-configure based on the detected compilers and libraries. When these 
+are installed in standard locations, e.g., /usr, /usr/local, there is no need to set either environment or CMake variables.
 
- See the manual linked at https://qmcpack.readthedocs.io/en/develop/ and https://www.qmcpack.org/documentation or buildable using
- sphinx from the sources in docs/. A PDF version is still available at https://qmcpack.readthedocs.io/_/downloads/en/develop/pdf/
+See the manual linked at https://qmcpack.readthedocs.io/en/develop/ and https://www.qmcpack.org/documentation or buildable using
+sphinx from the sources in docs/. A PDF version is still available at https://qmcpack.readthedocs.io/_/downloads/en/develop/pdf/
 
 ## Quick build