diff --git a/include/macis/asci/determinant_search.hpp b/include/macis/asci/determinant_search.hpp
index 7d66813c..486aa557 100644
--- a/include/macis/asci/determinant_search.hpp
+++ b/include/macis/asci/determinant_search.hpp
@@ -539,12 +539,13 @@ std::vector<wfn_t<N>> asci_search(
   // and keep only the duplicate with positive coefficient.
   keep_only_largest_copy_asci_pairs(asci_pairs);
 
-  asci_pairs.erase(std::partition(asci_pairs.begin(), asci_pairs.end(),
-                                  [](const auto& p) { return p.rv < 0.0; }),
-                   asci_pairs.end());
+  // asci_pairs.erase(std::partition(asci_pairs.begin(), asci_pairs.end(),
+  //                                 [](const auto& p) { return p.rv < 0.0; }),
+  //                  asci_pairs.end());
 
   // Only do top-K on (ndets_max - ncdets) b/c CDETS will be added later
-  const size_t top_k_elements = ndets_max - ncdets;
+  // const size_t top_k_elements = ndets_max - ncdets;
+  const size_t top_k_elements = ndets_max;
 
   auto keep_large_en = clock_type::now();
   duration_type keep_large_dur = keep_large_en - keep_large_st;
@@ -650,7 +651,7 @@ std::vector<wfn_t<N>> asci_search(
                  [](auto x) { return x.state; });
 
   // Insert the CDETS back in
-  new_dets.insert(new_dets.end(), cdets_begin, cdets_end);
+  // new_dets.insert(new_dets.end(), cdets_begin, cdets_end);
   new_dets.shrink_to_fit();
 
   logger->info("  * New Dets Mem = {:.2e} GiB", to_gib(new_dets));
diff --git a/include/macis/asci/grow.hpp b/include/macis/asci/grow.hpp
index f3f4260e..4ea0c32c 100644
--- a/include/macis/asci/grow.hpp
+++ b/include/macis/asci/grow.hpp
@@ -61,7 +61,8 @@ auto asci_grow(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
     dur_t ai_dur = ai_en - ai_st;
     logger->trace("  * ASCI_ITER_DUR = {:.2e} ms", ai_dur.count());
     if(ndets_new > wfn.size())
-      throw std::runtime_error("Wavefunction didn't grow enough...");
+      logger->info("Wavefunction didn't grow enough...");
+    // throw std::runtime_error("Wavefunction didn't grow enough...");
 
     logger->info(fmt_string, iter++, E, E - E0, wfn.size());
     if(asci_settings.grow_with_rot and
@@ -127,7 +128,7 @@ auto asci_grow(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
       selected_ci_diag(
           wfn.begin(), wfn.end(), ham_gen, mcscf_settings.ci_matel_tol,
           mcscf_settings.ci_max_subspace, mcscf_settings.ci_res_tol,
-          X_local MACIS_MPI_CODE(, comm));
+          X_local MACIS_MPI_CODE(, comm), false, mcscf_settings.ci_nstates);
 
       if(world_size > 1) {
 #ifdef MACIS_ENABLE_MPI
diff --git a/include/macis/asci/iteration.hpp b/include/macis/asci/iteration.hpp
index aa6e271c..d6d489e2 100644
--- a/include/macis/asci/iteration.hpp
+++ b/include/macis/asci/iteration.hpp
@@ -34,7 +34,7 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
   auto E = selected_ci_diag<N, index_t>(
       wfn.begin(), wfn.end(), ham_gen, mcscf_settings.ci_matel_tol,
       mcscf_settings.ci_max_subspace, mcscf_settings.ci_res_tol,
-      X_local MACIS_MPI_CODE(, comm));
+      X_local MACIS_MPI_CODE(, comm), false, mcscf_settings.ci_nstates);
 
 #ifdef MACIS_ENABLE_MPI
   auto world_size = comm_size(comm);
@@ -64,6 +64,7 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
   X = std::move(X_local);  // Serial
 #endif
 
+  if(wfn.size() > 1) reorder_ci_on_coeff(wfn, X);
   return std::make_tuple(E, wfn, X);
 }
 
diff --git a/include/macis/bitset_operations.hpp b/include/macis/bitset_operations.hpp
index e0f5ec27..e11b5f69 100644
--- a/include/macis/bitset_operations.hpp
+++ b/include/macis/bitset_operations.hpp
@@ -7,6 +7,9 @@
  */
 
 #pragma once
+#include <string.h>
+#include <strings.h>
+
 #include <bit>
 #include <cassert>
 #include <climits>
diff --git a/include/macis/gf/bandlan.hpp b/include/macis/gf/bandlan.hpp
new file mode 100644
index 00000000..c3603b13
--- /dev/null
+++ b/include/macis/gf/bandlan.hpp
@@ -0,0 +1,279 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+/**
+ * @file bandlan.h++
+ *
+ * @brief Implements simple Band Lanczos routine.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 25/04/2022
+ */
+#pragma once
+#include <assert.h>
+#include <sys/stat.h>
+
+#include <complex>
+#include <fstream>
+#include <iomanip>
+#include <limits>
+#include <lobpcgxx/lobpcg.hpp>
+#include <map>
+#include <sparsexx/matrix_types/csr_matrix.hpp>
+#include <sparsexx/spblas/pspmbv.hpp>
+#include <sparsexx/spblas/spmbv.hpp>
+#include <utility>
+
+#include "macis/solvers/davidson.hpp"
+
+namespace macis {
+
+/**
+ * @ brief Wrapper for QR decomposition in LAPACK, basically
+ *         calling dgeqrf and dorgqr to evaluate the R and
+ *         Q matrices, which are returned. Here, the input
+ *         matrix has more rows than columns.
+ *
+ * @param[inout] std::vector<double> &Q: On input,
+ *               matrix for which to evaluate the QR decomposition.
+ *               On output, Q-matrix.
+ * @param[out] std::vector<double> &R: On output, R
+ *             matrix in the QR decomposition.
+ * @param[in] int Qrows: Nr. of rows in matrix Q.
+ * @param[in] int Qcols: Nr. of cols in matrix Q.
+ *
+ * @returns bool: Error code from LAPACK routines.
+ *
+ * @author Carlos Mejuto-Zaera
+ * @date 25/04/2022
+ */
+bool QRdecomp(std::vector<double> &Q, std::vector<double> &R, int Qrows,
+              int Qcols);
+
+/**
+ * @ brief Wrapper for QR decomposition in LAPACK, basically
+ *         calling dgeqrf and dorgqr to evaluate the R and
+ *         Q matrices, which are returned. Here, the input
+ *         matrix has more columns than rows.
+ *
+ * @param[inout] std::vector<double> &Q: On input,
+ *               matrix for which to evaluate the QR decomposition.
+ *               On output, Q-matrix.
+ * @param[out] std::vector<double> &R: On output, R
+ *             matrix in the QR decomposition.
+ * @param[in] int Qrows: Nr. of rows in Q.
+ * @param[in] int Qcols: Nr. of cols in Q.
+ *
+ * @returns bool: Error code from LAPACK routines.
+ *
+ * @author Carlos Mejuto-Zaera
+ * @date 25/04/2022
+ */
+bool QRdecomp_tr(std::vector<double> &Q, std::vector<double> &R, int Qrows,
+                 int Qcols);
+
+/**
+ * @brief Wrapper to LAPACK routine to evaluate the eigenvectors
+ *        and eigenvalues of the symmetric matrix mat.
+ *
+ * @param[inout] std::vector<double> &mat: Matrix for
+ *               which to compute the eigenvalues/vectors. Erased
+ *               during computation.
+ * @param[out] std::vector<double> &eigvals: Eigenvalues, sorted from smallest
+ *             to largest.
+ * @param[out] std::vector<double> &eigvecs: Eigenvectors,
+ *             stored as row vectors.
+ * @param[in] int matsize: Nr. of rows/columns of the square matrix mat.
+ *
+ * @returns bool: Error code from LAPACK.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 25/04/2022
+ */
+bool GetEigsys(std::vector<double> &mat, std::vector<double> &eigvals,
+               std::vector<double> &eigvecs, int matsize);
+
+/**
+ * @brief Wrapper to LAPACK routine to evaluate the eigenvectors
+ *        and eigenvalues of the symmetric band matrix mat.
+ *
+ * @param[inout] std::vector<double> &mat: Matrix for
+ *               which to compute the eigenvalues/vectors. Erased
+ *               during computation.
+ * @param[in] int nSupDiag: Nr. of bands.
+ * @param[out] std::vector<double> &eigvals: Eigenvalues, sorted from smallest
+ *             to largest.
+ * @param[out] std::vector<double> &eigvecs: Eigenvectors,
+ *             stored as row vectors.
+ * @param[in] int matsize: Nr. of rows/cols of the square matrix mat.
+ *
+ * @returns bool: Error code from LAPACK.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 25/04/2022
+ */
+bool GetEigsysBand(std::vector<double> &mat, int nSupDiag,
+                   std::vector<double> &eigvals, std::vector<double> &eigvecs,
+                   int matsize);
+
+/**
+ * @brief Perform a band Lanczos calculation on the Hamiltonian operator H,
+ * starting from vectors qs, for at most nLanIts iterations. The resulting
+ * band-diagonal matrix Hamiltonian will be stored in bandH. Note that this
+ * implementation does not account for deflations (i.e., pruning the span of the
+ * qs for linear dependencies in higher powers of H).
+ *
+ * @param[in] const sparseexx::csr_matrix<double, int32_t> &H: Hamiltonian
+ * oprator. Just needs to implement a matrix vector product.
+ * @param[in] std::vector<Cont> &qs: Initial set of vetors to
+ * perform the band Lanczos on. Deleted on exit.
+ * @param[in] std::vector<Cont> &bandH: On exit, band-diagonal
+ * Hamiltonian approximation.
+ * @param[inout] int &nLanIts: Number of Lanczos iterations to perform.
+ * @param[in] double thres: Threshold determining when to ignore beta's for
+ * being too small.
+ * @param[in] bool print: If true, write intermediate results to file.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 25/04/2022
+ */
+template <class Cont, class Functor>
+void BandLan(const Functor &H, std::vector<Cont> &qs, std::vector<Cont> &bandH,
+             int &nLanIts, int nbands, int N, double thres = 1.E-6,
+             bool print = false) {
+  // BAND LANCZOS ROUTINE. TAKES AS INPUT THE HAMILTONIAN H, INITIAL VECTORS qs
+  // AND RETURNS THE BAND HAMILTONIAN bandH. IT PERFORMS nLanIts ITERATIONS,
+  // STOPPING IF THE NORM OF ANY NEW KRYLOV VECTOR IS BELOW thres. IF LANCZOS IS
+  // STOPPED PREMATURELY , nLanIts IS OVERWRITTEN WITH THE ACTUAL NUMBER OF
+  // ITERATIONS! THE qs VECTOR IS ERASED AT THE END OF THE CALCULATION
+  using dbl = std::numeric_limits<double>;
+  bandH.clear();
+  bandH.resize(nLanIts * nLanIts, 0.);
+
+  auto VecNorm = [](const std::vector<Cont> &vec) -> double {
+    return std::sqrt(
+        std::real(blas::dot(vec.size(), vec.data(), 1, vec.data(), 1)));
+  };
+
+  // MAKE SPACE FOR 2 * nbands VECTORS
+  qs.resize(2 * nbands * N);
+  std::vector<Cont> temp(N, 0.);
+  if(print) {
+    for(int i = 0; i < nbands; i++) {
+      std::ofstream ofile("lanvec_" + std::to_string(i + 1) + ".dat",
+                          std::ios::out);
+      ofile.precision(dbl::max_digits10);
+      for(size_t el = 0; el < N; el++)
+        ofile << std::scientific << qs[el + i * N] << std::endl;
+      ofile.close();
+    }
+  }
+  // DICTIONARY TO KEEP THE REAL INDICES OF THE KRYLOV VECTORS
+  // THIS IS NECESSARY IN ORDER TO ONLY STORE 2* nbands OF THEM
+  // AT ANY POINT IN TIME, PLUS ONE SCRATCH VECTOR TO BE DEFINED
+  // INSIDE THE FOR LOOP
+  std::vector<int> true_indx(nLanIts + 1);
+  for(int i = 0; i < nbands; i++) true_indx[i + 1] = i;
+  int next_indx = nbands;
+
+  for(int it = 1; it <= nLanIts; it++) {
+    int band_indx_i =
+        true_indx[it];  // TO WHAT ELEMENT OF THE VECTOR SET DO WE APPLY THIS
+    H.operator_action(1, 1., qs.data() + band_indx_i * N, N, 0., temp.data(),
+                      N);
+    if(print) {
+      std::ofstream ofile("Htimes_lanvec_" + std::to_string(it) + ".dat",
+                          std::ios::out);
+      ofile.precision(dbl::max_digits10);
+      for(size_t el = 0; el < temp.size(); el++)
+        ofile << std::scientific << temp[el] << std::endl;
+      ofile.close();
+    }
+    for(int jt = std::max(1, it - nbands); jt <= std::min(it - 1, nLanIts);
+        jt++) {
+      int band_indx_j = true_indx[jt];
+      blas::axpy(N, -1. * bandH[(it - 1) * nLanIts + jt - 1],
+                 qs.data() + N * band_indx_j, 1, temp.data(), 1);
+    }
+    for(int jt = it; jt <= std::min(it + nbands - 1, nLanIts); jt++) {
+      int band_indx_j = true_indx[jt];
+      bandH[(it - 1) * nLanIts + jt - 1] =
+          blas::dot(N, temp.data(), 1, qs.data() + band_indx_j * N, 1);
+      bandH[(jt - 1) * nLanIts + it - 1] = bandH[(it - 1) * nLanIts + jt - 1];
+      blas::axpy(N, -1. * bandH[(it - 1) * nLanIts + jt - 1],
+                 qs.data() + N * band_indx_j, 1, temp.data(), 1);
+    }
+    if(it + nbands <= nLanIts) {
+      bandH[(it - 1) * nLanIts + it + nbands - 1] = VecNorm(temp);
+      bandH[(it + nbands - 1) * nLanIts + it - 1] =
+          bandH[(it - 1) * nLanIts + it + nbands - 1];
+      true_indx[it + nbands] = next_indx;
+      if(std::abs(bandH[(it - 1) * nLanIts + it + nbands - 1]) < thres) {
+        // std::cout
+        //     << "BAND LANCZOS STOPPED PREMATURELY DUE TO SMALL NORM! NAMELY "
+        //     << bandH[(it - 1) * nLanIts + it + nbands - 1]
+        //     << ", STOPPED AT ITERATION: " << it << std::endl;
+        // nLanIts = it;
+        // bandH.resize(nLanIts * nLanIts);
+        // break;
+        blas::scal(N, 0., qs.data() + true_indx[it + nbands] * N, 1);
+        std::cout << "FOUND A ZERO VECTOR AT POSITION " << next_indx
+                  << std::endl;
+      } else {
+        blas::copy(N, temp.data(), 1, qs.data() + true_indx[it + nbands] * N,
+                   1);
+        blas::scal(N, 1. / bandH[(it - 1) * nLanIts + it + nbands - 1],
+                   qs.data() + true_indx[it + nbands] * N, 1);
+        if(print) {
+          std::ofstream ofile("lanvec_" + std::to_string(it + nbands) + ".dat",
+                              std::ios::out);
+          ofile.precision(dbl::max_digits10);
+          for(size_t el = 0; el < N; el++)
+            ofile << std::scientific << qs[true_indx[it + nbands] * N + el]
+                  << std::endl;
+          ofile.close();
+        }
+      }
+      next_indx = (next_indx + 1 >= 2 * nbands) ? 0 : next_indx + 1;
+    }
+  }
+  qs.clear();
+}
+
+/**
+ * @brief Evaluates the expectation values of the resolvent of Hamiltonian H
+ * along a frequency grid ws with respect to the vectors vecs, using the band
+ * Lanczos algorithm.
+ *
+ * @param[in] const sparsex::dist_sparse_matrix<sparsexx::csr_matrix<double,
+ * int32_t> > &H: Hamiltonian operator.
+ * @param[in] std::vector<double> &vecs: Vectors for which to
+ * compute the resolvent matrix elements in format res[freq.][iorb1 * norbs +
+ * iorb2].
+ * @param[in] std::vector<std::complex<double> > &ws: Frequency grid over which
+ * to evaluate the resolvent.
+ * @param[out] std::vector<std::vector<std::complex<double> > >
+ * &res: On exit, resolvent elements.
+ * @param[in] int nLanIts: Max number of iterations.
+ * @param[in] double E0: Ground state energy, for shifting the resolvent.
+ * @param[in] bool ispart: If true, computes resolvent for particle GF,
+ * otherwise for hole GF.
+ * @param[in] bool print: If true, write intermediate results to file.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 25/04/2022
+ */
+void BandResolvent(
+    const sparsexx::dist_sparse_matrix<sparsexx::csr_matrix<double, int32_t> >
+        &H,
+    std::vector<double> &vecs, const std::vector<std::complex<double> > &ws,
+    std::vector<std::vector<std::complex<double> > > &res, int nLanIts,
+    double E0, bool ispart, int nvecs, int len_vec, bool print = false,
+    bool saveGFmats = false);
+
+}  // namespace macis
diff --git a/include/macis/gf/eigsolver.hpp b/include/macis/gf/eigsolver.hpp
new file mode 100644
index 00000000..295789fe
--- /dev/null
+++ b/include/macis/gf/eigsolver.hpp
@@ -0,0 +1,54 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+/**
+ * @file eigsolver.h++
+ * @brief Wrapper to Lapack routine for diagonalizing
+ *        a tridiagonal, symmetric matrix.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 05/04/2021
+ */
+#pragma once
+#include <Eigen/Core>
+#include <Eigen/Sparse>
+#include <lapack.hh>
+
+namespace macis {
+
+/**
+ * @brief Computes the eigenvalues and eigenvectors of a tridiagonal, symmetric
+ * matrix using Lapack.
+ *
+ * @param [in] const std::vector<double> &alphas: Diagonal of the matrix.
+ * @param [in] const std::vector<double> &betas: Off-diagonal of the matrix.
+ * @param [out] Eigen::VectorXd &eigvals: Eigenvalues.
+ * @param [out] Eigen::MatrixXd &eigvecs: Eigenvectors.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 05/04/2021
+ */
+void Hste_v(const std::vector<double> &alphas, const std::vector<double> &betas,
+            Eigen::VectorXd &eigvals, Eigen::MatrixXd &eigvecs);
+
+/**
+ * @brief Computes the eigenvalues and eigenvectors of a tridiagonal, symmetric
+ * matrix using Lapack.
+ *
+ * @param [in] const std::vector<double> &alphas: Diagonal of the matrix.
+ * @param [in] const std::vector<double> &betas: Off-diagonal of the matrix.
+ * @param [out] Eigen::VectorXd &eigvals: Eigenvalues.
+ * @param [out] Eigen::MatrixXd &eigvecs: Eigenvectors.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 05/04/2021
+ */
+void Hsyev(const Eigen::MatrixXd &H, Eigen::VectorXd &eigvals,
+           Eigen::MatrixXd &eigvecs);
+
+}  // namespace macis
diff --git a/include/macis/gf/gf.hpp b/include/macis/gf/gf.hpp
new file mode 100644
index 00000000..ad2272a7
--- /dev/null
+++ b/include/macis/gf/gf.hpp
@@ -0,0 +1,678 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+/**
+ * @brief Collection of routines to compute Green's functions
+ *        within ED or CI-based approaches. By this, we mean
+ *        any approximated approach where the wave function for
+ *        which we want to compute the Green's function is written
+ *        as a list of determinants.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 25/04/2022
+ */
+#pragma once
+#include <Eigen/Core>
+#include <Eigen/Dense>
+#include <Eigen/IterativeLinearSolvers>
+#include <Eigen/Sparse>
+#include <Eigen/SparseQR>
+#include <chrono>
+#include <unsupported/Eigen/SparseExtra>
+
+#include "macis/csr_hamiltonian.hpp"
+#include "macis/gf/bandlan.hpp"
+#include "macis/gf/lanczos.hpp"
+#include "macis/hamiltonian_generator.hpp"
+#include "macis/sd_operations.hpp"
+#if __has_include(<boost/sort/pdqsort/pdqsort.hpp>)
+#define MACIS_USE_BOOST_SORT
+#include <boost/sort/pdqsort/pdqsort.hpp>
+#endif
+
+namespace macis {
+
+struct GFSettings {
+  size_t norbs = 0;
+  size_t trunc_size = 0;
+  int tot_SD = 1;
+  double GFseedThres = 1.E-4;
+  double asThres = 1.E-4;
+  bool use_bandLan = true;
+  std::vector<int> GF_orbs_basis = std::vector<int>(0);
+  std::vector<int> GF_orbs_comp = std::vector<int>(0);
+  std::vector<bool> is_up_basis = std::vector<bool>(0);
+  std::vector<bool> is_up_comp = std::vector<bool>(0);
+  int nLanIts = 1000;
+  bool writeGF = false;
+  bool print = false;
+  bool saveGFmats = false;
+  double wmin = -8.;
+  double wmax = 8.;
+  size_t nws = 2001;
+  double eta = 0.1;
+  std::string w_scale = "lin";
+  bool real_g = true;
+  bool beta = 1.;
+};
+
+/**
+ * @brief Generates the frequency grid over which to evaluate the GF,
+ *        details of which are specified in the settings instance. We
+ *        have implemented linear, Matsubara and logarithmic frequency
+ *        grids.
+ *
+ * @param[in] const GFSettings& settings: Settings defining the frequency
+ *            grid.
+ *
+ * @returns std::vector<std::complex<double> >: Vector of complex frequencies
+ *          building the grid.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 09/10/2023
+ */
+std::vector<std::complex<double>> GetGFFreqGrid(const GFSettings &settings);
+
+/**
+ * @brief Gets fermionic sign incurred by inserting an electron of spin
+ *        up in a given orbital to a given determinant.
+ *
+ * @param[in] const std::bitset<nbits> &st: Determinant to which we are adding
+ *            an electron.
+ * @param[in] size_t orb: Index of the orbital where we are adding the electron.
+ *
+ * @returns double: Fermionic sign of the operation.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 28/01/2022
+ */
+template <size_t nbits>
+inline double GetInsertionUpSign(const std::bitset<nbits> &st, size_t orb) {
+  std::bitset<nbits> mask = full_mask<nbits>(orb);
+  return ((st & mask).count() % 2 == 1 ? -1. : 1.);
+}
+
+/**
+ * @brief Gets fermionic sign incurred by inserting an electron of spin
+ *        down in a given orbital to a given determinant.
+ *
+ * @param[in] const std::bitset<nbits> &st: Determinant to which we are adding
+ *            an electron.
+ * @param[in] size_t orb: Index of the orbital where we are adding the electron.
+ *
+ * @returns double: Fermionic sign of the operation.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 28/01/2022
+ */
+template <size_t nbits>
+inline double GetInsertionDoSign(const std::bitset<nbits> &st, size_t orb) {
+  std::bitset<nbits> mask = full_mask<nbits>(orb + nbits / 2);
+  return ((st & mask).count() % 2 == 1 ? -1. : 1.);
+}
+
+/**
+ * @brief Routine to compute single diagonal Green's function element.
+ *        Essentially, evaluates the resolvent
+ *        G(w) = <state_0| 1/(freq - H) |state_0>
+ *        using one-band Lanczos and continuous fraction representation.
+ *
+ * @param [in] const VectorXd &state_0: Reference state, for which to evaluate
+ *             the resolvent.
+ * @param [in] const MatOp &H: Matrix operator representing the effective
+ *             Hamiltonian.
+ * @param [in] const std::vector<std::complex<double> > &freqs: Frequency grid
+ * over which to evaluate the resolvent.
+ * @param [inout] std::vector<std::complex<double> > &gf: On successful exit,
+ * green's function along the frequency grid.
+ * @param [in] int nLanIts: Max. number of Lanczos iterations.
+ * @param [in] bool saveABtofile: If true, save the alpha and beta parameters
+ *             from the Lanczos calculation to file.
+ * @author Carlos Mejuto Zaera
+ * @date 28/01/2022
+ */
+template <class MatOp>
+void GF_Diag(const Eigen::VectorXd &state_0, const MatOp &H,
+             const std::vector<std::complex<double>> &freqs,
+             std::vector<std::complex<double>> &gf, double E0, bool ispart,
+             int nLanIts = 1000, bool saveABtofile = false,
+             std::string fpref = "") {
+  using dbl = std::numeric_limits<double>;
+  // FIRST, WE HAVE TO COMPUTE THE LANCZOS alphas AND betas
+  double tol = 1.E-6;
+  std::vector<double> alphas, betas;
+
+  Lanczos(state_0, H, nLanIts, alphas, betas, tol);
+
+  int kry_size = 0;
+  for(int i = 1; i < nLanIts; i++) {
+    if(abs(betas[i]) <= tol) break;
+    kry_size++;
+  }
+
+  double sign = ispart ? -1. : 1.;
+
+  if(saveABtofile) {
+    std::ofstream ofile(fpref + "alphas.dat", std::ios::out);
+    ofile.precision(dbl::max_digits10);
+    for(int i = 0; i <= kry_size; i++)
+      ofile << std::scientific << alphas[i] << std::endl;
+    ofile.close();
+    ofile.open(fpref + "betas.dat", std::ios::out);
+    ofile.precision(dbl::max_digits10);
+    for(int i = 0; i <= kry_size; i++)
+      ofile << std::scientific << betas[i] << std::endl;
+    ofile.close();
+  }
+
+  // FINALLY, WE CAN COMPUTE THE GREEN'S FUNCTION
+  gf.clear();
+  gf.resize(freqs.size(), std::complex<double>(0., 0.));
+
+#pragma omp parallel for
+  for(int indx_w = 0; indx_w < freqs.size(); indx_w++) {
+    gf[indx_w] = betas[kry_size] * betas[kry_size] /
+                 (freqs[indx_w] + sign * (alphas[kry_size] - E0));
+    for(int i = kry_size - 1; i >= 0; i--)
+      gf[indx_w] = betas[i] * betas[i] /
+                   (freqs[indx_w] + sign * (alphas[i] - E0) -
+                    gf[indx_w]);  // SINCE I CHOSE betas[0] = normpsi^2
+  }
+}
+
+/**
+ * @brief Routine to build basis to compute the basis for Green's function
+ *        calculation on the state |wfn> considering orbital orb. Templated
+ *        to allow for differente slater determinant implementations (thinking
+ *        beyond ED).
+ *
+ * @tparam class DetType: Slater determiant type. Should implement functions to
+ * check for orbital occupation, returning state bitstring and finding single
+ * excitations accounting for active space structure.
+ * @tparam DetStateType: State bitstring type. Encodes fermionic state in second
+ * quatization, should allow for basic bit operations, and being an index in a
+ * dictionary.
+ *
+ * @param [in] int orb: Orbital index for the Green's function basis.
+ * @param [in] bool sp_up: If true, the orbital is spin up, otherwise it's spin
+ *             down.
+ * @param [in] bool is_part: If true, compute GF basis for particle sector
+ * (particle addition), otherwise compute it for hole sector (particle removal).
+ * @param [in] const VectorXd: Coefficient of the wave function describing the
+ *             state whose Green's function we are computing.
+ * @param [in] const std::vector<DetType> &old_basis: Basis of determinants
+ * describing the wave function wfn.
+ * @param [out] std::vector<DetType> &new_basis: On return, the built basis for
+ * the Green's function calculation.
+ * @param [in] const std::vector<double> &occs: Occupation numbers of all
+ * orbitals. Used to define active spaces.
+ * @param [in] const GFSettings &settings: Includes input parameters, in
+ * particular: o) double asThres: Threshold for the occupation numbers to define
+ * the active space. If 0+asThres <= occ[i] <= 2 - asThres, then the orbital i
+ * is added into the active space. o) double GFseedThreshold: Threshold to
+ * determine from which determinants to get excitations in the basis
+ * construction. o) int tot_SD: Number of layers of single excitations to build
+ * the basis. o) size_t trunc_size: Max. size for the Green's function basis.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 28/01/2022
+ */
+template <size_t nbits, typename index_t = int32_t>
+void get_GF_basis_AS_1El(int orb, bool sp_up, bool is_part,
+                         const Eigen::VectorXd &wfn,
+                         const std::vector<std::bitset<nbits>> &old_basis,
+                         std::vector<std::bitset<nbits>> &new_basis,
+                         const std::vector<double> &occs,
+                         const GFSettings &settings) {
+  // CARLOS: BUILDS BASIS FOR THE ADD SPACE NEEDED TO DESCRIBE THE DIAGONAL
+  // PARTICLE GF ELEMENT OF ORBITAL orb.
+  using bitset_map =
+      std::map<std::bitset<nbits>, size_t, bitset_less_comparator<nbits>>;
+  using bitset_map_iterator = typename bitset_map::iterator;
+
+  size_t norbs = settings.norbs;
+  size_t trunc_size = settings.trunc_size;
+  int tot_SD = settings.tot_SD;
+  double GFseedThreshold = settings.GFseedThres;
+  double asThres = settings.asThres;
+
+  std::cout << "COMPUTING GF SPACE IN *" << (is_part ? "PARTICLE" : "HOLE")
+            << "* SECTOR FOR ORBITAL " << orb << ", WITH SPIN *"
+            << (sp_up ? "UP" : "DOWN") << "*" << std::endl;
+
+  size_t ndets = old_basis.size();
+  size_t cgf = -1;
+  size_t sporb = sp_up ? orb : orb + nbits / 2;
+  std::bitset<nbits> uni_string;
+  std::vector<std::bitset<nbits>> founddets;
+  founddets.reserve(trunc_size);
+  bitset_map founddet_pos, basedet_pos;
+  bitset_map_iterator it;
+  // ACTIVE SPACE
+  std::vector<uint32_t> as_orbs;
+  for(size_t i = 0; i < occs.size(); i++) {
+    if(occs[i] >= asThres && occs[i] <= (1. - asThres)) as_orbs.push_back(i);
+  }
+  std::cout << "ACTIVE SPACE:   [";
+  for(int iii = 0; iii < as_orbs.size(); iii++)
+    std::cout << as_orbs[iii] << ", ";
+  std::cout << "]" << std::endl;
+
+  // INITIALIZE THE DICTIONARY OF BASE DETERMINANTS
+  for(size_t iii = 0; iii < ndets; iii++) basedet_pos[old_basis[iii]] = iii;
+
+  // LOOP OVER ALL STATES IN THE BASIS AND BUILD THE BASIS
+  for(size_t iii = 0; iii < ndets; iii++) {
+    size_t norb1 = orb;
+    // CHECK WHETHER IT CORRESPONDS TO this GF: a_i^+|wfn> OR a_i|wfn>
+    bool ingf = false;
+    if(is_part && !old_basis[iii][sporb])  // PARTICLE
+      ingf = true;
+    else if(!is_part && old_basis[iii][sporb])  // HOLE
+      ingf = true;
+    if(ingf) {
+      // YES, ADD TO LIST
+      std::bitset<nbits> temp = old_basis[iii];
+      temp.flip(sporb);
+      it = founddet_pos.find(temp);
+      if(it == founddet_pos.end()) {
+        cgf++;
+        founddet_pos[temp] = cgf;
+        founddets.push_back(temp);
+      }
+    }
+  }
+
+  // IF NO STATE FOUND, THIS BASIS IS EMPTY
+  if(cgf + 1 == 0) {
+    new_basis.resize(cgf + 1);
+    return;
+  }
+  // NOW, ADD SINGLE-DOUBLES. THIS AMOUNTS TO ADDING THE
+  // SINGLE EXCITATIONS ON TOP OF FOUNDDET
+  std::cout << "BEFORE ADDING SINGLE-DOUBLES, cgf = " << cgf << std::endl;
+  size_t orig_cgf = cgf;
+
+  std::cout << "Nr. OF STATES: " << cgf + 1 << std::endl;
+
+  size_t norb = orb;
+  Eigen::VectorXd b = Eigen::VectorXd::Zero(cgf + 1);
+  // COMPUTE VECTOR b IN THE NEW BASIS
+  for(size_t ndet = 0; ndet < cgf + 1; ndet++) {
+    // CHECK, CAN ndet COME FROM ai^+|GS> / ai|wfn> WITH THE ORBITAL *orb?
+    bool fromwfn = false;
+    if(is_part && founddets[ndet][sporb])
+      fromwfn = true;
+    else if(!is_part && !founddets[ndet][sporb])
+      fromwfn = true;
+    if(fromwfn) {
+      // YES, CHECK WHETHER THE GENERATING STATE COMES FROM THE BASE DETERMINANT
+      // SPACE
+      std::bitset<nbits> temp = founddets[ndet];
+      temp.flip(sporb);
+      it = basedet_pos.find(temp);
+      if(it != basedet_pos.end()) {
+        // IT DOES COME INDEED FROM A DETERMINANT IN THE ORIGINAL GROUND STATE
+        // THUS, IT CONTRIBUTES TO wfns1
+        double sign = sp_up ? GetInsertionUpSign(temp, orb)
+                            : GetInsertionDoSign(temp, orb);
+        double fac = wfn(it->second);
+        b(ndet) += fac * sign;
+      }
+    }
+  }
+
+  std::cout << "GOING TO ITERATIVELY ADD SINGLES AND DOUBLES!" << std::endl;
+  std::cout << "--BEFORE ADDING SINGLE-DOUBLES, nterms = " << cgf + 1
+            << std::endl;
+  size_t orig_nterms = cgf + 1;
+
+  int startSD = 0, endSD = orig_nterms - 1, GFseed = 0;
+  for(int nSD = 1; nSD <= tot_SD && cgf <= trunc_size; nSD++) {
+    for(int iii = startSD; iii <= endSD && cgf <= trunc_size; iii++) {
+      if(nSD == 1)
+        if(abs(b(iii)) < GFseedThreshold)
+          continue;  // FROM THE ORIGINAL SET, ONLY CONSIDER THE MOST IMPORTANT
+                     // ONES
+      // GET SINGLES
+      GFseed += (nSD == 1) ? 1 : 0;
+      std::vector<std::bitset<nbits>> tdets;
+      generate_singles_spin_as(norbs, founddets[iii], tdets, as_orbs);
+
+      for(size_t jjj = 0; jjj < tdets.size(); jjj++) {
+        it = founddet_pos.find(tdets[jjj]);
+        if(it ==
+           founddet_pos.end()) {  // FOR ZERO STATES ONLY, ADD: and matches > 0
+          cgf++;
+          founddet_pos[tdets[jjj]] = cgf;
+          founddets.push_back(tdets[jjj]);
+        }
+      }
+    }
+    startSD = endSD + 1;
+    endSD = cgf;
+  }
+  std::cout << "--AFTER CUTTING BY: " << GFseedThreshold
+            << ", GF-SPACE WITH GF SEED: " << GFseed
+            << ", WE HAVE STATES: " << cgf + 1 << std::endl;
+
+  // NOW THAT WE FOUND THE BASIS, JUST STORE IT
+  new_basis.resize(cgf + 1);
+  new_basis.assign(founddets.begin(), founddets.end());
+}
+
+/**
+ * @brief Routine to prepare the wave functions for the Lanczos computation of
+ * the Green's function (be it Band Lanczos or regular Lanczos).
+ *
+ * @tparam class DetType: Slater determiant type. Should implement functions to
+ * check for orbital occupation, returning state bitstring and finding single
+ * excitations accounting for active space structure.
+ * @tparam DetStateType: State bitstring type. Encodes fermionic state in second
+ * quatization, should allow for basic bit operations, and being an index in a
+ * dictionary.
+ *
+ * @param [in] const VectorXd &base_wfn: Wave function from which to compute the
+ * Green's function.
+ * @param [in] const std::vector<int> &GF_orbs: Orbital indices for which to
+ * compute the GF.
+ * @param [in] const std::vector<bool> &is_up: Flags regarding the spin of each
+ * orbital. True means spin up, false means spin down.
+ * @param [in] const std::vector<DetType> &base_dets: Vector of the Slater
+ * determinants in the basis.
+ * @param [in] const std::vector<DetType> &GF_gets: Vector of the Slater
+ * determinants in the GF basis.
+ * @param [in] bool is_part: Flag to determine GF sector. For true, we are in
+ * the particle sector. For false we are in the hole sector.
+ * @param [out] std::vector<int>& todelete: On return, contains a list of the
+ * orbitals for which the Green's function vector ai/ai+|base_wfn> vanishes, and
+ * hence is eliminated from the list of wave functions.
+ * @param [in] double zero_thresh: Threshold to decide whether a computed vector
+ * is zero or not, judging by the magnitude of the norm.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 01/02/2022
+ */
+template <size_t nbits, typename index_t = int32_t>
+auto BuildWfn4Lanczos(const Eigen::VectorXd &base_wfn,
+                      const std::vector<int> &GF_orbs,
+                      const std::vector<bool> &is_up,
+                      const std::vector<std::bitset<nbits>> &base_dets,
+                      const std::vector<std::bitset<nbits>> &GF_dets,
+                      bool is_part, std::vector<int> &todelete,
+                      double zero_thresh = 1.E-7) {
+  // INITIALIZE THE DICTIONARY OF BASE DETERMINANTS
+  using bitset_map =
+      std::map<std::bitset<nbits>, size_t, bitset_less_comparator<nbits>>;
+  using bitset_map_iterator = typename bitset_map::const_iterator;
+
+  bitset_map base_dets_pos;
+  for(size_t iii = 0; iii < base_dets.size(); iii++)
+    base_dets_pos[base_dets[iii]] = iii;
+
+  // PREPARE THE WAVEFUNCTIONS FOR THE BAND LANCZOS
+  size_t nterms = GF_dets.size();
+  std::vector<double> wfns(GF_orbs.size() * nterms, 0.);
+  for(size_t iorb = 0; iorb < GF_orbs.size(); iorb++) {
+    int orb = GF_orbs[iorb];
+    bool sp_up = is_up[iorb];
+    int sporb = sp_up ? orb : orb + nbits / 2;
+    // BUILD THE WAVEFUNCTION FOR ORBITAL orb
+    for(size_t ndet = 0; ndet < nterms; ndet++) {
+      // CHECK, CAN ndet COME FROM ai^+|GS> *OR* ai|GS> WITH THE ORBITAL orb?
+      bool in_gf = false;
+      if(is_part && GF_dets[ndet][sporb])  // PARTICLE
+        in_gf = true;
+      else if(!is_part && !GF_dets[ndet][sporb])  // HOLE
+        in_gf = true;
+      if(in_gf) {
+        // YES, CHECK WHETHER THE GENERATING STATE COMES FROM THE BASE
+        // DETERMINANT SPACE
+        std::bitset<nbits> temp(GF_dets[ndet]);
+        temp.flip(sporb);
+        bitset_map_iterator it = base_dets_pos.find(temp);
+        if(it != base_dets_pos.end()) {
+          // IT DOES COME INDEED FROM A DETERMINANT IN THE ORIGINAL GROUND STATE
+          // THUS, IT CONTRIBUTES TO wfns1
+          double sign = sp_up ? GetInsertionUpSign(temp, orb)
+                              : GetInsertionDoSign(temp, orb);
+          double fac = base_wfn(it->second);
+          wfns[iorb * nterms + ndet] += fac * sign;
+        }
+      }
+    }
+  }
+
+  // CHECK WHETHER ANY OF THE VECTORS IS EXACTLY ZERO. IF SO, TAKE IT OUT!
+  todelete.clear();
+  for(int orb_indx = 0; orb_indx < GF_orbs.size(); orb_indx++) {
+    double st_nrm = blas::dot(nterms, wfns.data() + orb_indx * nterms, 1,
+                              wfns.data() + orb_indx * nterms, 1);
+    if(abs(st_nrm) <= zero_thresh) todelete.push_back(orb_indx);
+  }
+  int nvecs = GF_orbs.size() - todelete.size();
+  for(int i = 0; i < todelete.size(); i++)
+    wfns.erase(wfns.begin() + (todelete[i] - i) * nterms,
+               wfns.begin() + (todelete[i] - i + 1) * nterms);
+  std::cout << "ORBITALS WITH NO CORRESPONING ADD-VECTOR: [";
+  for(int i = 0; i < todelete.size(); i++) std::cout << todelete[i] << ", ";
+  std::cout << "]" << std::endl;
+
+  return std::tuple(wfns, nvecs);
+}
+
+/**
+ * @brief Routine to write Green's function to file. It stores the frequency
+ * grid together with the full GF matrix. Also, in case of a multi-orbitla GF,
+ * it stores the orbital indices in a separate file.
+ *
+ * @param [in] const std::vector<std::vector<std::complex double> >
+ * > &GF: GF to store. Written as GF[freq-axis][orb1 * norbs + orb2].
+ * @param [in] const std::vector<std::complex<double> > &ws: Frequency grid.
+ * @param [in] const std::vector<int> &GF_orbs: Orbital indices for the Green's
+ * function, as requested originally in the GF computation. Some of them may not
+ * have been actually used, if the corresponding ai/ai+|wfn0> states were zero.
+ * @param [in] const std::vector<int> &todelete: List of deleted orbital indices
+ * in the case just described.
+ * @param [in] const bool is_part: Flag to label GF file depending on whether it
+ * is a particle GF (True), or a hole GF (False).
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 02/02/2022
+ */
+void write_GF(const std::vector<std::vector<std::complex<double>>> &GF,
+              const std::vector<std::complex<double>> &ws,
+              const std::vector<int> &GF_orbs, const std::vector<int> &todelete,
+              const bool is_part);
+
+/**
+ * @brief Routine to run Green's function calculation at zero temperature from
+ * some input ref. wave function. Allows for choosing to compute particle/hole
+ * GF, using normal/band Lanczos, and simplifying the calculation by virtue of
+ * exploiting some active space structure.
+ *
+ * @tparam class DetType: Slater determiant type. Should implement functions to
+ * check for orbital occupation, returning state bitstring and finding single
+ * excitations accounting for active space structure.
+ * @tparam DetStateType: State bitstring type. Encodes fermionic state in second
+ * quatization, should allow for basic bit operations, and being an index in a
+ * dictionary.
+ *
+ * @param [out] std::vector<std::vector<std::complex<double> > >:
+ * On output, contains the computed Green's function, in format
+ * GF[freq.][orb1 * norbs + orb2].
+ * @param [in] const Eigen::VectorXd &wfn0: Reference wave function from which
+ * to compute the GF.
+ * @param [in] const FermionHamil &H: Fermionic Hamiltonian defining the system.
+ * @param [in] const std::vector<DetType> &base_dets: Basis of Slater
+ * determinants for the description of wfn0.
+ * @param [in] const double energ: Energy of reference state wfn0.
+ * @param [in] const bool is_part: Flag to determine GF sector. For true, we are
+ * in the particle sector. Otherwise, we are in the hole sector.
+ * @param [in] const std::vector<std::complex<double> > &ws: Frequency grid over
+ * which to compute the Green's function.
+ * @param [in] const std::vector<double> &occs: Occupation numbers for each
+ * orbital.
+ * @param [in] const GFSettings &settings: Structure with various parameters for
+ * Green's function calculation.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 01/02/2022
+ */
+template <size_t nbits, typename index_t = int32_t>
+void RunGFCalc(std::vector<std::vector<std::complex<double>>> &GF,
+               const Eigen::VectorXd &wfn0, HamiltonianGenerator<nbits> &Hgen,
+               const std::vector<std::bitset<nbits>> &base_dets,
+               const double energ, const bool is_part,
+               const std::vector<std::complex<double>> &ws,
+               const std::vector<double> &occs, const GFSettings &settings) {
+  using Clock = std::chrono::high_resolution_clock;
+  // READ INPUT
+  const size_t trunc_size = settings.trunc_size;
+  const int tot_SD = settings.tot_SD;
+  const double GFseedThreshold = settings.GFseedThres;
+  const double asThres = settings.asThres;
+  const bool use_bandLan = settings.use_bandLan;
+  const std::vector<int> GF_orbs_basis = settings.GF_orbs_basis;
+  const std::vector<int> GF_orbs_comp = settings.GF_orbs_comp;
+  const std::vector<bool> is_up_basis = settings.is_up_basis;
+  const std::vector<bool> is_up_comp = settings.is_up_comp;
+
+  double h_el_tol = 1.E-6;
+  int nLanIts = settings.nLanIts;
+  bool print = settings.print;
+  bool writeGF = settings.writeGF;
+  bool saveGFmats = settings.saveGFmats;
+
+  time_t loop1 = time(NULL), loop2 = time(NULL);
+  auto loop1C = Clock::now(), loop2C = Clock::now();
+  size_t ndets = base_dets.size();
+
+  // FIRST, BUILD THE BASIS SEQUENTIALLY BY FORMING THE BASES OF EACH ORBITAL
+  // AND ADDING THEM TOGETHER
+  std::vector<std::bitset<nbits>> gf_dets, gf_dets_tmp;
+  for(size_t iorb = 0; iorb < GF_orbs_basis.size(); iorb++) {
+    get_GF_basis_AS_1El<nbits, index_t>(GF_orbs_basis[iorb], is_up_basis[iorb],
+                                        is_part, wfn0, base_dets, gf_dets_tmp,
+                                        occs, settings);
+    gf_dets.insert(gf_dets.end(), gf_dets_tmp.begin(), gf_dets_tmp.end());
+
+    gf_dets_tmp.clear();
+
+    auto comparator = [](const auto &x, const auto &y) {
+      return bitset_less(x, y);
+    };
+#ifdef MACIS_USE_BOOST_SORT
+    boost::sort::pdqsort_branchless
+#else
+    std::sort
+#endif
+        (gf_dets.begin(), gf_dets.end(), comparator);
+
+    typename std::vector<std::bitset<nbits>>::iterator b_it =
+        std::unique(gf_dets.begin(), gf_dets.end());
+    gf_dets.resize(std::distance(gf_dets.begin(), b_it));
+    std::cout << "---> BASIS HAS NOW: " << gf_dets.size()
+              << " ELMENTS!! BY ORBITAL " << iorb + 1 << "/"
+              << GF_orbs_basis.size() << std::endl;
+  }
+
+  size_t nterms = gf_dets.size();
+  std::cout << "---> FINAL ADD BASIS HAS " << nterms << " ELEMENTS"
+            << std::endl;
+
+  loop1 = time(NULL);
+  loop1C = Clock::now();
+  auto hamil = make_dist_csr_hamiltonian<index_t>(
+      MPI_COMM_WORLD, gf_dets.begin(), gf_dets.end(), Hgen, h_el_tol);
+  loop2 = time(NULL);
+  loop2C = Clock::now();
+  std::cout << std::setprecision(3) << "Building "
+            << (is_part ? "*PARTICLE*" : "*HOLE*") << " Hamiltonian: "
+            << double(std::chrono::duration_cast<std::chrono::milliseconds>(
+                          loop2C - loop1C)
+                          .count()) /
+                   1000
+            << std::endl;
+  // NOW, PERFORM THE BAND LANCZOS ON THE TRUNCATED SPACE
+  // WE ALREADY BUILT THE HAMILTONIANS
+
+  if(nterms < nLanIts) nLanIts = nterms;
+
+  // PREPARE THE WAVEFUNCTIONS FOR THE BAND LANCZOS
+  std::vector<int> todelete;
+  std::vector<double> wfns;
+  int nvecs;
+  std::tie(wfns, nvecs) = BuildWfn4Lanczos<nbits, index_t>(
+      wfn0, GF_orbs_comp, is_up_comp, base_dets, gf_dets, is_part, todelete);
+
+  // //ACTUALLY COMPUTE THE GF!
+  time_t GF_loop1 = time(NULL);
+  auto GF_loop1C = Clock::now();
+
+  if(use_bandLan) {
+    BandResolvent(hamil, wfns, ws, GF, nLanIts, energ, is_part, nvecs, nterms,
+                  print, saveGFmats);
+  } else {
+    // DO SIMPLE LANCZOS FOR ALL GF ELEMENTS
+    SparsexDistSpMatOp hamil_wrap(hamil);
+    GF.clear();
+    GF.resize(ws.size(), std::vector<std::complex<double>>(
+                             nvecs * nvecs, std::complex<double>(0., 0.)));
+    for(int i = 0; i < nvecs; i++) {
+      std::vector<std::complex<double>> tGF;
+      // DIAGONAL ELEMENT
+      std::cout << "DOING ELEMENT (" << i << ", " << i << ")" << std::endl;
+      Eigen::VectorXd twfn = Eigen::Map<Eigen::VectorXd, Eigen::Unaligned>(
+          wfns.data() + nterms * i, nterms);
+      std::string fpref_basis = is_part ? "particle" : "hole";
+      std::string fpref =
+          fpref_basis + "_" + std::to_string(i) + "_" + std::to_string(i);
+      GF_Diag<SparsexDistSpMatOp>(twfn, hamil_wrap, ws, tGF, energ, is_part,
+                                  nLanIts, saveGFmats, fpref);
+      for(int iw = 0; iw < ws.size(); iw++) GF[iw][i * nvecs + i] = tGF[iw];
+      for(int j = i + 1; j < nvecs; j++) {
+        // OFF DIAGONAL ELEMENTS
+        // NOTE: WORKS ONLY FOR REAL-VALUED WAVE FUNCTIONS.
+        std::cout << "DOING ELEMENT (" << i << ", " << j << ")" << std::endl;
+        for(size_t iii = 0; iii < nterms; iii++)
+          twfn(iii) = wfns[i * nterms + iii] + wfns[j * nterms + iii];
+        fpref = fpref_basis + "_" + std::to_string(i) + "_" +
+                std::to_string(j) + "_a";
+        GF_Diag<SparsexDistSpMatOp>(twfn, hamil_wrap, ws, tGF, energ, is_part,
+                                    nLanIts, saveGFmats, fpref);
+        for(int iw = 0; iw < ws.size(); iw++)
+          GF[iw][i * nvecs + j] += 0.25 * tGF[iw];
+        for(size_t iii = 0; iii < nterms; iii++)
+          twfn(iii) = wfns[i * nterms + iii] - wfns[j * nterms + iii];
+        fpref = fpref_basis + "_" + std::to_string(i) + "_" +
+                std::to_string(j) + "_b";
+        GF_Diag<SparsexDistSpMatOp>(twfn, hamil_wrap, ws, tGF, energ, is_part,
+                                    nLanIts);
+        for(int iw = 0; iw < ws.size(); iw++) {
+          GF[iw][i * nvecs + j] -= 0.25 * tGF[iw];
+          GF[iw][j * nvecs + i] = GF[iw][i * nvecs + j];
+        }
+      }
+    }
+  }
+
+  time_t GF_loop2 = time(NULL);
+  auto GF_loop2C = Clock::now();
+  std::cout << std::setprecision(3) << "Computing GF with "
+            << (use_bandLan ? " *Band Lanczos*" : "*Regular Lanczos*")
+            << double(std::chrono::duration_cast<std::chrono::milliseconds>(
+                          GF_loop2C - GF_loop1C)
+                          .count()) /
+                   1000
+            << std::endl;
+
+  if(writeGF) write_GF(GF, ws, GF_orbs_comp, todelete, is_part);
+}
+
+}  // namespace macis
diff --git a/include/macis/gf/lanczos.hpp b/include/macis/gf/lanczos.hpp
new file mode 100644
index 00000000..97677769
--- /dev/null
+++ b/include/macis/gf/lanczos.hpp
@@ -0,0 +1,619 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+/**
+ * @brief Implements simple one-band Lanczos method
+ *        to compute the lowest eigenvalue of a given
+ *        Hamiltonian.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 05/04/2021
+ */
+#pragma once
+
+#include <iomanip>
+#include <sparsexx/matrix_types/csr_matrix.hpp>
+#include <sparsexx/spblas/pspmbv.hpp>
+#include <sparsexx/spblas/spmbv.hpp>
+
+#include "macis/gf/eigsolver.hpp"
+
+namespace macis {
+
+struct LanczosSettings {
+  size_t itermax = 1000;
+  double Lantol = 1.E-8;
+  double E0tol = 1.E-8;
+  bool print = false;
+};
+
+/**
+ * @brief Wrapper class for Eigen::MatrixXd, to be used in
+ *        the Lanczos code. Just needs to implement a matrix-
+ *        vector product dot, and a function rows() to return the
+ *        nr. of rows in the matrix.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 05/04/2021
+ */
+class eigMatDOp {
+ private:
+  const Eigen::MatrixXd *mat;
+
+ public:
+  /**
+   * @brief Constructor, takes Eigen::MatrixXd and keeps
+   *        pointer to it.
+   *
+   * @param [in] const Eigen::MatrixXd &A: Matrix to wrap.
+   *
+   * @author Carlos Mejuto Zaera
+   * @date 05/04/2021
+   */
+  eigMatDOp(const Eigen::MatrixXd &A) { mat = &A; }
+  /**
+   * @brief Simple matrix-vector product.
+   *
+   * @param [in] const Eigen::VectorXd &vec: Input vector.
+   *
+   * @returns Eigen::VectorXd: A * vec.
+   *
+   * @author Carlos Mejuto Zaera
+   * @date 05/04/2021
+   */
+  Eigen::VectorXd dot(const Eigen::VectorXd &vec) const { return (*mat) * vec; }
+  /**
+   * @brief Returns nr. of rows in the wrapped matrix.
+   *
+   * @returns int: Nr. of rows.
+   *
+   * @author Carlos Mejuto Zaera
+   * @date 05/04/2021
+   */
+  int64_t rows() const { return mat->rows(); }
+};
+
+/**
+ * @brief Wrapper class for Eigen::SparseMatrix<double, Eigen::RowMajor>, to be
+ * used in the Lanczos code. Just needs to implement a matrix- vector product
+ * dot, and a function rows() to return the nr. of rows in the matrix.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 05/04/2021
+ */
+class SpMatDOp {
+ private:
+  const Eigen::SparseMatrix<double, Eigen::RowMajor> *mat;
+
+ public:
+  /**
+   * @brief Constructor, takes Eigen::SparseMatrix<double, Eigen::RowMajor> and
+   * keeps pointer to it.
+   *
+   * @param [in] const Eigen::SparseMatrix<double, Eigen::RowMajor> &A: Matrix
+   * to wrap.
+   *
+   * @author Carlos Mejuto Zaera
+   * @date 05/04/2021
+   */
+  SpMatDOp(const Eigen::SparseMatrix<double, Eigen::RowMajor> &A) { mat = &A; }
+  /**
+   * @brief Simple matrix-vector product.
+   *
+   * @param [in] const Eigen::VectorXd &vec: Input vector.
+   *
+   * @returns Eigen::VectorXd: A * vec.
+   *
+   * @author Carlos Mejuto Zaera
+   * @date 05/04/2021
+   */
+  Eigen::VectorXd dot(const Eigen::VectorXd &vec) const { return (*mat) * vec; }
+  /**
+   * @brief Returns nr. of rows in the wrapped matrix.
+   *
+   * @returns int: Nr. of rows.
+   *
+   * @author Carlos Mejuto Zaera
+   * @date 05/04/2021
+   */
+  int64_t rows() const { return mat->rows(); }
+};
+
+/**
+ * @brief Wrapper class for
+ * sparsexx::dist_sparse_matrix<sparsexx::csr_matrix<double, int32_t>, to be
+ * used in the Lanczos code. Just needs to implement a matrix- vector product
+ * dot, and a function rows() to return the nr. of rows in the matrix.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 13/06/2021
+ */
+class SparsexDistSpMatOp {
+ private:
+  const sparsexx::dist_sparse_matrix<sparsexx::csr_matrix<double, int32_t> >
+      *mat;
+  sparsexx::spblas::spmv_info<int32_t> spmv_info;
+
+ public:
+  /**
+   * @brief Constructor, takes sparsexx::dist_sparse_matrix<
+   * sparsexx::csr_matrix<double, int32_t> and keeps pointer to it.
+   *
+   * @param [in] const sparsexx::dist_sparse_matrix<
+   * sparsexx::csr_matrix<double, int32_t> &A: Matrix to wrap.
+   *
+   * @author Carlos Mejuto Zaera
+   * @date 05/04/2021
+   */
+  SparsexDistSpMatOp(
+      const sparsexx::dist_sparse_matrix<sparsexx::csr_matrix<double, int32_t> >
+          &A) {
+    mat = &A;
+    spmv_info = sparsexx::spblas::generate_spmv_comm_info(A);
+  }
+  /**
+   * @brief Simple matrix-vector product.
+   *
+   * @param [in] const Eigen::VectorXd &vec: Input vector.
+   *
+   * @returns Eigen::VectorXd: A * vec.
+   *
+   * @author Carlos Mejuto Zaera
+   * @date 05/04/2021
+   */
+  Eigen::VectorXd dot(const Eigen::VectorXd &vec) const {
+    std::vector<double> vin(&vec[0], vec.data() + vec.cols() * vec.rows());
+    std::vector<double> vout(vec.size(), 0.);
+    sparsexx::spblas::pgespmv(1., *mat, vin.data(), 0., vout.data(), spmv_info);
+    Eigen::VectorXd vec_out =
+        Eigen::Map<Eigen::VectorXd, Eigen::Unaligned>(vout.data(), vout.size());
+    return vec_out;
+  }
+  /**
+   * @brief Returns nr. of rows in the wrapped matrix.
+   *
+   * @returns int: Nr. of rows.
+   *
+   * @author Carlos Mejuto Zaera
+   * @date 05/04/2021
+   */
+  int64_t rows() const { return mat->m(); }
+};
+
+/**
+ * @brief Simple single band Lanczos implementation.
+ *
+ * @param [in] const Eigen::VectorXd &start_vec: Starting vector.
+ * @param [in] const MatOp &H: Wrapped Matrix. Has to be Hermitian!
+ * @param [in] int64_t nLanIts: Maximal number of iterations.
+ * @param [out] std::vector<double> &alphas: Diagonal of the tri-diagonal H.
+ * @param [out] std::vector<double> &betas: Off-diagonal of the tri-diagonal H.
+ * @param [in] double tol: Lanczos tolerance.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 05/04/2021
+ */
+template <class MatOp>
+void Lanczos(const Eigen::VectorXd &start_vec, const MatOp &H, int64_t nLanIts,
+             std::vector<double> &alphas, std::vector<double> &betas,
+             double tol) {
+  // LANCZOS ROUTINE USING TEMPLATED MATRIX
+  // CLASS. ONLY NEEDS TO PROVIDE A MATRIX
+  // VECTOR PRODUCT.
+  //
+  // SOME LAMBDAS
+  auto VecNorm = [](const Eigen::VectorXd &vec) -> double {
+    return std::sqrt(blas::dot(vec.size(), vec.data(), 1, vec.data(), 1));
+  };
+  auto InnProd = [](const Eigen::VectorXd &Lvec,
+                    const Eigen::VectorXd &Rvec) -> double {
+    return blas::dot(Lvec.size(), Lvec.data(), 1, Rvec.data(), 1);
+  };
+  // INITIALIZATIONS
+  int64_t n = start_vec.rows();
+  Eigen::VectorXd qold = Eigen::VectorXd::Zero(n);
+  Eigen::VectorXd qtemp = Eigen::VectorXd::Zero(n);
+  Eigen::VectorXd qnew = Eigen::VectorXd::Zero(n);
+  alphas.clear();
+  betas.clear();
+
+  alphas.resize(nLanIts, 0.);
+  betas.resize(nLanIts + 1, 0.);
+
+  double normpsi = VecNorm(start_vec);
+  int64_t nlan = -1, itermax = nLanIts;
+
+  nlan++;
+  qold = start_vec / normpsi;
+
+  qnew = H.dot(qold);
+  alphas[0] = InnProd(qold, qnew);
+  qnew -= alphas[0] * qold;
+  betas[0] = normpsi;
+  betas[1] = VecNorm(qnew);
+  if(abs(betas[1]) <= tol) itermax = 1;
+  for(size_t iter = 1; iter < itermax; iter++) {
+    nlan++;
+    qtemp = qold;
+    qold = qnew / betas[iter];
+    qnew = -betas[iter] * qtemp;
+
+    qtemp = H.dot(qold);
+
+    qnew += qtemp;
+    alphas[iter] = InnProd(qold, qnew);
+    qnew -= alphas[iter] * qold;
+    betas[iter + 1] = VecNorm(qnew);
+
+    if(abs(betas[iter + 1]) <= tol) {
+      itermax = iter;
+      std::cout << "EXIT BECAUSE BETA IS TOO SMALL!! AT ITERATION " << iter
+                << ", betas[iter + 1] = " << betas[iter + 1] << std::endl;
+      break;
+    }
+  }
+}
+
+/**
+ * @brief Single band Lanczos implementation, with backprojection to
+ *        evaluate eigenvector in the original basis. A previous Lanczos
+ *        to determine the ground state in the Krylov basis has to be performed
+ *        first.
+ *
+ * @param [in] const Eigen::VectorXd &start_vec: Starting vector.
+ * @param [in] const MatOp &H: Wrapped Matrix. Has to be Hermitian!
+ * @param [in] int64_t nLanIts: Maximal number of iterations.
+ * @param [in] Eigen::VectorXd &vec_P: Eigenvector in the Krylov basis.
+ * @param [out] Eigen::VectorXd &vec_BP: Eigenvector in the original basis.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 05/04/2021
+ */
+template <class MatOp>
+void Lanczos_BackProj(const Eigen::VectorXd &start_vec, const MatOp &H,
+                      int64_t nLanIts, Eigen::VectorXd &vec_P,
+                      Eigen::VectorXd &vec_BP) {
+  // REBUILD THE EIGENVECTOR FROM A PREVIOUS LANZOS
+  // CALCULATION.
+  //
+  // SOME LAMBDAS
+  auto VecNorm = [](const Eigen::VectorXd &vec) -> double {
+    return std::sqrt(blas::dot(vec.size(), vec.data(), 1, vec.data(), 1));
+  };
+  auto InnProd = [](const Eigen::VectorXd &Lvec,
+                    const Eigen::VectorXd &Rvec) -> double {
+    return blas::dot(Lvec.size(), Lvec.data(), 1, Rvec.data(), 1);
+  };
+  // INITIALIZATIONS
+  int64_t n = start_vec.rows();
+  Eigen::VectorXd qold = Eigen::VectorXd::Zero(n);
+  Eigen::VectorXd qtemp = Eigen::VectorXd::Zero(n);
+  Eigen::VectorXd qnew = Eigen::VectorXd::Zero(n);
+
+  std::vector<double> alphas(nLanIts, 0.);
+  std::vector<double> betas(nLanIts + 1, 0.);
+
+  double normpsi = VecNorm(start_vec);
+  int64_t nlan = -1, itermax = nLanIts;
+
+  vec_BP = Eigen::VectorXd::Zero(n);
+
+  nlan++;
+  qold = start_vec / normpsi;
+  vec_BP = vec_P(0) * qold;
+
+  qnew = H.dot(qold);
+  alphas[0] = InnProd(qold, qnew);
+  qnew -= alphas[0] * qold;
+  betas[0] = normpsi;
+  betas[1] = VecNorm(qnew);
+  for(size_t iter = 1; iter < itermax; iter++) {
+    nlan++;
+    qtemp = qold;
+    qold = qnew / betas[iter];
+    vec_BP += vec_P(iter) * qold;
+    qnew = -betas[iter] * qtemp;
+
+    qtemp = H.dot(qold);
+
+    qnew += qtemp;
+    alphas[iter] = InnProd(qold, qnew);
+    qnew -= alphas[iter] * qold;
+    betas[iter + 1] = VecNorm(qnew);
+  }
+  alphas.clear();
+  betas.clear();
+}
+
+/**
+ * @brief Determine ground state of Hamiltonian using Lanczos, returns
+ *        the ground state vector in the Krylov basis.
+ *
+ * @param [in] const Eigen::VectorXd &start_vec: Starting vector.
+ * @param [in] const MatOp &H: Wrapped Hamiltonian.
+ * @param [out] double &E0: Ground state energy.
+ * @param [out] Eigen::VectorXd &psi0_Lan: Eigenvector in the Krylov basis.
+ * @param [in] LanczosSettings &settings: Input structure with Lanczos
+ * parameters.
+ *
+ * @returns int: Required nr. of Lanczos iterations.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 05/04/2021
+ */
+template <class MatOp>
+int64_t GetGSEn_Lanczos(const Eigen::VectorXd &start_vec, const MatOp &H,
+                        double &E0, Eigen::VectorXd &psi0_Lan,
+                        const LanczosSettings &settings) {
+  // COMPUTE LOWEST EIGENVALUE OF MATRIX H
+  // USING LANCZOS. RETURNS EIGENVECTOR IN
+  // THE BASIS OF KRYLOV VECTORS
+  //
+  // SOME LAMBDAS
+  auto VecNorm = [](const Eigen::VectorXd &vec) -> double {
+    return std::sqrt(blas::dot(vec.size(), vec.data(), 1, vec.data(), 1));
+  };
+  auto InnProd = [](const Eigen::VectorXd &Lvec,
+                    const Eigen::VectorXd &Rvec) -> double {
+    return blas::dot(Lvec.size(), Lvec.data(), 1, Rvec.data(), 1);
+  };
+  // INITIALIZATIONS
+  auto w = std::setw(15);
+  double Lantol = settings.Lantol;
+  double E0tol = settings.E0tol;
+  bool print = settings.print;
+  size_t itermax = settings.itermax;
+
+  int64_t n = start_vec.rows();
+  Eigen::VectorXd qold = Eigen::VectorXd::Zero(n);
+  Eigen::VectorXd qtemp = Eigen::VectorXd::Zero(n);
+  Eigen::VectorXd qnew = Eigen::VectorXd::Zero(n);
+  Eigen::VectorXd eigvals;
+  Eigen::MatrixXd eigvecs;
+  std::vector<double> alphas, betas;
+  double currE0, prevE0;
+
+  double normpsi = VecNorm(start_vec);
+  int64_t nlan = -1;
+
+  nlan++;
+  qold = start_vec / normpsi;
+
+  qnew = H.dot(qold);
+  alphas.push_back(InnProd(qold, qnew));
+  qnew -= alphas[0] * qold;
+  betas.push_back(normpsi);
+  betas.push_back(VecNorm(qnew));
+  prevE0 = alphas[0];
+
+  if(print) {
+    std::cout << w << "Lanczos diagonalization:" << std::endl;
+    std::ostringstream header;
+    header << w << "Iter." << w << "Alpha" << w << "Beta" << w << "E0" << w
+           << "dE";
+    std::cout << header.str() << std::endl;
+    std::cout << w << std::string(header.str().length(), '-') << std::endl;
+    std::cout << w << 1 << w << std::scientific << std::setprecision(5)
+              << alphas[0] << w << betas[1] << w << prevE0 << w << "***"
+              << std::endl;
+  }
+
+  if(abs(betas[1]) <= Lantol) {
+    itermax = 1;
+    currE0 = alphas[0];
+  }
+  for(size_t iter = 1; iter < itermax; iter++) {
+    nlan++;
+    qtemp = qold;
+    qold = qnew / betas[iter];
+    qnew = -betas[iter] * qtemp;
+
+    qtemp = H.dot(qold);
+
+    qnew += qtemp;
+    alphas.push_back(InnProd(qold, qnew));
+    qnew -= alphas[iter] * qold;
+    betas.push_back(VecNorm(qnew));
+    // GET EIGENVALUES OF CURRENT TRI-DIAG MATRIX
+    Hste_v(alphas, betas, eigvals, eigvecs);
+    currE0 = eigvals(0);
+    if(print)
+      std::cout << w << iter + 1 << w << std::scientific << std::setprecision(5)
+                << alphas[iter] << w << betas[iter + 1] << w << currE0 << w
+                << abs(currE0 - prevE0) << std::endl;
+
+    if(abs(betas[iter + 1]) <= Lantol) {
+      itermax = iter + 1;
+      if(print)
+        std::cout << "EXIT LANCZOS BECAUSE BETA IS TOO SMALL!! AT ITERATION "
+                  << iter << ", betas[iter + 1] = " << betas[iter + 1]
+                  << std::endl;
+      break;
+    }
+    if(abs(currE0 - prevE0) <= E0tol) {
+      itermax = iter + 1;
+      if(print)
+        std::cout << "EXIT LANCZOS BECAUSE ENERGY ACCURACY WAS OBTAINED!! AT "
+                     "ITERATION "
+                  << iter << ", dE = " << abs(currE0 - prevE0) << std::endl;
+      break;
+    }
+    prevE0 = currE0;
+  }
+
+  if(abs(prevE0 - currE0) > E0tol && nlan == itermax - 1 &&
+     abs(betas[itermax]) > Lantol) {
+    std::ostringstream oss;
+    oss << "Unable to achieve the desired accuracy of " << std::scientific
+        << E0tol << " in Lanczos after " << itermax << " iterations!!";
+    throw(oss.str());
+  }
+
+  E0 = currE0;
+  if(itermax == 1) {
+    psi0_Lan = Eigen::VectorXd::Ones(1);
+  } else {
+    psi0_Lan = eigvecs.col(0);
+  }
+  return itermax;
+}
+
+/**
+ * @brief Determine ground state of Hamiltonian using Lanczos, returns
+ *        the ground state vector in the original basis.
+ *
+ * @param [in] const Eigen::VectorXd &start_vec: Starting vector.
+ * @param [in] const MatOp &H: Wrapped Hamiltonian.
+ * @param [out] double &E0: Ground state energy.
+ * @param [out] Eigen::VectorXd &psi0: Ground state eigenvector.
+ * @param [in] const LanczosSettings &settings: Input structure with Lanczos
+ * parameters.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 05/04/2021
+ */
+template <class MatOp>
+void GetGSEnVec_Lanczos(const Eigen::VectorXd &start_vec, const MatOp &H,
+                        double &E0, Eigen::VectorXd &psi0,
+                        const LanczosSettings &settings) {
+  // COMPUTE LOWEST EIGENVALUE AND EIGENVECTOR
+  // OF MATRIX H USING LANCZOS.
+  //
+  // SOME LAMBDAS
+  auto VecNorm = [](const Eigen::VectorXd &vec) -> double {
+    return std::sqrt(blas::dot(vec.size(), vec.data(), 1, vec.data(), 1));
+  };
+  auto InnProd = [](const Eigen::VectorXd &Lvec,
+                    const Eigen::VectorXd &Rvec) -> double {
+    return blas::dot(Lvec.size(), Lvec.data(), 1, Rvec.data(), 1);
+  };
+  // INITIALIZATIONS
+  double Lantol = settings.Lantol;
+  double E0tol = settings.E0tol;
+  bool print = settings.print;
+  size_t itermax = settings.itermax;
+
+  int64_t n = start_vec.rows();
+  Eigen::VectorXd qold = Eigen::VectorXd::Zero(n);
+  Eigen::VectorXd qtemp = Eigen::VectorXd::Zero(n);
+  Eigen::VectorXd qnew = Eigen::VectorXd::Zero(n);
+  Eigen::VectorXd eigvals;
+  Eigen::MatrixXd eigvecs;
+  std::vector<Eigen::VectorXd> kry_vecs;
+  std::vector<double> alphas, betas;
+  double currE0, prevE0;
+
+  double normpsi = VecNorm(start_vec);
+  int64_t nlan = -1;
+
+  nlan++;
+  qold = start_vec / normpsi;
+  kry_vecs.push_back(qold);
+
+  qnew = H.dot(qold);
+  alphas.push_back(InnProd(qold, qnew));
+  qnew -= alphas[0] * qold;
+  betas.push_back(normpsi);
+  betas.push_back(VecNorm(qnew));
+  prevE0 = alphas[0];
+
+  if(abs(betas[1]) <= Lantol) {
+    itermax = 1;
+    currE0 = alphas[0];
+  }
+  for(size_t iter = 1; iter < itermax; iter++) {
+    nlan++;
+    qtemp = qold;
+    qold = qnew / betas[iter];
+    kry_vecs.push_back(qold);
+    qnew = -betas[iter] * qtemp;
+
+    qtemp = H.dot(qold);
+
+    qnew += qtemp;
+    alphas.push_back(InnProd(qold, qnew));
+    qnew -= alphas[iter] * qold;
+    betas.push_back(VecNorm(qnew));
+    // GET EIGENVALUES OF CURRENT TRI-DIAG MATRIX
+    Hste_v(alphas, betas, eigvals, eigvecs);
+    currE0 = eigvals(0);
+
+    if(abs(betas[iter + 1]) <= Lantol) {
+      itermax = iter;
+      if(print)
+        std::cout << "EXIT LANCZOS BECAUSE BETA IS TOO SMALL!! AT ITERATION "
+                  << iter << ", betas[iter + 1] = " << betas[iter + 1]
+                  << std::endl;
+      break;
+    }
+    if(abs(currE0 - prevE0) <= E0tol) {
+      itermax = iter;
+      if(print)
+        std::cout << "EXIT LANCZOS BECAUSE ENERGY ACCURACY WAS OBTAINED!! AT "
+                     "ITERATION "
+                  << iter << ", dE = " << abs(currE0 - prevE0) << std::endl;
+      break;
+    }
+    prevE0 = currE0;
+  }
+
+  if(abs(prevE0 - currE0) > E0tol && nlan == itermax - 1) {
+    std::ostringstream oss;
+    oss << "Unable to achieve the desired accuracy of " << std::scientific
+        << E0tol << " in Lanczos after " << itermax << " iterations!!";
+    throw(oss.str());
+  }
+
+  E0 = currE0;
+  psi0 = Eigen::VectorXd::Zero(n);
+  if(kry_vecs.size() == 1) {
+    psi0 = kry_vecs[0];
+  } else {
+    for(int64_t i = 0; i < kry_vecs.size(); i++)
+      psi0 += eigvecs.col(0)(i) * kry_vecs[i];
+  }
+}
+
+/**
+ * @brief Computes ground state of Hamiltonian and corresponding
+ *        eigenvector using Lanczos.
+ *
+ * @param [in] const MatOp &H: Wrapped Hamiltonian.
+ * @param [out] double &E0: Ground state energy.
+ * @param [out] Eigen::VectorXd &psi0: Ground state eigenvector.
+ * @param [in] const LanczosSettings &settings: Input structure with Lanczos
+ * parameters.
+ * @param [in] bool superCI: Determines starting guess for Lanczos. If true
+ *             we start from [1,0,0,0,...] vector, otherwise from [1,1,1,...].
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 05/04/2021
+ */
+template <class MatOp>
+void GetGS(const MatOp &H, double &E0, Eigen::VectorXd &psi0,
+           const LanczosSettings &settings, bool isHF = false) {
+  // Computes the lowest eigenvalue and corresponding
+  // eigenvector from the dense matrix H by using Lanczos.
+
+  int64_t n = H.rows();
+  // Initial vector. We choose (1,0,0,0,...)t
+  // for HF, Otherwhise  (1,1,1,1,...)t
+  Eigen::VectorXd start_psi =
+      isHF ? Eigen::VectorXd::Zero(n) : Eigen::VectorXd::Ones(n);
+  start_psi(0) = 1.;
+  // Determine lowest eigenvalue for the given
+  // tolerance.
+  Eigen::VectorXd psi0_Lan;
+  int64_t nLanIts = GetGSEn_Lanczos(start_psi, H, E0, psi0_Lan, settings);
+  // Reconstruct the eigenvector
+  Lanczos_BackProj(start_psi, H, nLanIts, psi0_Lan, psi0);
+  start_psi = psi0;
+  GetGSEnVec_Lanczos(start_psi, H, E0, psi0, settings);
+}
+
+}  // namespace macis
diff --git a/include/macis/hamiltonian_generator.hpp b/include/macis/hamiltonian_generator.hpp
index 9ef9583b..899f2586 100644
--- a/include/macis/hamiltonian_generator.hpp
+++ b/include/macis/hamiltonian_generator.hpp
@@ -183,7 +183,7 @@ class HamiltonianGenerator {
   void rotate_hamiltonian_ordm(const double* ordm);
 
   virtual void SetJustSingles(bool /*_js*/) {}
-  virtual bool GetJustSingles() { return false; }
+  virtual bool GetJustSingles() const { return false; }
   virtual size_t GetNimp() const { return N / 2; }
 };
 
diff --git a/include/macis/hamiltonian_generator/sd_build.hpp b/include/macis/hamiltonian_generator/sd_build.hpp
new file mode 100644
index 00000000..37c8cf0f
--- /dev/null
+++ b/include/macis/hamiltonian_generator/sd_build.hpp
@@ -0,0 +1,278 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+#pragma once
+#include <macis/hamiltonian_generator.hpp>
+#include <macis/sd_operations.hpp>
+#include <macis/util/rdms.hpp>
+#include <set>
+
+namespace macis {
+
+template <size_t N>
+struct det_pos {
+ public:
+  std::bitset<N> det;
+  uint32_t id;
+};
+
+template <size_t N>
+bool operator<(const det_pos<N>& a, const det_pos<N>& b) {
+  return bitset_less<N>(a.det, b.det);
+}
+
+template <size_t N>
+class SDBuildHamiltonianGenerator : public HamiltonianGenerator<N> {
+ public:
+  using base_type = HamiltonianGenerator<N>;
+  using full_det_t = typename base_type::full_det_t;
+  using spin_det_t = typename base_type::spin_det_t;
+  using full_det_iterator = typename base_type::full_det_iterator;
+  using matrix_span_t = typename base_type::matrix_span_t;
+  using rank4_span_t = typename base_type::rank4_span_t;
+
+  template <typename index_t>
+  using sparse_matrix_type = sparsexx::csr_matrix<double, index_t>;
+
+ protected:
+  size_t nimp, nimp2, nimp3;
+
+  template <typename index_t>
+  sparse_matrix_type<index_t> make_csr_hamiltonian_block_(
+      full_det_iterator bra_begin, full_det_iterator bra_end,
+      full_det_iterator ket_begin, full_det_iterator ket_end, double H_thresh) {
+    const size_t nbra_dets = std::distance(bra_begin, bra_end);
+    const size_t nket_dets = std::distance(ket_begin, ket_end);
+
+    std::vector<index_t> colind, rowptr(nbra_dets + 1);
+    std::vector<double> nzval;
+
+    // List of impurity orbitals, assumed to be the first nimp.
+    std::vector<uint32_t> imp_orbs(nimp, 0);
+    for(int ii = 0; ii < nimp; ii++) imp_orbs[ii] = ii;
+    std::vector<uint32_t> bra_occ_alpha, bra_occ_beta;
+
+    std::set<det_pos<N> > kets;
+    for(full_det_iterator it = ket_begin; it != ket_end; it++) {
+      det_pos<N> a;
+      a.det = *it;
+      a.id = std::distance(ket_begin, it);
+      kets.insert(a);
+    }
+
+    rowptr[0] = 0;
+
+    // Loop over bra determinants
+    for(size_t i = 0; i < nbra_dets; ++i) {
+      // if( (i%1000) == 0 ) std::cout << i << ", " << rowptr[i] << std::endl;
+      const auto bra = *(bra_begin + i);
+
+      size_t nrow = 0;
+      if(bra.count()) {
+        // Separate out into alpha/beta components
+        spin_det_t bra_alpha = bitset_lo_word(bra);
+        spin_det_t bra_beta = bitset_hi_word(bra);
+
+        // Get occupied indices
+        bits_to_indices(bra_alpha, bra_occ_alpha);
+        bits_to_indices(bra_beta, bra_occ_beta);
+
+        // Get singles and doubles
+        // (Note that doubles only involve impurity orbitals)
+        std::vector<full_det_t> excs, doubles;
+        if(just_singles)
+          generate_singles_spin(this->norb_, bra, excs);
+        else {
+          std::vector<full_det_t> singls;
+          generate_singles_spin(this->norb_, bra, excs);
+          // This will store in singls sinles among impurity orbitals, which we
+          // have already taken into account.
+          generate_singles_doubles_spin_as(this->norb_, bra, singls, doubles,
+                                           imp_orbs);
+          excs.insert(excs.end(), doubles.begin(), doubles.end());
+        }
+
+        // Diagonal term
+        full_det_t ex_diag = bra ^ bra;
+        spin_det_t exd_alpha = bitset_lo_word(ex_diag);
+        spin_det_t exd_beta = bitset_hi_word(ex_diag);
+
+        // Compute Matrix Element
+        const auto h_eld =
+            this->matrix_element_diag(bra_occ_alpha, bra_occ_beta);
+
+        if(std::abs(h_eld) > H_thresh) {
+          nrow++;
+          colind.emplace_back(i);
+          nzval.emplace_back(h_eld);
+        }
+
+        // Loop over ket determinants
+        for(const auto pos_ket : excs) {
+          det_pos<N> pos_ket2;
+          pos_ket2.det = pos_ket;
+          pos_ket2.id = 0;
+          auto it = kets.find(pos_ket2);
+          if(it != kets.end()) {
+            int j = it->id;
+            spin_det_t ket_alpha = bitset_lo_word(pos_ket);
+            spin_det_t ket_beta = bitset_hi_word(pos_ket);
+
+            full_det_t ex_total = bra ^ pos_ket;
+
+            spin_det_t ex_alpha = bitset_lo_word(ex_total);
+            spin_det_t ex_beta = bitset_hi_word(ex_total);
+
+            // Compute Matrix Element
+            const auto h_el = this->matrix_element(
+                bra_alpha, ket_alpha, ex_alpha, bra_beta, ket_beta, ex_beta,
+                bra_occ_alpha, bra_occ_beta);
+
+            if(std::abs(h_el) > H_thresh) {
+              nrow++;
+              colind.emplace_back(j);
+              nzval.emplace_back(h_el);
+            }
+
+          }  // Non-zero ket determinant
+        }    // Loop over ket determinants
+
+      }  // Non-zero bra determinant
+
+      rowptr[i + 1] = rowptr[i] + nrow;  // Update rowptr
+
+    }  // Loop over bra determinants
+
+    colind.shrink_to_fit();
+    nzval.shrink_to_fit();
+
+    return sparse_matrix_type<index_t>(nbra_dets, nket_dets, std::move(rowptr),
+                                       std::move(colind), std::move(nzval));
+  }
+
+  sparse_matrix_type<int32_t> make_csr_hamiltonian_block_32bit_(
+      full_det_iterator bra_begin, full_det_iterator bra_end,
+      full_det_iterator ket_begin, full_det_iterator ket_end,
+      double H_thresh) override {
+    return make_csr_hamiltonian_block_<int32_t>(bra_begin, bra_end, ket_begin,
+                                                ket_end, H_thresh);
+  }
+
+  sparse_matrix_type<int64_t> make_csr_hamiltonian_block_64bit_(
+      full_det_iterator bra_begin, full_det_iterator bra_end,
+      full_det_iterator ket_begin, full_det_iterator ket_end,
+      double H_thresh) override {
+    return make_csr_hamiltonian_block_<int64_t>(bra_begin, bra_end, ket_begin,
+                                                ket_end, H_thresh);
+  }
+
+ public:
+  void form_rdms(full_det_iterator bra_begin, full_det_iterator bra_end,
+                 full_det_iterator ket_begin, full_det_iterator ket_end,
+                 double* C, matrix_span_t ordm, rank4_span_t trdm) override {
+    const size_t nbra_dets = std::distance(bra_begin, bra_end);
+    const size_t nket_dets = std::distance(ket_begin, ket_end);
+
+    std::vector<uint32_t> bra_occ_alpha, bra_occ_beta;
+
+    std::set<det_pos<N> > kets;
+    for(full_det_iterator it = ket_begin; it != ket_end; it++) {
+      det_pos<N> a;
+      a.det = *it;
+      a.id = std::distance(ket_begin, it);
+      kets.insert(a);
+    }
+
+    // Loop over bra determinants
+    for(size_t i = 0; i < nbra_dets; ++i) {
+      const auto bra = *(bra_begin + i);
+      // if( (i%1000) == 0 ) std::cout << i  << std::endl;
+      if(bra.count()) {
+        // Separate out into alpha/beta components
+        spin_det_t bra_alpha = bitset_lo_word(bra);
+        spin_det_t bra_beta = bitset_hi_word(bra);
+
+        // Get occupied indices
+        bits_to_indices(bra_alpha, bra_occ_alpha);
+        bits_to_indices(bra_beta, bra_occ_beta);
+
+        // Get singles and doubles
+        std::vector<full_det_t> excs;
+        if(trdm.data_handle()) {
+          std::vector<full_det_t> doubles;
+          generate_singles_doubles_spin(this->norb_, bra, excs, doubles);
+          excs.insert(excs.end(), doubles.begin(), doubles.end());
+        } else {
+          generate_singles_spin(this->norb_, bra, excs);
+        }
+
+        // Diagonal term
+        full_det_t ex_diag = bra ^ bra;
+        spin_det_t exd_alpha = bitset_lo_word(ex_diag);
+        spin_det_t exd_beta = bitset_hi_word(ex_diag);
+
+        // Compute Matrix Element
+        rdm_contributions(bra_alpha, bra_alpha, exd_alpha, bra_beta, bra_beta,
+                          exd_beta, bra_occ_alpha, bra_occ_beta, C[i] * C[i],
+                          ordm, trdm);
+
+        // Loop over excitations
+        for(const auto pos_ket : excs) {
+          det_pos<N> pos_ket2;
+          pos_ket2.det = pos_ket;
+          pos_ket2.id = 0;
+          auto it = kets.find(pos_ket2);
+          if(it != kets.end()) {
+            int j = it->id;
+            spin_det_t ket_alpha = bitset_lo_word(pos_ket);
+            spin_det_t ket_beta = bitset_hi_word(pos_ket);
+
+            full_det_t ex_total = bra ^ pos_ket;
+            int ex_lim = 2;
+            if(trdm.data_handle()) ex_lim = 4;
+            if(ex_total.count() <= ex_lim) {
+              spin_det_t ex_alpha = bitset_lo_word(ex_total);
+              spin_det_t ex_beta = bitset_hi_word(ex_total);
+
+              const double val = C[i] * C[j];
+
+              // Compute Matrix Element
+              rdm_contributions(bra_alpha, ket_alpha, ex_alpha, bra_beta,
+                                ket_beta, ex_beta, bra_occ_alpha, bra_occ_beta,
+                                val, ordm, trdm);
+
+            }  // Possible non-zero connection (Hamming distance)
+
+          }  // Non-zero ket determinant
+        }    // Loop over ket determinants
+
+      }  // Non-zero bra determinant
+    }    // Loop over bra determinants
+  }
+
+ public:
+  bool just_singles;
+
+  template <typename... Args>
+  SDBuildHamiltonianGenerator(Args&&... args)
+      : HamiltonianGenerator<N>(std::forward<Args>(args)...),
+        just_singles(false) {
+    SetNimp(this->norb_);
+  }
+
+  void SetJustSingles(bool _js) override { just_singles = _js; }
+  void SetNimp(size_t _n) {
+    nimp = _n;
+    nimp2 = _n * _n;
+    nimp3 = nimp2 * _n;
+  }
+  size_t GetNimp() const override { return nimp; }
+  bool GetJustSingles() const override { return just_singles; }
+};
+
+}  // namespace macis
diff --git a/include/macis/model/hubbard.hpp b/include/macis/model/hubbard.hpp
new file mode 100644
index 00000000..5e80fd0f
--- /dev/null
+++ b/include/macis/model/hubbard.hpp
@@ -0,0 +1,21 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+#pragma once
+
+#include <macis/types.hpp>
+
+namespace macis {
+
+/**
+ *  @brief Generate Hamiltonian Data for 1D Hubbard
+ */
+void hubbard_1d(size_t nsites, double t, double U, std::vector<double>& T,
+                std::vector<double>& V, bool pbc = false);
+
+}  // namespace macis
diff --git a/include/macis/sd_operations.hpp b/include/macis/sd_operations.hpp
index c04267f8..7c889134 100644
--- a/include/macis/sd_operations.hpp
+++ b/include/macis/sd_operations.hpp
@@ -96,16 +96,17 @@ void bitset_to_occ_vir(size_t norb, std::bitset<N> state,
 }
 
 /**
- *  @brief Generate the list of (un)occupied orbitals for a paricular state.
+ *  @brief Generate the list of (un)occupied orbitals for a paricular state,
+ *         but considering only a subset of active orbitals.
  *
  *  TODO: Test this function
  *
  *  @tparam N Number of bits for the total bit string of the state
  *  @param[in]  norb   Number of orbitals used to describe the state (<= `N`)
  *  @param[in]  state  The state from which to determine orbital occupations.
- *  @param[out] occ    List of occupied orbitals in `state`
- *  @param[out] vir    List of unoccupied orbitals in `state`
- *  @param[in]  as_orbs TODO:????
+ *  @param[out] occ    List of occupied active orbitals in `state`
+ *  @param[out] vir    List of unoccupied active orbitals in `state`
+ *  @param[in]  as_orbs: Orbital indices for the active orbitals to consider..
  */
 template <size_t N>
 void bitset_to_occ_vir_as(size_t norb, std::bitset<N> state,
@@ -198,7 +199,24 @@ void generate_singles_doubles(size_t norb, std::bitset<N> state,
   append_doubles(state, occ_orbs, vir_orbs, doubles);
 }
 
-// TODO: Test this function
+/**
+ * @brief Generates single excitations for a given spin involving only the
+ * orbitals defined in the active space as_orbs.
+ *
+ * TODO: Test this function
+ *
+ * @param[in] size_t norb: Nr. of orbitals in the system.
+ * @param[in] std::bitset<N>state : Half Slater determinant for which to compute
+ * the excitations, includes only one of the spin species.
+ * @param[out] std::vector<std::bitset<N> >& singles: Vector storing the singly
+ *             excited determinants from state, considering the active space.
+ * @param[in] const std::vector<uint32_t> as_orbs: Vector with the indices of
+ * the active orbitals. Only excitations involving these orbitals will be
+ * formed.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 28/01/2022
+ */
 template <size_t N>
 void generate_singles_as(size_t norb, std::bitset<N> state,
                          std::vector<std::bitset<N>>& singles,
@@ -210,7 +228,26 @@ void generate_singles_as(size_t norb, std::bitset<N> state,
   append_singles(state, occ_orbs, vir_orbs, singles);
 }
 
-// TODO: Test this function
+/**
+ * @brief Generates single and double excitations for a given spin involving
+ * only the orbitals defined in the active space as_orbs.
+ *
+ * TODO: Test this function
+ *
+ * @param[in] size_t norb: Nr. of orbitals in the system.
+ * @param[in] std::bitset<N>state : Half Slater determinant for which to compute
+ * the excitations, includes only one of the spin species.
+ * @param[out] std::vector<std::bitset<N> >& singles: Vector storing the singly
+ *             excited determinants from state, considering the active space.
+ * @param[out] std::vector<std::bitset<N> >& doubles: Vector storing the doubly
+ *             excited determinants from state, considering the active space.
+ * @param[in] const std::vector<uint32_t> as_orbs: Vector with the indices of
+ * the active orbitals. Only excitations involving these orbitals will be
+ * formed.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 28/01/2022
+ */
 template <size_t N>
 void generate_singles_doubles_as(size_t norb, std::bitset<N> state,
                                  std::vector<std::bitset<N>>& singles,
@@ -225,7 +262,24 @@ void generate_singles_doubles_as(size_t norb, std::bitset<N> state,
   append_doubles(state, occ_orbs, vir_orbs, doubles);
 }
 
-// TODO: Test this function
+/**
+ * @brief Generates single excitations for both spins involving only the
+ * orbitals defined in the active space as_orbs.
+ *
+ * TODO: Test this function
+ *
+ * @param[in] size_t norb: Nr. of orbitals in the system.
+ * @param[in] std::bitset<N>state : Slater determinant for which to compute the
+ * excitations.
+ * @param[out] std::vector<std::bitset<N> >& singles: Vector storing the singly
+ *             excited determinants from state, considering the active space.
+ * @param[in] const std::vector<uint32_t> as_orbs: Vector with the indices of
+ * the active orbitals. Only excitations involving these orbitals will be
+ * formed.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 28/01/2022
+ */
 template <size_t N>
 void generate_singles_spin_as(size_t norb, std::bitset<N> state,
                               std::vector<std::bitset<N>>& singles,
@@ -260,7 +314,26 @@ void generate_singles_spin_as(size_t norb, std::bitset<N> state,
   }
 }
 
-// TODO: Test this function
+/**
+ * @brief Generates single and double excitations for both spins involving only
+ * the orbitals defined in the active space as_orbs.
+ *
+ * TODO: Test this function
+ *
+ * @param[in] size_t norb: Nr. of orbitals in the system.
+ * @param[in] std::bitset<N>state : Half Slater determinant for which to compute
+ * the excitations, includes only one of the spin species.
+ * @param[out] std::vector<std::bitset<N> >& singles: Vector storing the singly
+ *             excited determinants from state, considering the active space.
+ * @param[out] std::vector<std::bitset<N> >& doubles: Vector storing the doubly
+ *             excited determinants from state, considering the active space.
+ * @param[in] const std::vector<uint32_t> as_orbs: Vector with the indices of
+ * the active orbitals. Only excitations involving these orbitals will be
+ * formed.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 28/01/2022
+ */
 template <size_t N>
 void generate_singles_doubles_spin_as(size_t norb, std::bitset<N> state,
                                       std::vector<std::bitset<N>>& singles,
diff --git a/include/macis/solvers/lobpcg.hpp b/include/macis/solvers/lobpcg.hpp
new file mode 100644
index 00000000..5bb1c785
--- /dev/null
+++ b/include/macis/solvers/lobpcg.hpp
@@ -0,0 +1,106 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+/**
+ * @file lobpcg.h++
+ *
+ * @brief Implements simple call to LOBPCG to get the lowest few
+ *        eigenstates of a given matrix.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 25/10/2024
+ */
+#pragma once
+#include <assert.h>
+#include <sys/stat.h>
+
+#include <complex>
+#include <fstream>
+#include <iomanip>
+#include <limits>
+#include <lobpcgxx/lobpcg.hpp>
+#include <map>
+#include <sparsexx/matrix_types/csr_matrix.hpp>
+#include <sparsexx/spblas/pspmbv.hpp>
+#include <sparsexx/spblas/spmbv.hpp>
+#include <utility>
+
+namespace macis {
+
+/**
+ * @brief Perform a band Lanczos calculation on the Hamiltonian operator H,
+ * starting from vectors qs, for at most nLanIts iterations. Returns the first
+ * len(qs) eigenvalues, converged to some accuracy. Note that this
+ * implementation does not account for deflations (i.e., pruning the span of the
+ * qs for linear dependencies in higher powers of H).
+ *
+ * @param[in] typename Functor &H: Hamiltonian oprator. Just needs to implement
+ * a matrix vector product.
+ * @param[in] std::vector<std::vector<double> > &qs: Initial set of vetors to
+ * perform the band Lanczos on. Deleted on exit.
+ * @param[out] std::vector<double> &evals: Lowest len(qs) eigenvalues.
+ * @param[out] std::vector<std::vector<double> > &evecs: Lowest len(qs)
+ * eigenvectors, in the Krylov basis.
+ * @param[in] int &nLanIts: Number of Lanczos iterations to perform.
+ * @param[in] double tol: Target tolerance for the eigenvalue convergence.
+ * @param[in] double thres: Threshold determining when to ignore beta's for
+ * being too small.
+ * @param[in] bool print: If true, write intermediate results to file.
+ *
+ * @author Carlos Mejuto Zaera
+ * @date 25/10/2024
+ */
+template <typename Functor>
+void LobpcgGS(size_t dimH, size_t nstates, const Functor& H,
+              std::vector<double>& evals, std::vector<double>& X, int maxIts,
+              double tol = 1.E-8, bool print = false) {
+  // Run LOBPCG to get the first few eigenstates of a given
+  // Hamiltonian
+  evals.clear();
+  evals.resize(nstates, 0.);
+  X.clear();
+  X.resize(dimH * nstates, 0.);
+  // Hop
+  lobpcgxx::operator_action_type<double> Hop =
+      [&](int64_t n, int64_t k, const double* x, int64_t ldx, double* y,
+          int64_t ldy) -> void {
+    for(int ik = 0; ik < k; ik++)
+      H.operator_action(1, 1., x + ik * n, n, 0., y + ik * n, n);
+  };
+
+  // Random vectors
+  std::default_random_engine gen;
+  std::normal_distribution<> dist(0., 1.);
+  auto rand_gen = [&]() { return dist(gen); };
+  std::generate(X.begin(), X.end(), rand_gen);
+  lobpcgxx::cholqr(dimH, nstates, X.data(), dimH);  // Orthogonalize
+
+  lobpcgxx::lobpcg_settings settings;
+  settings.conv_tol = tol;
+  settings.maxiter = maxIts;
+  lobpcgxx::lobpcg_operator<double> lob_op(Hop);
+
+  std::vector<double> res(nstates);
+
+  try {
+    lobpcgxx::lobpcg(settings, dimH, nstates, nstates, lob_op, evals.data(),
+                     X.data(), dimH, res.data());
+  } catch(std::runtime_error e) {
+    std::cout << "Runtime error during lobpcg: " << e.what() << std::endl;
+  }
+
+  if(print) {
+    std::cout << std::scientific << std::setprecision(10) << std::endl;
+    for(int64_t i = 0; i < nstates; ++i) {
+      std::cout << "  evals[" << i << "] = " << evals[i] << ",   res[" << i
+                << "] = " << res[i] << std::endl;
+    }
+  }
+}
+
+}  // namespace macis
diff --git a/include/macis/solvers/selected_ci_diag.hpp b/include/macis/solvers/selected_ci_diag.hpp
index 0a1e293f..72316c8b 100644
--- a/include/macis/solvers/selected_ci_diag.hpp
+++ b/include/macis/solvers/selected_ci_diag.hpp
@@ -11,6 +11,7 @@
 #include <macis/csr_hamiltonian.hpp>
 #include <macis/hamiltonian_generator.hpp>
 #include <macis/solvers/davidson.hpp>
+#include <macis/solvers/lobpcg.hpp>
 #include <macis/types.hpp>
 #include <macis/util/mpi.hpp>
 #include <sparsexx/matrix_types/dense_conversions.hpp>
@@ -22,7 +23,8 @@ namespace macis {
 template <typename SpMatType>
 double parallel_selected_ci_diag(const SpMatType& H, size_t davidson_max_m,
                                  double davidson_res_tol,
-                                 std::vector<double>& C_local, MPI_Comm comm) {
+                                 std::vector<double>& C_local, MPI_Comm comm,
+                                 const size_t nstates = 1) {
   auto logger = spdlog::get("ci_solver");
   if(!logger) {
     logger = spdlog::stdout_color_mt("ci_solver");
@@ -56,10 +58,25 @@ double parallel_selected_ci_diag(const SpMatType& H, size_t davidson_max_m,
   // Solve EVP
   MPI_Barrier(comm);
   auto dav_st = clock_type::now();
-
-  auto [niter, E] =
-      p_davidson(H.local_row_extent(), davidson_max_m, op, D_local.data(),
-                 davidson_res_tol, C_local.data() MACIS_MPI_CODE(, H.comm()));
+  double E = 0.;
+  size_t niter = 0;
+
+  if(nstates == 1) {
+    auto [niter0, E0] =
+        p_davidson(H.local_row_extent(), davidson_max_m, op, D_local.data(),
+                   davidson_res_tol, C_local.data() MACIS_MPI_CODE(, H.comm()));
+    niter = niter0;
+    E = E0;
+  } else {
+    std::vector<double> evals;
+    std::vector<double> evecs;
+    LobpcgGS(H.m(), nstates, op, evals, evecs, 10000, davidson_res_tol);
+    logger->info("  Hamiltonian Eigenvalues:");
+    for(int ii = 0; ii < nstates; ii++)
+      logger->info("    --> Eval #{:4}: {:.6e}", ii, evals[ii]);
+    E = evals[0];
+    for(int ii = 0; ii < H.m(); ii++) C_local[ii] = evecs[ii];
+  }
 
   MPI_Barrier(comm);
   auto dav_en = clock_type::now();
@@ -74,8 +91,8 @@ double parallel_selected_ci_diag(const SpMatType& H, size_t davidson_max_m,
 
 template <typename SpMatType>
 double serial_selected_ci_diag(const SpMatType& H, size_t davidson_max_m,
-                               double davidson_res_tol,
-                               std::vector<double>& C) {
+                               double davidson_res_tol, std::vector<double>& C,
+                               const size_t nstates = 1) {
   auto logger = spdlog::get("ci_solver");
   if(!logger) {
     logger = spdlog::stdout_color_mt("ci_solver");
@@ -108,9 +125,24 @@ double serial_selected_ci_diag(const SpMatType& H, size_t davidson_max_m,
 
   // Solve EVP
   auto dav_st = clock_type::now();
-
-  auto [niter, E] =
-      davidson(H.m(), davidson_max_m, op, D.data(), davidson_res_tol, C.data());
+  double E = 0.;
+  size_t niter = 0;
+
+  if(nstates == 1) {
+    auto [niter0, E0] = davidson(H.m(), davidson_max_m, op, D.data(),
+                                 davidson_res_tol, C.data());
+    niter = niter0;
+    E = E0;
+  } else {
+    std::vector<double> evals;
+    std::vector<double> evecs;
+    LobpcgGS(H.m(), nstates, op, evals, evecs, 10000, davidson_res_tol);
+    logger->info("  Hamiltonian Eigenvalues:");
+    for(int ii = 0; ii < nstates; ii++)
+      logger->info("    --> Eval #{:4}: {:.6e}", ii, evals[ii]);
+    E = evals[0];
+    for(int ii = 0; ii < H.m(); ii++) C[ii] = evecs[ii];
+  }
 
   auto dav_en = clock_type::now();
 
@@ -128,7 +160,8 @@ double selected_ci_diag(wavefunction_iterator_t<N> dets_begin,
                         size_t davidson_max_m, double davidson_res_tol,
                         std::vector<double>& C_local,
                         MACIS_MPI_CODE(MPI_Comm comm, )
-                            const bool quiet = false) {
+                            const bool quiet = false,
+                        const size_t nstates = 1) {
   auto logger = spdlog::get("ci_solver");
   if(!logger) {
     logger = spdlog::stdout_color_mt("ci_solver");
@@ -197,10 +230,10 @@ double selected_ci_diag(wavefunction_iterator_t<N> dets_begin,
   // Solve EVP
 #ifdef MACIS_ENABLE_MPI
   auto E = parallel_selected_ci_diag(H, davidson_max_m, davidson_res_tol,
-                                     C_local, comm);
+                                     C_local, comm, nstates);
 #else
-  auto E =
-      serial_selected_ci_diag(H, davidson_max_m, davidson_res_tol, C_local);
+  auto E = serial_selected_ci_diag(H, davidson_max_m, davidson_res_tol, C_local,
+                                   nstates);
 #endif
 
   return E;
diff --git a/include/macis/util/cas.hpp b/include/macis/util/cas.hpp
index 63c8f4a2..304aae1a 100644
--- a/include/macis/util/cas.hpp
+++ b/include/macis/util/cas.hpp
@@ -53,7 +53,7 @@ double compute_casci_rdms(
   double E0 =
       selected_ci_diag(dets.begin(), dets.end(), ham_gen, settings.ci_matel_tol,
                        settings.ci_max_subspace, settings.ci_res_tol, C,
-                       MACIS_MPI_CODE(comm, ) true);
+                       MACIS_MPI_CODE(comm, ) true, settings.ci_nstates);
 
   // Compute RDMs
   ham_gen.form_rdms(dets.begin(), dets.end(), dets.begin(), dets.end(),
diff --git a/include/macis/util/fcidump.hpp b/include/macis/util/fcidump.hpp
index 41bb2a5c..823ef7c8 100644
--- a/include/macis/util/fcidump.hpp
+++ b/include/macis/util/fcidump.hpp
@@ -70,6 +70,17 @@ void read_fcidump_1body(std::string fname, col_major_span<double, 2> T);
  */
 void read_fcidump_2body(std::string fname, col_major_span<double, 4> V);
 
+/**
+ * @brief Check whether the 2-body contribution of the Hamiltonian is
+ * exclusively diagonal.
+ *
+ * @param[in] fname: Filename of FCIDUMP file
+ *
+ * @returns bool: Is the 2-body contribution to the Hamiltonian exclusively
+ *                diagonal?
+ */
+bool is_2body_diagonal(std::string fname);
+
 /**
  *  @brief Write an FCIDUMP file from a 2-body hamiltonian
  *
diff --git a/include/macis/util/mcscf.hpp b/include/macis/util/mcscf.hpp
index 9ff31b8f..b49936ef 100644
--- a/include/macis/util/mcscf.hpp
+++ b/include/macis/util/mcscf.hpp
@@ -27,6 +27,8 @@ struct MCSCFSettings {
   double ci_res_tol = 1e-8;
   size_t ci_max_subspace = 20;
   double ci_matel_tol = std::numeric_limits<double>::epsilon();
+
+  size_t ci_nstates = 1;
 };
 
 double casscf_diis(MCSCFSettings settings, NumElectron nalpha,
diff --git a/src/macis/CMakeLists.txt b/src/macis/CMakeLists.txt
index 26155ac6..e8f828fc 100644
--- a/src/macis/CMakeLists.txt
+++ b/src/macis/CMakeLists.txt
@@ -15,6 +15,10 @@ add_library( macis
   orbital_rotation_utilities.cxx
   orbital_hessian.cxx
   orbital_steps.cxx
+  gf/eigsolver.cxx
+  gf/bandlan.cxx
+  gf/gf.cxx
+  model/hubbard.cxx
 )
 
 target_include_directories( macis PUBLIC 
diff --git a/src/macis/fcidump.cxx b/src/macis/fcidump.cxx
index bf1d7762..7430a46a 100644
--- a/src/macis/fcidump.cxx
+++ b/src/macis/fcidump.cxx
@@ -134,8 +134,8 @@ void read_fcidump_1body(std::string fname, col_major_span<double, 2> T) {
 void read_fcidump_1body(std::string fname, double* T, size_t LDT) {
   auto norb = read_fcidump_norb(fname);
   col_major_span<double, 2> T_map(T, LDT, norb);
-  read_fcidump_1body(fname, KokkosEx::submdspan(T_map, std::pair{0, norb},
-                                                Kokkos::full_extent));
+  read_fcidump_1body(
+      fname, Kokkos::submdspan(T_map, std::pair{0, norb}, Kokkos::full_extent));
 }
 
 void read_fcidump_2body(std::string fname, col_major_span<double, 4> V) {
@@ -177,8 +177,34 @@ void read_fcidump_2body(std::string fname, double* V, size_t LDV) {
   auto norb = read_fcidump_norb(fname);
   col_major_span<double, 4> V_map(V, LDV, LDV, LDV, norb);
   auto sl = std::pair{0, norb};
-  read_fcidump_2body(
-      fname, KokkosEx::submdspan(V_map, sl, sl, sl, Kokkos::full_extent));
+  read_fcidump_2body(fname,
+                     Kokkos::submdspan(V_map, sl, sl, sl, Kokkos::full_extent));
+}
+
+bool is_2body_diagonal(std::string fname) {
+  auto norb = read_fcidump_norb(fname);
+  bool all_diag = true;
+
+  std::ifstream file(fname);
+  std::string line;
+  while(std::getline(file, line)) {
+    auto tokens = split(line, " ");
+    if(tokens.size() != 5) continue;  // not a valid FCIDUMP line
+
+    auto [p, q, r, s, integral] = fcidump_line(tokens);
+    auto lc = line_classification(p, q, r, s);
+    if(lc == LineClassification::TwoBody) {
+      p--;
+      q--;
+      r--;
+      s--;
+      if(p != q || r != s) {
+        all_diag = false;
+        break;
+      }
+    }
+  }
+  return all_diag;
 }
 
 void write_fcidump(std::string fname, size_t norb, const double* T, size_t LDT,
diff --git a/src/macis/gf/bandlan.cxx b/src/macis/gf/bandlan.cxx
new file mode 100644
index 00000000..2662ce72
--- /dev/null
+++ b/src/macis/gf/bandlan.cxx
@@ -0,0 +1,359 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+#include "macis/gf/bandlan.hpp"
+
+#include <random>
+
+inline bool is_file(const std::string &name) {
+  struct stat buffer;
+  return (stat(name.c_str(), &buffer) == 0);
+}
+
+namespace macis {
+bool QRdecomp(std::vector<double> &Q, std::vector<double> &R, int Qrows,
+              int Qcols) {
+  // CALL LAPACK'S QR DECOMPOSITION ROUTINES.
+  // INPUT: Q: INPUT MATRIX TO PERFORM A QR DECOMPOSITION FOR. MAY BE
+  // RECTANGULAR,
+  //           THE NUMBER OF COLUMNS WILL BE ALWAYS LESS THAN THE NUMBER OF
+  //           ROWS.
+  // OUTPUT: Q: Q MATRIX FROM THE DECOMPOSITION, OVERWRITES INPUT
+  //         R: R MATRIX FROM THE DECOMPOSITION. UPPER DIAGONAL, SQUARE.
+  //         return : TRUE FOR SUCCESS, FALSE OTHERWISE
+
+  R.clear();
+  // PREPARE VARIABLES TO CALL LAPACK
+  int M = Qrows, N = Qcols;
+  assert(M >= N);
+  int LDA = M, INFO = 0;
+  std::vector<double> A(M * N, 0.), TAU(N, 0.);
+
+  // INITIALIZE A
+  for(int i = 0; i < M; i++)
+    for(int j = 0; j < N; j++) A[i + j * M] = Q[i + j * M];
+  // COMPUTE R MATRIX
+  lapack::geqrf(M, N, A.data(), LDA, TAU.data());
+  // SAVE THE R MATRIX
+  R.resize(N * N);
+  for(int i = 0; i < N; i++)
+    for(int j = i; j < N; j++) R[i + j * N] = A[i + j * M];
+
+  // NOW, COMPUTE THE ACTUAL Q MATRIX
+  int K = N;
+  lapack::orgqr(M, N, K, A.data(), LDA, TAU.data());
+  // SAVE THE Q MATRIX
+  for(int i = 0; i < M; i++)
+    for(int j = 0; j < N; j++) Q[i + j * M] = A[i + j * M];
+
+  return true;
+}
+
+bool QRdecomp_tr(std::vector<double> &Q, std::vector<double> &R, int Qrows,
+                 int Qcols) {
+  // CALL LAPACK'S QR DECOMPOSITION ROUTINES.
+  // INPUT: Q: INPUT MATRIX TO PERFORM A QR DECOMPOSITION FOR. MAY BE
+  // RECTANGULAR,
+  //           THE NUMBER OF COLUMNS WILL BE ALWAYS LESS THAN THE NUMBER OF
+  //           ROWS.
+  // OUTPUT: Q: Q MATRIX FROM THE DECOMPOSITION, OVERWRITES INPUT
+  //         R: R MATRIX FROM THE DECOMPOSITION. UPPER DIAGONAL, SQUARE.
+  //         return : TRUE FOR SUCCESS, FALSE OTHERWISE
+
+  R.clear();
+  // PREPARE VARIABLES TO CALL LAPACK
+  int M = Qcols, N = Qrows;
+  assert(M >= N);
+  int LDA = M, INFO = 0;
+  std::vector<double> A(M * N, 0.), TAU(N, 0.);
+
+  // INITIALIZE A
+  for(int i = 0; i < M; i++)
+    for(int j = 0; j < N; j++) A[i + j * M] = Q[i + j * M];
+
+  // EVALUATE R MATRIX
+  lapack::geqrf(M, N, A.data(), LDA, TAU.data());
+  // SAVE THE R MATRIX
+  R.resize(N * N);
+  for(int i = 0; i < N; i++)
+    for(int j = i; j < N; j++) R[i + j * N] = A[i + j * M];
+
+  // NOW, COMPUTE THE ACTUAL Q MATRIX
+  int K = N;
+  lapack::orgqr(M, N, K, A.data(), LDA, TAU.data());
+  // SAVE THE Q MATRIX
+  for(int i = 0; i < M; i++)
+    for(int j = 0; j < N; j++) Q[i + j * M] = A[i + j * M];
+
+  return true;
+}
+
+bool GetEigsys(std::vector<double> &mat, std::vector<double> &eigvals,
+               std::vector<double> &eigvecs, int matsize) {
+  // COMPUTES THE EIGENVALUES AND EIGENVECTORS OF THE SYMMETRIC MATRIX mat BY
+  // CALLING LAPACK. WE ASSUME THE UPPER TRIANGULAR PART OF A IS STORED. FIRST,
+  // IT BRINGS THE MATRIX INTO TRIANGULAR FORM, THEN COMPUTES THE EIGENVALUES
+  // AND EIGENVECTORS. THESE ARE STORED IN eigvals AND eigvecs RESPECTIVELY. THE
+  // MATRIX mat IS ERASED DURING COMPUTATION
+  eigvals.clear();
+  eigvecs.clear();
+  // PREPARE VARIABLES FOR LAPACK
+  lapack::Uplo UPLO = lapack::Uplo::Upper;
+  lapack::Job JOBZ = lapack::Job::Vec;
+  int N = matsize, LDA = matsize;
+  std::vector<double> A(N * N, 0.), D(N, 0.);
+
+  // INITIALIZE A
+  for(int i = 0; i < N; i++)
+    for(int j = 0; j < N; j++) A[i + j * N] = mat[i + j * N];
+  mat.clear();
+  // COMPUTE EIGENVALUES AND EIGENVECTORS
+  lapack::heev_2stage(JOBZ, UPLO, N, A.data(), LDA, D.data());
+
+  // NOW, STORE THE EIGENVALUES AND EIGENVECTORS
+  eigvals.resize(N);
+  for(int i = 0; i < N; i++) eigvals[i] = D[i];
+  eigvecs.resize(N * N);
+  for(int i = 0; i < N; i++)
+    for(int j = 0; j < N; j++) eigvecs[i + j * N] = A[j + i * N];
+
+  return true;
+}
+
+bool GetEigsysBand(std::vector<double> &mat, int nSupDiag,
+                   std::vector<double> &eigvals, std::vector<double> &eigvecs,
+                   int matsize) {
+  // COMPUTES THE EIGENVALUES AND EIGENVECTORS OF THE SYMMETRIC BAND MATRIX mat
+  // BY CALLING LAPACK. WE ASSUME THE UPPER TRIANGULAR PART OF A IS STORED.
+  // FIRST, IT BRINGS THE MATRIX INTO TRIANGULAR FORM, THEN COMPUTES THE
+  // EIGENVALUES AND EIGENVECTORS. THESE ARE STORED IN eigvals AND eigvecs
+  // RESPECTIVELY. THE MATRIX mat IS ERASED DURING COMPUTATION
+  eigvals.clear();
+  eigvecs.clear();
+  // PREPARE VARIABLES FOR LAPACK
+  lapack::Uplo UPLO = lapack::Uplo::Upper;
+  lapack::Job VECT = lapack::Job::Vec;
+  lapack::Job COMPZ = lapack::Job::UpdateVec;
+  int N = matsize, LDQ = matsize, LDAB = nSupDiag + 1;
+  std::vector<double> AB((nSupDiag + 1) * N, 0.);
+  std::vector<double> D(N, 0.), E(N - 1, 0.), Q(N * N, 0.);
+
+  // INITIALIZE A
+  for(int j = 0; j < N; j++)
+    for(int i = std::max(0, j - nSupDiag); i <= j; i++)
+      AB[nSupDiag + i - j + j * (nSupDiag + 1)] = mat[i + j * N];
+  mat.clear();
+
+  // TRANSFORM THE MATRIX TO TRIDIAGONAL FORM
+  // NOW, TRANSFORM MATRIX TO TRIDIAGONAL FORM
+  lapack::sbtrd(VECT, UPLO, N, nSupDiag, AB.data(), LDAB, D.data(), E.data(),
+                Q.data(), LDQ);
+  AB.clear();
+
+  // FINALLY, COMPUTE THE EIGENVALUES AND EIGENVECTORS!
+  lapack::steqr(COMPZ, N, D.data(), E.data(), Q.data(), LDQ);
+
+  // NOW, STORE THE EIGENVALUES AND EIGENVECTORS
+  eigvals.resize(N);
+  for(int i = 0; i < N; i++) eigvals[i] = D[i];
+  D.clear();
+  eigvecs.resize(N * N);
+  for(int i = 0; i < N; i++)
+    for(int j = 0; j < N; j++) eigvecs[i + j * N] = Q[j + i * N];
+
+  return true;
+}
+
+void BandResolvent(
+    const sparsexx::dist_sparse_matrix<sparsexx::csr_matrix<double, int32_t> >
+        &H,
+    std::vector<double> &vecs, const std::vector<std::complex<double> > &ws,
+    std::vector<std::vector<std::complex<double> > > &res, int nLanIts,
+    double E0, bool ispart, int nvecs, int len_vec, bool print,
+    bool saveGFmats) {
+  // COMPUTES THE RESOLVENT (ws - H)^-1 IN MATRIX FORM FOR THE "BASIS" GIVEN BY
+  // THE vecs VECTORS AND THE FREQUENCY GRID IN ws. USES THE BAND LANCZOS
+  // ALGORITHM. IT GETS STORED IN res.
+  using dbl = std::numeric_limits<double>;
+  res.clear();
+  std::cout << "RESOLVENT ROUTINE: ";
+  res.resize(ws.size(), std::vector<std::complex<double> >(
+                            nvecs * nvecs, std::complex<double>(0., 0.)));
+
+  // FIRST, COMPUTE QR DECOMPOSITION OF THE "BASIS" VECTORS vecs, NECESSARY FOR
+  // LANCZOS
+  std::vector<double> R;
+  std::cout << "QR DECOMPOSITION ...";
+  bool worked = QRdecomp_tr(vecs, R, nvecs, len_vec);
+  if(not worked) {
+    std::cout << "QR DECOMPOSITION FAILED!!" << std::endl;
+    return;
+  }
+  std::cout << "DONE! ";
+
+  if(print) {
+    std::ofstream ofile("QRresVecs.dat", std::ios::out);
+    ofile.precision(dbl::max_digits10);
+    ofile << "RESULT OF QR DECOMPOSITION: " << std::endl;
+    ofile << " New Vectors: " << std::endl;
+    for(int i = 0; i < len_vec; i++) {
+      for(int j = 0; j < nvecs; j++)
+        ofile << std::scientific << vecs[j * len_vec + i] << "    ";
+      ofile << std::endl;
+    }
+    ofile.close();
+    ofile.clear();
+    ofile.open("QRresRmat.dat", std::ios::out);
+    ofile << " R Matrix: " << std::endl;
+    for(int i = 0; i < nvecs; i++) {
+      for(int j = 0; j < nvecs; j++)
+        ofile << std::scientific << R[i + j * nvecs] << "  ";
+      ofile << std::endl;
+    }
+    ofile.close();
+  }
+
+  // NEXT, COMPUTE THE BAND LANCZOS
+  std::vector<double> bandH;
+  std::cout << "BAND LANCZOS ...";
+  SparseMatrixOperator Hop(H);
+  int nbands = nvecs;
+  BandLan<double>(Hop, vecs, bandH, nLanIts, nbands, len_vec, 1.E-6, print);
+  std::cout << "DONE! ";
+
+  if(print) {
+    std::ofstream ofile("BLH.dat", std::ios::out);
+    ofile.precision(dbl::max_digits10);
+    ofile << "RESULT OF BAND LANCZOS: " << std::endl;
+    ofile << " bandH Matrix: " << std::endl;
+    for(int i = 0; i < nLanIts; i++) {
+      for(int j = 0; j < nLanIts; j++)
+        ofile << std::scientific << bandH[i * nLanIts + j] << "  ";
+      ofile << std::endl;
+    }
+    ofile.close();
+  }
+
+  if(nvecs == 1) {
+    // ONLY ONE BAND. DIAGONAL GREEN'S FUNCTION ELEMENT.
+    // COMPUTE THROUGH CONTINUED FRACTION.
+    std::cout << "COMPUTING GF AS CONTINUED FRACTION...";
+    std::vector<double> alphas(nLanIts, 0.), betas(nLanIts, 0.);
+    for(int i = 0; i < nLanIts; i++)
+      alphas[i] =
+          ispart ? E0 - bandH[i * nLanIts + i] : bandH[i * nLanIts + i] - E0;
+    for(int i = 0; i < nLanIts - 1; i++)
+      betas[i + 1] =
+          ispart ? -bandH[i * nLanIts + i + 1] : bandH[i * nLanIts + i + 1];
+    betas[0] = R[0];
+#pragma omp parallel for
+    for(int indx_w = 0; indx_w < ws.size(); indx_w++) {
+      res[indx_w][0] =
+          betas.back() * betas.back() / (ws[indx_w] + alphas.back());
+      for(int i = betas.size() - 2; i >= 0; i--)
+        res[indx_w][0] =
+            betas[i] * betas[i] / (ws[indx_w] + alphas[i] - res[indx_w][0]);
+    }
+  } else {
+    // NEXT, COMPUTE THE EIGENVALUES AND EIGENVECTORS OF THE BAND DIAGONAL
+    // KRYLOV HAMILTONIAN
+    std::vector<double> eigvals;
+    std::vector<double> eigvecs;
+    std::cout << "COMPUTING EIGENVALES ...";
+    if(ispart)
+      for(int rr = 0; rr < nLanIts; rr++) {
+        bandH[rr * nLanIts + rr] = E0 - bandH[rr * nLanIts + rr];
+        for(int cc = rr + 1; cc < nLanIts; cc++) {
+          bandH[rr * nLanIts + cc] = -bandH[rr * nLanIts + cc];
+          bandH[cc * nLanIts + rr] = -bandH[cc * nLanIts + rr];
+        }
+      }
+    else
+      for(int rr = 0; rr < nLanIts; rr++)
+        bandH[rr * nLanIts + rr] = bandH[rr * nLanIts + rr] - E0;
+
+    GetEigsysBand(bandH, std::min(size_t(nvecs), size_t(nLanIts - 1)), eigvals,
+                  eigvecs, nLanIts);
+    if(print) {
+      std::ofstream ofile("BLEigs.dat", std::ios::out);
+      ofile.precision(dbl::max_digits10);
+      ofile << "RESULT OF EIGENVALUE CALCULATION: " << std::endl;
+      ofile << " Eigvals: [";
+      for(int i = 0; i < eigvals.size(); i++)
+        ofile << std::scientific << eigvals[i] << ", ";
+      ofile << std::endl;
+      ofile << "Eigvecs: " << std::endl;
+      for(int i = 0; i < nLanIts; i++) {
+        for(int j = 0; j < nLanIts; j++)
+          ofile << std::scientific << eigvecs[i + j * nLanIts] << "  ";
+        ofile << std::endl;
+      }
+      ofile.close();
+    }
+    std::cout << "DONE! ";
+    // FINALLY, COMPUTE S-MATRIX AND RESOLVENT
+    std::vector<double> S(nLanIts * nvecs, 0.);
+    std::cout << " COMPUTING S MATRIX ...";
+    for(int i_lan = 0; i_lan < nLanIts; i_lan++) {
+      for(int j_n = 0; j_n < nvecs; j_n++) {
+        for(int l = 0; l < nvecs; l++)
+          S[i_lan * nvecs + j_n] +=
+              eigvecs[i_lan + l * nLanIts] * R[l + j_n * nvecs];
+      }
+    }
+    if(saveGFmats) {
+      std::cout << "WRITING S MATRIX AND BAND-LANCZOS EIGENVALUES TO FILE!"
+                << std::endl;
+      std::string fprefix = ispart ? "particle" : "hole";
+      std::ofstream ofile(fprefix + "_S.mat", std::ios::out);
+      ofile.precision(dbl::max_digits10);
+      for(int i_lan = 0; i_lan < nLanIts; i_lan++) {
+        for(int k = 0; k < nvecs; k++)
+          ofile << std::scientific << S[i_lan * nvecs + k] << " ";
+        ofile << std::endl;
+      }
+      ofile.close();
+      ofile.open(fprefix + "_BLevals.dat", std::ios::out);
+      ofile.precision(dbl::max_digits10);
+      for(int i_lan = 0; i_lan < nLanIts; i_lan++)
+        ofile << std::scientific << eigvals[i_lan] << std::endl;
+      ofile.close();
+    }
+    std::cout << "DONE! COMPUTING RESOLVENT ...";
+    if(ispart) {
+#pragma omp parallel for
+      for(int iw = 0; iw < ws.size(); iw++) {
+        for(int k = 0; k < nvecs; k++) {
+          for(int l = 0; l < nvecs; l++) {
+            for(int i_lan = 0; i_lan < nLanIts; i_lan++) {
+              res[iw][k * nvecs + l] += S[i_lan * nvecs + k] * 1. /
+                                        (ws[iw] + eigvals[i_lan]) *
+                                        S[i_lan * nvecs + l];
+            }
+          }
+        }
+      }
+    } else {
+#pragma omp parallel for
+      for(int iw = 0; iw < ws.size(); iw++) {
+        for(int k = 0; k < nvecs; k++) {
+          for(int l = 0; l < nvecs; l++) {
+            for(int i_lan = 0; i_lan < nLanIts; i_lan++) {
+              res[iw][l * nvecs + k] += S[i_lan * nvecs + k] * 1. /
+                                        (ws[iw] + eigvals[i_lan]) *
+                                        S[i_lan * nvecs + l];
+            }
+          }
+        }
+      }
+    }
+  }
+  std::cout << "DONE!" << std::endl;
+}
+
+}  // namespace macis
diff --git a/src/macis/gf/eigsolver.cxx b/src/macis/gf/eigsolver.cxx
new file mode 100644
index 00000000..30d7f9eb
--- /dev/null
+++ b/src/macis/gf/eigsolver.cxx
@@ -0,0 +1,83 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+#include "macis/gf/eigsolver.hpp"
+
+/***Written by Carlos Mejuto Zaera***/
+
+namespace macis {
+
+void Hste_v(const std::vector<double> &alphas, const std::vector<double> &betas,
+            Eigen::VectorXd &eigvals, Eigen::MatrixXd &eigvecs) {
+  /*
+   * COMPUTES THE EIGENVALUES AND EIGENVECTORS OF A TRIDIAGONAL, SYMMETRIC
+   * MATRIX A USING LAPACK.
+   */
+  eigvals.resize(alphas.size());
+  eigvecs.resize(alphas.size(), alphas.size());
+  // INITIALIZE VARIABLES
+  // COMPUTE EIGENVALUES AND EIGENVECTORS OF THE TRIDIAGONAL MATRIX
+  lapack::Job JOBZ = lapack::Job::Vec;
+  int N = alphas.size(), LDZ = N;  // SIZES
+  std::vector<double> D, E;        // DIAGONAL AND SUB-DIAGONAL ELEMENTS
+  std::vector<double> Z;           // EIGENVECTORS
+  // INITIALIZE MATRIX
+  D.resize(N);
+  for(int64_t i = 0; i < N; i++) D[i] = alphas[i];
+  E.resize(N - 1);
+  for(int64_t i = 1; i < N; i++) E[i - 1] = betas[i];
+  // ALLOCATE MEMORY
+  Z.resize(N * LDZ);
+
+  // ACTUAL EIGENVALUE CALCULATION
+  lapack::steqr(JOBZ, N, D.data(), E.data(), Z.data(), LDZ);
+  // SAVE EIGENVECTORS
+  for(int i = 0; i < N; i++) {
+    for(int j = 0; j < N; j++) eigvecs(i, j) = Z[i + j * N];
+  }
+  Z.clear();
+  // SAVE EIGENVALUES
+  for(int i = 0; i < N; i++) eigvals(i) = D[i];
+}
+
+void Hsyev(const Eigen::MatrixXd &H, Eigen::VectorXd &eigvals,
+           Eigen::MatrixXd &eigvecs) {
+  /*
+   * COMPUTES THE EIGENVALUES AND EIGENVECTORS OF A SYMMETRIC MATRIX A USING
+   * LAPACK.
+   */
+  eigvals.resize(H.rows());
+  eigvecs.resize(H.rows(), H.rows());
+  // INITIALIZE VARIABLES
+  // COMPUTE EIGENVALUES AND EIGENVECTORS, H IS
+  // STORED IN THE UPPER TRIANGLE
+  lapack::Job JOBZ = lapack::Job::Vec;
+  lapack::Uplo UPLO = lapack::Uplo::Upper;
+  int N = H.rows(), LDA = N;  // SIZES
+  std::vector<double> A;      // MATRIX AND WORKSPACE
+  std::vector<double> W;      // EIGENVALUES AND WORKSPACE
+  // INITIALIZE MATRIX
+  A.resize(N * N);
+  for(int i = 0; i < N; i++) {
+    for(int j = 0; j < N; j++) A[i + j * N] = H(i, j);
+  }
+  // ALLOCATE MEMORY
+  W.resize(N);
+
+  // ACTUAL EIGENVALUE CALCULATION
+  lapack::syev(JOBZ, UPLO, N, A.data(), LDA, W.data());
+  // SAVE EIGENVECTORS
+  for(int i = 0; i < N; i++) {
+    for(int j = 0; j < N; j++) eigvecs(i, N - 1 - j) = A[i + j * N];
+  }
+  A.clear();
+  // SAVE EIGENVALUES
+  for(int i = 0; i < N; i++) eigvals(N - 1 - i) = W[i];
+}
+
+}  // namespace macis
diff --git a/src/macis/gf/gf.cxx b/src/macis/gf/gf.cxx
new file mode 100644
index 00000000..fb37bab2
--- /dev/null
+++ b/src/macis/gf/gf.cxx
@@ -0,0 +1,102 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+#include "macis/gf/gf.hpp"
+
+namespace macis {
+
+std::vector<std::complex<double> > GetGFFreqGrid(const GFSettings &settings) {
+  double wmin = settings.wmin;
+  double wmax = settings.wmax;
+  double eta = settings.eta;
+  double beta = settings.beta;
+  size_t nws = settings.nws;
+  bool real_g = settings.real_g;
+
+  std::complex<double> fact(0., 0.);
+  if(real_g)
+    fact = std::complex<double>(1., 0.);
+  else {
+    fact = std::complex<double>(0., 1.);
+    eta = 0.;
+  }
+
+  std::vector<std::complex<double> > ws(nws, std::complex<double>(0., 0.));
+
+  std::string scale = settings.w_scale;
+  if(scale == "lin") {
+    for(int i = 0; i < nws; i++)
+      ws[i] = fact * (wmin + (wmax - wmin) / double(nws - 1.) * double(i) +
+                      std::complex<double>(0., eta));
+  } else if(scale == "matsubara") {
+    if(real_g == true)
+      throw(
+          std::runtime_error("Error in GetGFFreqGrid! Asked for 'real' "
+                             "Matsubara frequency grid."));
+    for(int i = 0; i < nws; i++)
+      ws[i] = std::complex<double>(0., (2. * double(i) + 1.) * M_PI / beta);
+  } else if(scale == "log") {
+    if((wmin < 0. && wmax > 0.) || (wmin > 0. && wmax < 0.))
+      throw(std::runtime_error(
+          "Error in GetGFFreqGrid! Requested grid touches or passes by 0."));
+    for(int i = 0; i < nws; i++) {
+      double step = std::log(wmin) + (std::log(wmax) - std::log(wmin)) /
+                                         double(nws - 1.) * double(i);
+      ws[i] = fact * std::exp(step) + std::complex<double>(0., eta);
+    }
+  } else
+    throw(std::runtime_error(
+        "Error in GetGFFreqGrid! Frequency scale passed is not supported. "
+        "Options are 'lin', 'log' and 'matsubara'."));
+
+  return ws;
+}
+
+void write_GF(const std::vector<std::vector<std::complex<double> > > &GF,
+              const std::vector<std::complex<double> > &ws,
+              const std::vector<int> &GF_orbs, const std::vector<int> &todelete,
+              const bool is_part) {
+  using dbl = std::numeric_limits<double>;
+  size_t nfreqs = ws.size();
+  int GFmat_size = GF_orbs.size() - todelete.size();
+
+  if(GF_orbs.size() > 1) {
+    std::string fname = is_part ? "LanGFMatrix_ADD.dat" : "LanGFMatrix_SUB.dat";
+    std::ofstream ofile(fname);
+    ofile.precision(dbl::max_digits10);
+    for(int iii = 0; iii < nfreqs; iii++) {
+      ofile << std::scientific << real(ws[iii]) << " " << imag(ws[iii]) << " ";
+      for(int jjj = 0; jjj < GFmat_size; jjj++) {
+        for(int lll = 0; lll < GFmat_size; lll++)
+          ofile << std::scientific << real(GF[iii][jjj * GFmat_size + lll])
+                << " " << imag(GF[iii][jjj * GFmat_size + lll]) << " ";
+      }
+      ofile << std::endl;
+    }
+
+    std::string fname2 = is_part ? "GFMatrix_OrbitalIndices_ADD.dat"
+                                 : "GFMatrix_OrbitalIndices_SUB.dat";
+    std::ofstream ofile2(fname2);
+    for(int iii = 0; iii < GF_orbs.size(); iii++) {
+      if(std::find(todelete.begin(), todelete.end(), iii) != todelete.end())
+        continue;
+      ofile2 << GF_orbs[iii] << std::endl;
+    }
+  } else {
+    std::string fname = is_part ? "LanGF_ADD_" : "LanGF_SUB_";
+    fname += std::to_string(GF_orbs[0] + 1) + "_" +
+             std::to_string(GF_orbs[0] + 1) + ".dat";
+    std::ofstream ofile(fname);
+    ofile.precision(dbl::max_digits10);
+    for(int iii = 0; iii < nfreqs; iii++)
+      ofile << std::scientific << real(ws[iii]) << " " << imag(ws[iii]) << " "
+            << real(GF[iii][0]) << " " << imag(GF[iii][0]) << std::endl;
+  }
+}
+
+}  // namespace macis
diff --git a/src/macis/model/hubbard.cxx b/src/macis/model/hubbard.cxx
new file mode 100644
index 00000000..5e74b0d3
--- /dev/null
+++ b/src/macis/model/hubbard.cxx
@@ -0,0 +1,39 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+#include <macis/model/hubbard.hpp>
+
+namespace macis {
+
+void hubbard_1d(size_t nsites, double t, double U, std::vector<double>& T,
+                std::vector<double>& V, bool pbc) {
+  T.resize(nsites * nsites);
+  V.resize(nsites * nsites * nsites * nsites);
+
+  for(size_t p = 0; p < nsites; ++p) {
+    // Half-filling Chemical Potential
+    T[p * (nsites + 1)] = -U / 2;
+
+    // On-Site Interaction
+    V[p * (nsites * nsites * nsites + nsites * nsites + nsites + 1)] = U;
+
+    // Hopping
+    if(p < nsites - 1) {
+      T[p + (p + 1) * nsites] = -t;
+      T[(p + 1) + p * nsites] = -t;
+    }
+  }
+
+  // PBC for 1-D
+  if(pbc) {
+    T[(nsites - 1)] = -t;
+    T[(nsites - 1) * nsites] = -t;
+  }
+}
+
+}  // namespace macis
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 038f9dfd..1e539475 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -13,6 +13,7 @@ add_executable( macis_test
   fcidump.cxx 
   read_wavefunction.cxx
   double_loop.cxx
+  sd_build.cxx
   csr_hamiltonian.cxx
   davidson.cxx
   transform.cxx
@@ -20,6 +21,7 @@ add_executable( macis_test
   mcscf.cxx
   asci.cxx
   dist_quickselect.cxx
+  hubbard.cxx
 )
 target_link_libraries( macis_test PUBLIC macis Catch2::Catch2 )
 target_include_directories( macis_test PUBLIC ${PROJECT_BINARY_DIR}/tests )
diff --git a/tests/fock_matrices.cxx b/tests/fock_matrices.cxx
index 75a84a05..877c4e9b 100644
--- a/tests/fock_matrices.cxx
+++ b/tests/fock_matrices.cxx
@@ -93,15 +93,15 @@ TEST_CASE("Fock Matrices") {
     macis::matrix_span<double> Fi_span(Fi.data(), norb, norb);
 
     auto act_range = std::make_pair(ninact.get(), ninact.get() + nact.get());
-    auto Fi_active = macis::KokkosEx::submdspan(Fi_span, act_range, act_range);
+    auto Fi_active = Kokkos::submdspan(Fi_span, act_range, act_range);
     for(auto i = 0; i < nact.get(); ++i)
       for(auto j = 0; j < nact.get(); ++j) {
         REQUIRE(Ta(i, j) == Fi_active(i, j));
       }
 
     macis::rank4_span<double> V_span(V.data(), norb, norb, norb, norb);
-    auto V_act_span = macis::KokkosEx::submdspan(V_span, act_range, act_range,
-                                                 act_range, act_range);
+    auto V_act_span =
+        Kokkos::submdspan(V_span, act_range, act_range, act_range, act_range);
     macis::rank4_span<double> Va(V_active.data(), nact.get(), nact.get(),
                                  nact.get(), nact.get());
     for(auto i = 0; i < nact.get(); ++i)
diff --git a/tests/hubbard.cxx b/tests/hubbard.cxx
new file mode 100644
index 00000000..6f3d3553
--- /dev/null
+++ b/tests/hubbard.cxx
@@ -0,0 +1,64 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+#include <spdlog/sinks/null_sink.h>
+#include <spdlog/spdlog.h>
+
+#include <iostream>
+#include <macis/model/hubbard.hpp>
+
+#include "ut_common.hpp"
+
+TEST_CASE("Hubbard") {
+  ROOT_ONLY(MPI_COMM_WORLD);
+
+  const size_t nsites = 4;
+  const size_t nsites2 = nsites * nsites;
+  const size_t nsites3 = nsites2 * nsites;
+  const double t = 1.0, U = 4.0;
+  std::vector<double> T, V;
+
+  SECTION("1D") {
+    bool pbc;
+
+    SECTION("No PBC") { pbc = false; }
+
+    SECTION("PBC") { pbc = true; }
+
+    macis::hubbard_1d(nsites, t, U, T, V, pbc);
+
+    // Check two-body term
+    for(int p = 0; p < nsites; ++p)
+      for(int q = 0; q < nsites; ++q)
+        for(int r = 0; r < nsites; ++r)
+          for(int s = 0; s < nsites; ++s) {
+            const auto mat_el = V[p + nsites * q + nsites2 * r + nsites3 * s];
+            if(p == q and p == r and p == s)
+              REQUIRE(mat_el == U);
+            else
+              REQUIRE(mat_el == 0.0);
+          }
+
+    // Check 1-body term
+    for(int p = 0; p < nsites; ++p)
+      for(int q = 0; q < nsites; ++q) {
+        const auto mat_el = T[p + q * nsites];
+        if(p == q)
+          REQUIRE(mat_el == Approx(-U / 2));
+        else if(std::abs(p - q) == 1)
+          REQUIRE(mat_el == -t);
+        else if(pbc) {
+          if((p == 0 and q == nsites - 1) or (p == nsites - 1 and q == 0))
+            REQUIRE(mat_el == -t);
+          else
+            REQUIRE(mat_el == 0.0);
+        } else
+          REQUIRE(mat_el == 0.0);
+      }
+  }
+}
diff --git a/tests/ini_input.cxx b/tests/ini_input.cxx
index bee88cb1..2aff6f29 100644
--- a/tests/ini_input.cxx
+++ b/tests/ini_input.cxx
@@ -8,6 +8,8 @@
 
 #include "ini_input.hpp"
 
+#include <sstream>
+
 // Misc string functions
 
 /**
@@ -329,6 +331,24 @@ int INIFile::getData(std::string query) {
 
 }  // INIFile::getData<int>
 
+/**
+ *  \brief Specialization of getData to return vector<int> of query
+ *  data field
+ *
+ *  \param [in] query Formatted query string to be parsed
+ *  \return     Value of query data field as a vector<int>
+ */
+template <>
+std::vector<int> INIFile::getData(std::string query) {
+  std::string line = getData<std::string>(query);
+  std::istringstream iss(line);
+  std::vector<int> res;
+  int tmp;
+  while(iss >> tmp) res.push_back(tmp);
+  return res;
+
+}  // INIFile::getData<std::vector<int>>
+
 /**
  *  \brief Specialization of getData to return bool of query
  *  data field
@@ -344,6 +364,27 @@ bool INIFile::getData(std::string query) {
 
 }  // INIFile::getData<bool>
 
+/**
+ *  \brief Specialization of getData to return std::vector<bool> of query
+ *  data field
+ *
+ *  \param [in] query Formatted query string to be parsed
+ *  \return     Value of query data field as a std::vector<bool>
+ */
+template <>
+std::vector<bool> INIFile::getData(std::string query) {
+  std::string line = getData<std::string>(query);
+  std::istringstream iss(line);
+  std::vector<bool> res;
+  std::string tmp;
+  while(iss >> tmp) {
+    bool b = (not tmp.compare("TRUE") or not tmp.compare("ON"));
+    res.push_back(b);
+  }
+  return res;
+
+}  // INIFile::getData<std::vector<bool>>
+
 /**
  *  \brief Specialization of getData to return size_t of query
  *  data field
diff --git a/tests/ref_data/hubbard10.fci.dat b/tests/ref_data/hubbard10.fci.dat
new file mode 100644
index 00000000..aed59ff2
--- /dev/null
+++ b/tests/ref_data/hubbard10.fci.dat
@@ -0,0 +1,38 @@
+1  1  1  1      4.
+2  2  2  2      4.
+3  3  3  3      4.
+4  4  4  4      4.
+5  5  5  5      4.
+6  6  6  6      4.
+7  7  7  7      4.
+8  8  8  8      4.
+9  9  9  9      4.
+10 10 10 10     4.
+1  1  0  0     -2.
+2  2  0  0     -2.
+3  3  0  0     -2.
+4  4  0  0     -2.
+5  5  0  0     -2.
+6  6  0  0     -2.
+7  7  0  0     -2.
+8  8  0  0     -2.
+9  9  0  0     -2.
+10 10 0  0     -2.
+1  2  0  0     -1.
+2  1  0  0     -1.
+2  3  0  0     -1.
+3  2  0  0     -1.
+3  4  0  0     -1.
+4  3  0  0     -1.
+4  5  0  0     -1.
+5  4  0  0     -1.
+5  6  0  0     -1.
+6  5  0  0     -1.
+6  7  0  0     -1.
+7  6  0  0     -1.
+7  8  0  0     -1.
+8  7  0  0     -1.
+8  9  0  0     -1.
+9  8  0  0     -1.
+9  10 0  0     -1.
+10 9  0  0     -1.
diff --git a/tests/sd_build.cxx b/tests/sd_build.cxx
new file mode 100644
index 00000000..9a779920
--- /dev/null
+++ b/tests/sd_build.cxx
@@ -0,0 +1,116 @@
+/*
+ * MACIS Copyright (c) 2023, The Regents of the University of California,
+ * through Lawrence Berkeley National Laboratory (subject to receipt of
+ * any required approvals from the U.S. Dept. of Energy). All rights reserved.
+ *
+ * See LICENSE.txt for details
+ */
+
+#include <iomanip>
+#include <iostream>
+#include <macis/hamiltonian_generator/sd_build.hpp>
+#include <macis/util/cas.hpp>
+#include <macis/util/fcidump.hpp>
+#include <macis/util/fock_matrices.hpp>
+#include <macis/wavefunction_io.hpp>
+
+#include "ut_common.hpp"
+
+using macis::NumActive;
+using macis::NumInactive;
+using macis::NumOrbital;
+
+TEST_CASE("Single Double Build") {
+  ROOT_ONLY(MPI_COMM_WORLD);
+
+  auto norb = macis::read_fcidump_norb(hubbard10_fcidump);
+  const auto norb2 = norb * norb;
+  const auto norb3 = norb2 * norb;
+  const size_t nocc = 10;
+
+  std::vector<double> T(norb * norb);
+  std::vector<double> V(norb * norb * norb * norb);
+  auto E_core = macis::read_fcidump_core(hubbard10_fcidump);
+  macis::read_fcidump_1body(hubbard10_fcidump, T.data(), norb);
+  macis::read_fcidump_2body(hubbard10_fcidump, V.data(), norb);
+  bool just_singles = macis::is_2body_diagonal(hubbard10_fcidump);
+
+  using generator_type = macis::SDBuildHamiltonianGenerator<64>;
+
+#if 0
+  generator_type ham_gen(norb, V.data(), T.data());
+#else
+  generator_type ham_gen(
+      macis::matrix_span<double>(T.data(), norb, norb),
+      macis::rank4_span<double>(V.data(), norb, norb, norb, norb));
+  ham_gen.SetJustSingles(just_singles);
+#endif
+  const auto hf_det = macis::canonical_hf_determinant<64>(nocc, nocc);
+
+  std::vector<double> eps(norb);
+  for(auto p = 0ul; p < norb; ++p) {
+    double tmp = 0.;
+    for(auto i = 0ul; i < nocc; ++i) {
+      tmp += 2. * V[p * (norb + 1) + i * (norb2 + norb3)] -
+             V[p * (1 + norb3) + i * (norb + norb2)];
+    }
+    eps[p] = T[p * (norb + 1)] + tmp;
+  }
+  const auto EHF = ham_gen.matrix_element(hf_det, hf_det);
+
+  SECTION("HF Energy") { REQUIRE(EHF + E_core == Approx(-0.00)); }
+
+  SECTION("RDM") {
+    std::vector<double> ordm(norb * norb, 0.0), trdm(norb3 * norb, 0.0);
+    std::vector<std::bitset<64>> dets = {
+        macis::canonical_hf_determinant<64>(nocc, nocc)};
+
+    std::vector<double> C = {1.};
+
+    ham_gen.form_rdms(
+        dets.begin(), dets.end(), dets.begin(), dets.end(), C.data(),
+        macis::matrix_span<double>(ordm.data(), norb, norb),
+        macis::rank4_span<double>(trdm.data(), norb, norb, norb, norb));
+
+    auto E_tmp = blas::dot(norb2, ordm.data(), 1, T.data(), 1) +
+                 blas::dot(norb3 * norb, trdm.data(), 1, V.data(), 1);
+    REQUIRE(E_tmp == Approx(EHF));
+  }
+
+  SECTION("CI") {
+    size_t nalpha = 5;
+    size_t nbeta = 5;
+    size_t n_inactive = 0;
+    size_t n_active = 10;
+    size_t n_virtual = 0;
+    std::vector<double> C_local;
+    std::vector<double> active_ordm(n_active * n_active);
+    std::vector<double> active_trdm;
+    macis::MCSCFSettings mcscf_settings;
+    mcscf_settings.ci_max_subspace = 100;
+    // Copy integrals into active subsets
+    std::vector<double> T_active(n_active * n_active);
+    std::vector<double> V_active(n_active * n_active * n_active * n_active);
+
+    // Compute active-space Hamiltonian and inactive Fock matrix
+    std::vector<double> F_inactive(norb2);
+    macis::active_hamiltonian(NumOrbital(norb), NumActive(n_active),
+                              NumInactive(n_inactive), T.data(), norb, V.data(),
+                              norb, F_inactive.data(), norb, T_active.data(),
+                              n_active, V_active.data(), n_active);
+    auto E_inactive = macis::inactive_energy(NumInactive(n_inactive), T.data(),
+                                             norb, F_inactive.data(), norb);
+    auto dets = macis::generate_hilbert_space<64>(norb, nalpha, nbeta);
+    double E0 = macis::selected_ci_diag(
+        dets.begin(), dets.end(), ham_gen, mcscf_settings.ci_matel_tol,
+        mcscf_settings.ci_max_subspace, mcscf_settings.ci_res_tol, C_local,
+        MACIS_MPI_CODE(MPI_COMM_WORLD, ) true, mcscf_settings.ci_nstates);
+    E0 += E_inactive + E_core;
+    // Compute RDMs
+    ham_gen.form_rdms(
+        dets.begin(), dets.end(), dets.begin(), dets.end(), C_local.data(),
+        macis::matrix_span<double>(active_ordm.data(), norb, norb),
+        macis::rank4_span<double>(active_trdm.data(), norb, norb, norb, norb));
+    REQUIRE(E0 == Approx(-2.538061882041e+01));
+  }
+}
diff --git a/tests/standalone_driver.cxx b/tests/standalone_driver.cxx
index 44794230..5db07c76 100644
--- a/tests/standalone_driver.cxx
+++ b/tests/standalone_driver.cxx
@@ -16,7 +16,9 @@
 #include <iostream>
 #include <macis/asci/grow.hpp>
 #include <macis/asci/refine.hpp>
+#include <macis/gf/gf.hpp>
 #include <macis/hamiltonian_generator/double_loop.hpp>
+#include <macis/hamiltonian_generator/sd_build.hpp>
 #include <macis/util/cas.hpp>
 #include <macis/util/detail/rdm_files.hpp>
 #include <macis/util/fcidump.hpp>
@@ -165,6 +167,8 @@ int main(int argc, char** argv) {
     OPT_KEYWORD("MCSCF.CI_MAX_SUB", mcscf_settings.ci_max_subspace, size_t);
     OPT_KEYWORD("MCSCF.CI_MATEL_TOL", mcscf_settings.ci_matel_tol, double);
 
+    OPT_KEYWORD("MCSCF.CI_NSTATES", mcscf_settings.ci_nstates, size_t);
+
     // ASCI Settings
     macis::ASCISettings asci_settings;
     std::string asci_wfn_fname, asci_wfn_out_fname;
@@ -307,6 +311,79 @@ int main(int argc, char** argv) {
           }
         }
 
+        // Testing GF
+        bool testGF = false;
+        OPT_KEYWORD("CI.GF", testGF, bool);
+        if(testGF) {
+          // Generate determinant list
+          auto dets = macis::generate_hilbert_space<generator_t::nbits>(
+              n_active, nalpha, nbeta);
+          // Generate the Hamiltonian Generator
+          generator_t ham_gen(
+              macis::matrix_span<double>(T_active.data(), n_active, n_active),
+              macis::rank4_span<double>(V_active.data(), n_active, n_active,
+                                        n_active, n_active));
+
+          // Generate frequency grid
+          double wmin = -8.;
+          double wmax = 8.;
+          size_t nws = 1001;
+          double beta = 157.;
+          double eta = 0.1;
+          std::complex<double> w0(wmin, eta);
+          std::complex<double> wf(wmax, eta);
+          std::vector<std::complex<double>> ws(nws,
+                                               std::complex<double>(0., 0.));
+          for(int i = 0; i < nws; i++)
+            ws[i] = w0 + (wf - w0) / double(nws - 1) * double(i);
+          // ws[i] = std::complex<double>(0., 1.) * (2. * i + 1.) * M_PI / beta;
+
+          // MCSCF Settings
+          macis::GFSettings gf_settings;
+          OPT_KEYWORD("GF.NORBS", gf_settings.norbs, size_t);
+          OPT_KEYWORD("GF.TRUNC_SIZE", gf_settings.trunc_size, size_t);
+          OPT_KEYWORD("GF.TOT_SD", gf_settings.tot_SD, int);
+          OPT_KEYWORD("GF.GFSEEDTHRES", gf_settings.GFseedThres, double);
+          OPT_KEYWORD("GF.ASTHRES", gf_settings.asThres, double);
+          OPT_KEYWORD("GF.USE_BANDLAN", gf_settings.use_bandLan, bool);
+          OPT_KEYWORD("GF.NLANITS", gf_settings.nLanIts, int);
+          OPT_KEYWORD("GF.WRITE", gf_settings.writeGF, bool);
+          OPT_KEYWORD("GF.PRINT", gf_settings.print, bool);
+          OPT_KEYWORD("GF.SAVEGFMATS", gf_settings.saveGFmats, bool);
+          OPT_KEYWORD("GF.ORBS_BASIS", gf_settings.GF_orbs_basis,
+                      std::vector<int>);
+          OPT_KEYWORD("GF.IS_UP_BASIS", gf_settings.is_up_basis,
+                      std::vector<bool>);
+          OPT_KEYWORD("GF.ORBS_COMP", gf_settings.GF_orbs_comp,
+                      std::vector<int>);
+          OPT_KEYWORD("GF.IS_UP_COMP", gf_settings.is_up_comp,
+                      std::vector<bool>);
+
+          // GF vector
+          std::vector<std::vector<std::complex<double>>> GF(
+              nws, std::vector<std::complex<double>>(
+                       n_active * n_active, std::complex<double>(0., 0.)));
+
+          // Occupation numbers
+          std::vector<double> occs(n_active, 1.);
+          for(int i = 0; i < n_active; i++)
+            occs[i] = active_ordm[i + n_active * i] / 2.;
+          std::cout << "Occupation Nrs.: ";
+          for(int i = 0; i < n_active; i++) std::cout << "  " << occs[i];
+          std::cout << std::endl;
+
+          // GS vector
+          Eigen::VectorXd psi0 = Eigen::Map<Eigen::VectorXd, Eigen::Unaligned>(
+              C_local.data(), C_local.size());
+
+          // Evaluate particle GF
+          macis::RunGFCalc<nwfn_bits>(GF, psi0, ham_gen, dets, E0, true, ws,
+                                      occs, gf_settings);
+          // Evaluate hole GF
+          macis::RunGFCalc<nwfn_bits>(GF, psi0, ham_gen, dets, E0, false, ws,
+                                      occs, gf_settings);
+        }
+
       } else {
         // Generate the Hamiltonian Generator
         generator_t ham_gen(
diff --git a/tests/ut_common.hpp.in b/tests/ut_common.hpp.in
index cb9cc085..ba5af02c 100644
--- a/tests/ut_common.hpp.in
+++ b/tests/ut_common.hpp.in
@@ -37,3 +37,5 @@ const std::string water_ccpvdz_nzval_fname  =
   REF_DATA_PREFIX "/h2o.ccpvdz.cisd.nzval.bin";
 const std::string ch4_wfn_fname =
   REF_DATA_PREFIX "/ch4.wfn.dat";
+const std::string hubbard10_fcidump =
+  REF_DATA_PREFIX "/hubbard10.fci.dat";