Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Green's Functions #13

Open
wants to merge 39 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
e95c43f
Added GF functionalities. TODO: Fix the outputs to console, polish th…
CarlosMejZ Jun 2, 2023
dbea9ae
Filling in the doxygen docu for the GF-relevant routines.
CarlosMejZ Jun 2, 2023
88353d3
Committing license headers
Jun 2, 2023
c4704b2
Merge branch 'master' into feature/gf
Jun 2, 2023
90c234a
Committing clang-format changes
Jun 2, 2023
2f9d5ee
Remove extra omp.h inclusion
Jun 2, 2023
e23c537
Committing clang-format changes
Jun 2, 2023
a5946ab
Merge branch 'master' into feature/gf
Jun 2, 2023
2ffd1e2
Applied review comments from pull request, started reformulating the …
CarlosMejZ Jun 5, 2023
91e506b
Committing clang-format changes
Jun 5, 2023
5175a96
Added Hubbard Hamiltonian generator + UT
Jun 7, 2023
ebb7ef4
Committing clang-format changes
Jun 7, 2023
84b219d
Committing license headers
Jun 7, 2023
65f56b3
Add PBC
Jun 13, 2023
e0ef771
Committing clang-format changes
Jun 13, 2023
9ab4627
Merge branch 'master' into feature/model
Jun 13, 2023
414fdd1
Merge branch 'master' into feature/gf
Jun 13, 2023
302feef
Implemented suggestions for GF routines. Most importantly, vectorized…
CarlosMejZ Jun 15, 2023
a806f13
Committing clang-format changes
Jun 15, 2023
f02e8b0
Cleaned GF routines from omp pragmas, only one remaining to paralleli…
CarlosMejZ Jun 21, 2023
a08ebf0
Committing clang-format changes
Jun 21, 2023
5baea2e
BUG FIXED: Passing wrong vector length to Band Lanczos. Also, include…
CarlosMejZ Sep 11, 2023
74702c2
Committing clang-format changes
Sep 11, 2023
d56989c
Included new HamiltonianGenerator, particularly efficient for impurit…
CarlosMejZ Sep 19, 2023
3241bc4
Merged with feature/model, to get the single-double Hamiltonian Gener…
CarlosMejZ Sep 20, 2023
d4a7e4a
Adding clang format changes from bot.
CarlosMejZ Sep 20, 2023
3d3277d
Committing clang-format changes
Sep 20, 2023
41cc116
Added option to read in vectors of ints and bools from input file, to…
CarlosMejZ Sep 22, 2023
a51b754
Committing clang-format changes
Sep 22, 2023
ff65ee7
Fixed Bug in GF calculation: Call to band diagonalization through Lap…
CarlosMejZ Oct 5, 2023
b797e30
Committing clang-format changes
Oct 5, 2023
f838450
Added functionalities to specify GF frequency grid.
CarlosMejZ Oct 9, 2023
65db897
Committing clang-format changes
Oct 9, 2023
93e48e8
Updating calls to submdspan, since mdspan library changed in which na…
CarlosMejZ Nov 6, 2023
18415d5
Committing clang-format changes
Nov 6, 2023
e38bdbf
Added option to ask for multiple eigenstates in selected_ci_diag. Cal…
CarlosMejZ Oct 28, 2024
fa2442a
Committing clang-format changes
Oct 28, 2024
b2d334e
Added minor modification to make Band Lanczos computation of the GF c…
CarlosMejZ Nov 26, 2024
365a871
Committing clang-format changes
Nov 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions include/macis/asci/determinant_search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -539,12 +539,13 @@ std::vector<wfn_t<N>> asci_search(
// and keep only the duplicate with positive coefficient.
keep_only_largest_copy_asci_pairs(asci_pairs);

asci_pairs.erase(std::partition(asci_pairs.begin(), asci_pairs.end(),
[](const auto& p) { return p.rv < 0.0; }),
asci_pairs.end());
// asci_pairs.erase(std::partition(asci_pairs.begin(), asci_pairs.end(),
// [](const auto& p) { return p.rv < 0.0; }),
// asci_pairs.end());

// Only do top-K on (ndets_max - ncdets) b/c CDETS will be added later
const size_t top_k_elements = ndets_max - ncdets;
// const size_t top_k_elements = ndets_max - ncdets;
const size_t top_k_elements = ndets_max;

auto keep_large_en = clock_type::now();
duration_type keep_large_dur = keep_large_en - keep_large_st;
Expand Down Expand Up @@ -650,7 +651,7 @@ std::vector<wfn_t<N>> asci_search(
[](auto x) { return x.state; });

// Insert the CDETS back in
new_dets.insert(new_dets.end(), cdets_begin, cdets_end);
// new_dets.insert(new_dets.end(), cdets_begin, cdets_end);
new_dets.shrink_to_fit();

logger->info(" * New Dets Mem = {:.2e} GiB", to_gib(new_dets));
Expand Down
5 changes: 3 additions & 2 deletions include/macis/asci/grow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ auto asci_grow(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
dur_t ai_dur = ai_en - ai_st;
logger->trace(" * ASCI_ITER_DUR = {:.2e} ms", ai_dur.count());
if(ndets_new > wfn.size())
throw std::runtime_error("Wavefunction didn't grow enough...");
logger->info("Wavefunction didn't grow enough...");
// throw std::runtime_error("Wavefunction didn't grow enough...");

logger->info(fmt_string, iter++, E, E - E0, wfn.size());
if(asci_settings.grow_with_rot and
Expand Down Expand Up @@ -127,7 +128,7 @@ auto asci_grow(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
selected_ci_diag(
wfn.begin(), wfn.end(), ham_gen, mcscf_settings.ci_matel_tol,
mcscf_settings.ci_max_subspace, mcscf_settings.ci_res_tol,
X_local MACIS_MPI_CODE(, comm));
X_local MACIS_MPI_CODE(, comm), false, mcscf_settings.ci_nstates);

if(world_size > 1) {
#ifdef MACIS_ENABLE_MPI
Expand Down
3 changes: 2 additions & 1 deletion include/macis/asci/iteration.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
auto E = selected_ci_diag<N, index_t>(
wfn.begin(), wfn.end(), ham_gen, mcscf_settings.ci_matel_tol,
mcscf_settings.ci_max_subspace, mcscf_settings.ci_res_tol,
X_local MACIS_MPI_CODE(, comm));
X_local MACIS_MPI_CODE(, comm), false, mcscf_settings.ci_nstates);

#ifdef MACIS_ENABLE_MPI
auto world_size = comm_size(comm);
Expand Down Expand Up @@ -64,6 +64,7 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
X = std::move(X_local); // Serial
#endif

if(wfn.size() > 1) reorder_ci_on_coeff(wfn, X);
return std::make_tuple(E, wfn, X);
}

Expand Down
3 changes: 3 additions & 0 deletions include/macis/bitset_operations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
*/

#pragma once
#include <string.h>
#include <strings.h>

#include <bit>
#include <cassert>
#include <climits>
Expand Down
279 changes: 279 additions & 0 deletions include/macis/gf/bandlan.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
/*
* MACIS Copyright (c) 2023, The Regents of the University of California,
* through Lawrence Berkeley National Laboratory (subject to receipt of
* any required approvals from the U.S. Dept. of Energy). All rights reserved.
*
* See LICENSE.txt for details
*/

/**
* @file bandlan.h++
*
* @brief Implements simple Band Lanczos routine.
*
* @author Carlos Mejuto Zaera
* @date 25/04/2022
*/
#pragma once
#include <assert.h>
#include <sys/stat.h>

#include <complex>
#include <fstream>
#include <iomanip>
#include <limits>
#include <lobpcgxx/lobpcg.hpp>
#include <map>
#include <sparsexx/matrix_types/csr_matrix.hpp>
#include <sparsexx/spblas/pspmbv.hpp>
#include <sparsexx/spblas/spmbv.hpp>
#include <utility>

#include "macis/solvers/davidson.hpp"

namespace macis {

/**
* @ brief Wrapper for QR decomposition in LAPACK, basically
* calling dgeqrf and dorgqr to evaluate the R and
* Q matrices, which are returned. Here, the input
* matrix has more rows than columns.
*
* @param[inout] std::vector<double> &Q: On input,
* matrix for which to evaluate the QR decomposition.
* On output, Q-matrix.
* @param[out] std::vector<double> &R: On output, R
* matrix in the QR decomposition.
* @param[in] int Qrows: Nr. of rows in matrix Q.
* @param[in] int Qcols: Nr. of cols in matrix Q.
*
* @returns bool: Error code from LAPACK routines.
*
* @author Carlos Mejuto-Zaera
* @date 25/04/2022
*/
bool QRdecomp(std::vector<double> &Q, std::vector<double> &R, int Qrows,
int Qcols);

/**
* @ brief Wrapper for QR decomposition in LAPACK, basically
* calling dgeqrf and dorgqr to evaluate the R and
* Q matrices, which are returned. Here, the input
* matrix has more columns than rows.
*
* @param[inout] std::vector<double> &Q: On input,
* matrix for which to evaluate the QR decomposition.
* On output, Q-matrix.
* @param[out] std::vector<double> &R: On output, R
* matrix in the QR decomposition.
* @param[in] int Qrows: Nr. of rows in Q.
* @param[in] int Qcols: Nr. of cols in Q.
*
* @returns bool: Error code from LAPACK routines.
*
* @author Carlos Mejuto-Zaera
* @date 25/04/2022
*/
bool QRdecomp_tr(std::vector<double> &Q, std::vector<double> &R, int Qrows,
int Qcols);

/**
* @brief Wrapper to LAPACK routine to evaluate the eigenvectors
* and eigenvalues of the symmetric matrix mat.
*
* @param[inout] std::vector<double> &mat: Matrix for
* which to compute the eigenvalues/vectors. Erased
* during computation.
* @param[out] std::vector<double> &eigvals: Eigenvalues, sorted from smallest
* to largest.
* @param[out] std::vector<double> &eigvecs: Eigenvectors,
* stored as row vectors.
* @param[in] int matsize: Nr. of rows/columns of the square matrix mat.
*
* @returns bool: Error code from LAPACK.
*
* @author Carlos Mejuto Zaera
* @date 25/04/2022
*/
bool GetEigsys(std::vector<double> &mat, std::vector<double> &eigvals,
std::vector<double> &eigvecs, int matsize);

/**
* @brief Wrapper to LAPACK routine to evaluate the eigenvectors
* and eigenvalues of the symmetric band matrix mat.
*
* @param[inout] std::vector<double> &mat: Matrix for
* which to compute the eigenvalues/vectors. Erased
* during computation.
* @param[in] int nSupDiag: Nr. of bands.
* @param[out] std::vector<double> &eigvals: Eigenvalues, sorted from smallest
* to largest.
* @param[out] std::vector<double> &eigvecs: Eigenvectors,
* stored as row vectors.
* @param[in] int matsize: Nr. of rows/cols of the square matrix mat.
*
* @returns bool: Error code from LAPACK.
*
* @author Carlos Mejuto Zaera
* @date 25/04/2022
*/
bool GetEigsysBand(std::vector<double> &mat, int nSupDiag,
std::vector<double> &eigvals, std::vector<double> &eigvecs,
int matsize);

/**
* @brief Perform a band Lanczos calculation on the Hamiltonian operator H,
* starting from vectors qs, for at most nLanIts iterations. The resulting
* band-diagonal matrix Hamiltonian will be stored in bandH. Note that this
* implementation does not account for deflations (i.e., pruning the span of the
* qs for linear dependencies in higher powers of H).
*
* @param[in] const sparseexx::csr_matrix<double, int32_t> &H: Hamiltonian
* oprator. Just needs to implement a matrix vector product.
* @param[in] std::vector<Cont> &qs: Initial set of vetors to
* perform the band Lanczos on. Deleted on exit.
* @param[in] std::vector<Cont> &bandH: On exit, band-diagonal
* Hamiltonian approximation.
* @param[inout] int &nLanIts: Number of Lanczos iterations to perform.
* @param[in] double thres: Threshold determining when to ignore beta's for
* being too small.
* @param[in] bool print: If true, write intermediate results to file.
*
* @author Carlos Mejuto Zaera
* @date 25/04/2022
*/
template <class Cont, class Functor>
void BandLan(const Functor &H, std::vector<Cont> &qs, std::vector<Cont> &bandH,
int &nLanIts, int nbands, int N, double thres = 1.E-6,
bool print = false) {
// BAND LANCZOS ROUTINE. TAKES AS INPUT THE HAMILTONIAN H, INITIAL VECTORS qs
// AND RETURNS THE BAND HAMILTONIAN bandH. IT PERFORMS nLanIts ITERATIONS,
// STOPPING IF THE NORM OF ANY NEW KRYLOV VECTOR IS BELOW thres. IF LANCZOS IS
// STOPPED PREMATURELY , nLanIts IS OVERWRITTEN WITH THE ACTUAL NUMBER OF
// ITERATIONS! THE qs VECTOR IS ERASED AT THE END OF THE CALCULATION
using dbl = std::numeric_limits<double>;
bandH.clear();
bandH.resize(nLanIts * nLanIts, 0.);

auto VecNorm = [](const std::vector<Cont> &vec) -> double {
return std::sqrt(
std::real(blas::dot(vec.size(), vec.data(), 1, vec.data(), 1)));
};

// MAKE SPACE FOR 2 * nbands VECTORS
qs.resize(2 * nbands * N);
std::vector<Cont> temp(N, 0.);
if(print) {
for(int i = 0; i < nbands; i++) {
std::ofstream ofile("lanvec_" + std::to_string(i + 1) + ".dat",
std::ios::out);
ofile.precision(dbl::max_digits10);
for(size_t el = 0; el < N; el++)
ofile << std::scientific << qs[el + i * N] << std::endl;
ofile.close();
}
}
// DICTIONARY TO KEEP THE REAL INDICES OF THE KRYLOV VECTORS
// THIS IS NECESSARY IN ORDER TO ONLY STORE 2* nbands OF THEM
// AT ANY POINT IN TIME, PLUS ONE SCRATCH VECTOR TO BE DEFINED
// INSIDE THE FOR LOOP
std::vector<int> true_indx(nLanIts + 1);
for(int i = 0; i < nbands; i++) true_indx[i + 1] = i;
int next_indx = nbands;

for(int it = 1; it <= nLanIts; it++) {
int band_indx_i =
true_indx[it]; // TO WHAT ELEMENT OF THE VECTOR SET DO WE APPLY THIS
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain this indirection here? Generally we want to avoid things like this (to e.g. allow for better usage of Level-3 BLAS when possible), but if its completely necessary, it's fine.

Copy link
Collaborator Author

@CarlosMejZ CarlosMejZ Jun 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean the use of the effective index true_indx? (I'm afraid I don't know what you mean by indirection)

This is what I came up with to apply the matvecs in the right order to the right vector, since in band Lanczos, unlike in the regular one, at every iteration you act on a different vector in your set of nbands. Why is this a problem for BLAS? Shouldn't it be fine as long as you pass the right memory sector to act on?

I guess that a way to avoid this true_indx variable could be to turn the for loop over it into two, one external loop going over nLanIts // nbands and one internal loop going over nbands. I think that should be completely equivalent, and then the index in the internal loop would be essentially iterating over which vector in the basis to act on with the Hamiltonian. Do you think this would make the code better?

H.operator_action(1, 1., qs.data() + band_indx_i * N, N, 0., temp.data(),
N);
if(print) {
std::ofstream ofile("Htimes_lanvec_" + std::to_string(it) + ".dat",
std::ios::out);
ofile.precision(dbl::max_digits10);
for(size_t el = 0; el < temp.size(); el++)
ofile << std::scientific << temp[el] << std::endl;
ofile.close();
}
for(int jt = std::max(1, it - nbands); jt <= std::min(it - 1, nLanIts);
jt++) {
int band_indx_j = true_indx[jt];
blas::axpy(N, -1. * bandH[(it - 1) * nLanIts + jt - 1],
qs.data() + N * band_indx_j, 1, temp.data(), 1);
}
for(int jt = it; jt <= std::min(it + nbands - 1, nLanIts); jt++) {
int band_indx_j = true_indx[jt];
bandH[(it - 1) * nLanIts + jt - 1] =
blas::dot(N, temp.data(), 1, qs.data() + band_indx_j * N, 1);
bandH[(jt - 1) * nLanIts + it - 1] = bandH[(it - 1) * nLanIts + jt - 1];
blas::axpy(N, -1. * bandH[(it - 1) * nLanIts + jt - 1],
qs.data() + N * band_indx_j, 1, temp.data(), 1);
}
if(it + nbands <= nLanIts) {
bandH[(it - 1) * nLanIts + it + nbands - 1] = VecNorm(temp);
bandH[(it + nbands - 1) * nLanIts + it - 1] =
bandH[(it - 1) * nLanIts + it + nbands - 1];
true_indx[it + nbands] = next_indx;
if(std::abs(bandH[(it - 1) * nLanIts + it + nbands - 1]) < thres) {
// std::cout
// << "BAND LANCZOS STOPPED PREMATURELY DUE TO SMALL NORM! NAMELY "
// << bandH[(it - 1) * nLanIts + it + nbands - 1]
// << ", STOPPED AT ITERATION: " << it << std::endl;
// nLanIts = it;
// bandH.resize(nLanIts * nLanIts);
// break;
blas::scal(N, 0., qs.data() + true_indx[it + nbands] * N, 1);
std::cout << "FOUND A ZERO VECTOR AT POSITION " << next_indx
<< std::endl;
} else {
blas::copy(N, temp.data(), 1, qs.data() + true_indx[it + nbands] * N,
1);
blas::scal(N, 1. / bandH[(it - 1) * nLanIts + it + nbands - 1],
qs.data() + true_indx[it + nbands] * N, 1);
if(print) {
std::ofstream ofile("lanvec_" + std::to_string(it + nbands) + ".dat",
std::ios::out);
ofile.precision(dbl::max_digits10);
for(size_t el = 0; el < N; el++)
ofile << std::scientific << qs[true_indx[it + nbands] * N + el]
<< std::endl;
ofile.close();
}
}
next_indx = (next_indx + 1 >= 2 * nbands) ? 0 : next_indx + 1;
}
}
qs.clear();
}

/**
* @brief Evaluates the expectation values of the resolvent of Hamiltonian H
* along a frequency grid ws with respect to the vectors vecs, using the band
* Lanczos algorithm.
*
* @param[in] const sparsex::dist_sparse_matrix<sparsexx::csr_matrix<double,
* int32_t> > &H: Hamiltonian operator.
* @param[in] std::vector<double> &vecs: Vectors for which to
* compute the resolvent matrix elements in format res[freq.][iorb1 * norbs +
* iorb2].
* @param[in] std::vector<std::complex<double> > &ws: Frequency grid over which
* to evaluate the resolvent.
* @param[out] std::vector<std::vector<std::complex<double> > >
* &res: On exit, resolvent elements.
* @param[in] int nLanIts: Max number of iterations.
* @param[in] double E0: Ground state energy, for shifting the resolvent.
* @param[in] bool ispart: If true, computes resolvent for particle GF,
* otherwise for hole GF.
* @param[in] bool print: If true, write intermediate results to file.
*
* @author Carlos Mejuto Zaera
* @date 25/04/2022
*/
void BandResolvent(
const sparsexx::dist_sparse_matrix<sparsexx::csr_matrix<double, int32_t> >
&H,
std::vector<double> &vecs, const std::vector<std::complex<double> > &ws,
std::vector<std::vector<std::complex<double> > > &res, int nLanIts,
double E0, bool ispart, int nvecs, int len_vec, bool print = false,
bool saveGFmats = false);

} // namespace macis
Loading