Skip to content

Commit

Permalink
Add preallocation for thread insertion in ASCI search, misc print impr…
Browse files Browse the repository at this point in the history
…ovements
  • Loading branch information
David Williams-Young committed Nov 6, 2023
1 parent 6521725 commit a9a0208
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 14 deletions.
26 changes: 17 additions & 9 deletions include/macis/asci/determinant_search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ asci_contrib_container<wfn_t<N>> asci_contributions_standard(
#ifdef MACIS_ENABLE_MPI
template <size_t N>
asci_contrib_container<wfn_t<N>> asci_contributions_constraint(
ASCISettings asci_settings, wavefunction_iterator_t<N> cdets_begin,
ASCISettings asci_settings, const size_t ntdets,
wavefunction_iterator_t<N> cdets_begin,
wavefunction_iterator_t<N> cdets_end, const double E_ASCI,
const std::vector<double>& C, size_t norb, const double* T_pq,
const double* G_red, const double* V_red, const double* G_pqrs,
Expand Down Expand Up @@ -266,7 +267,7 @@ asci_contrib_container<wfn_t<N>> asci_contributions_constraint(
logger->info(" * GEN_DUR = {:.2e} ms", gen_c_dur.count());

size_t max_size =
std::min(asci_settings.pair_size_max,
std::min(std::min(ntdets,asci_settings.pair_size_max),
ncdets * (n_sing_alpha + n_sing_beta + // AA + BB
n_doub_alpha + n_doub_beta + // AAAA + BBBB
n_sing_alpha * n_sing_beta // AABB
Expand Down Expand Up @@ -298,8 +299,8 @@ asci_contrib_container<wfn_t<N>> asci_contributions_constraint(
const size_t c_end = std::min(ncon_total, ic + ntake);
for(; ic < c_end; ++ic) {
const auto& con = constraints[ic].first;
printf("[rank %4d tid:%4d] %10lu / %10lu\n", world_rank,
omp_get_thread_num(), ic, ncon_total);
//printf("[rank %4d tid:%4d] %10lu / %10lu\n", world_rank,
// omp_get_thread_num(), ic, ncon_total);

for(size_t i_alpha = 0, iw = 0; i_alpha < nuniq_alpha; ++i_alpha) {
const auto& alpha_det = uniq_alpha[i_alpha].first;
Expand Down Expand Up @@ -384,11 +385,15 @@ asci_contrib_container<wfn_t<N>> asci_contributions_constraint(
// Insert into list
#pragma omp critical
{
printf("[rank %4d tid:%4d] BEFORE\n", world_rank, omp_get_thread_num());
asci_pairs_total.insert(asci_pairs_total.end(), asci_pairs.begin(),
asci_pairs.end());
if(asci_pairs_total.size()) {
// Preallocate space for insertion
asci_pairs_total.reserve(asci_pairs.size() + asci_pairs_total.size());
asci_pairs_total.insert(asci_pairs_total.end(), asci_pairs.begin(),
asci_pairs.end());
} else {
asci_pairs_total = std::move(asci_pairs);
}
asci_contrib_container<wfn_t<N>>().swap(asci_pairs);
printf("[rank %4d tid:%4d] AFTER\n", world_rank, omp_get_thread_num());
}

} // OpenMP
Expand Down Expand Up @@ -457,7 +462,7 @@ std::vector<wfn_t<N>> asci_search(
// #ifdef MACIS_ENABLE_MPI
// else
asci_pairs = asci_contributions_constraint(
asci_settings, cdets_begin, cdets_end, E_ASCI, C, norb, T_pq, G_red,
asci_settings, ndets_max, cdets_begin, cdets_end, E_ASCI, C, norb, T_pq, G_red,
V_red, G_pqrs, V_pqrs, ham_gen MACIS_MPI_CODE(, comm));
// #endif
auto pairs_en = clock_type::now();
Expand All @@ -469,6 +474,8 @@ std::vector<wfn_t<N>> asci_search(
size_t npairs = asci_pairs.size();
#endif
logger->info(" * ASCI Kept {} Pairs", npairs);
if(npairs < ndets_max)
logger->info(" * WARNING: Kept ASCI pairs less than requested TDETS");

#ifdef MACIS_ENABLE_MPI
if(world_size > 1) {
Expand Down Expand Up @@ -596,6 +603,7 @@ std::vector<wfn_t<N>> asci_search(
dist_quickselect(scores.begin(), scores.end(), top_k_elements, comm,
std::greater<double>{}, std::equal_to<double>{});

// Report the k-th score used as the partition pivot. A format spec must be
// introduced by ':' inside the replacement field ("{:.2e}", as in the other
// log calls in this file); "{.2e}" is not a valid replacement field.
logger->info(" * Kth Score Pivot = {:.2e}", kth_score);
// Partition local pairs into less / eq batches
auto [g_begin, e_begin, l_begin, _end] = leg_partition(
asci_pairs.begin(), asci_pairs.end(), kth_score,
Expand Down
16 changes: 11 additions & 5 deletions include/macis/util/mpi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ REGISTER_MPI_TYPE(int, MPI_INT);
// Associate builtin scalar types with their MPI datatype handles.
// REGISTER_MPI_TYPE is defined outside this view; presumably it specializes
// mpi_traits<T> (whose datatype() is used by global_atomic) — TODO confirm.
REGISTER_MPI_TYPE(double, MPI_DOUBLE);
REGISTER_MPI_TYPE(float, MPI_FLOAT);
// NOTE(review): pairing size_t with MPI_UINT64_T assumes size_t is 64 bits
// wide — confirm for every supported target.
REGISTER_MPI_TYPE(size_t, MPI_UINT64_T);
REGISTER_MPI_TYPE(int64_t, MPI_INT64_T);

#undef REGISTER_MPI_TYPE

Expand Down Expand Up @@ -240,13 +241,13 @@ class global_atomic {

global_atomic() = delete;

global_atomic(MPI_Comm comm) {
global_atomic(MPI_Comm comm, T init = 0) {
MPI_Win_allocate(sizeof(T), sizeof(T), MPI_INFO_NULL, comm, &buffer_,
&window_);
if(window_ == MPI_WIN_NULL) {
throw std::runtime_error("Window creation failed");
}
*buffer_ = 0;
*buffer_ = init;
MPI_Win_lock_all(MPI_MODE_NOCHECK, window_);
}

Expand All @@ -257,14 +258,19 @@ class global_atomic {

global_atomic(const global_atomic&) = delete;
global_atomic(global_atomic&&) noexcept = delete;
T fetch_and_add(T val) {

T fetch_and_op(T val, MPI_Op op) {
T next_val;
MPI_Fetch_and_op(&val, &next_val, mpi_traits<T>::datatype(), 0, 0, MPI_SUM,
MPI_Fetch_and_op(&val, &next_val, mpi_traits<T>::datatype(), 0, 0, op,
window_);
MPI_Win_flush(0,window_);
return next_val;
}

// Sum variant of fetch_and_op: forwards `val` together with MPI_SUM and
// returns whatever fetch_and_op returns.
T fetch_and_add(T val) {
  return fetch_and_op(val, MPI_SUM);
}
// Minimum variant of fetch_and_op: forwards `val` together with MPI_MIN and
// returns whatever fetch_and_op returns.
T fetch_and_min(T val) {
  return fetch_and_op(val, MPI_MIN);
}


};


Expand Down

0 comments on commit a9a0208

Please sign in to comment.