diff --git a/include/macis/asci/determinant_search.hpp b/include/macis/asci/determinant_search.hpp index 4f3a1ccc..b01eb25f 100644 --- a/include/macis/asci/determinant_search.hpp +++ b/include/macis/asci/determinant_search.hpp @@ -157,7 +157,7 @@ asci_contrib_container> asci_contributions_constraint( using wfn_traits = wavefunction_traits>; using spin_wfn_type = spin_wfn_t>; using spin_wfn_traits = wavefunction_traits; - using wfn_comp = typename wfn_traits::spin_comparator; + using wfn_comp = typename wfn_traits::spin_comparator; if(!std::is_sorted(cdets_begin, cdets_end, wfn_comp{})) throw std::runtime_error("PT2 Only Works with Sorted Wfns"); @@ -264,20 +264,19 @@ asci_contrib_container> asci_contributions_constraint( } }; - auto uniq_alpha = get_unique_alpha(cdets_begin, cdets_end); const size_t nuniq_alpha = uniq_alpha.size(); std::vector> uniq_alpha_wfn(nuniq_alpha); - std::transform(uniq_alpha.begin(), uniq_alpha.end(), uniq_alpha_wfn.begin(), - [](const auto& p) { return wfn_traits::from_spin(p.first,0); }); - + std::transform( + uniq_alpha.begin(), uniq_alpha.end(), uniq_alpha_wfn.begin(), + [](const auto& p) { return wfn_traits::from_spin(p.first, 0); }); using unique_alpha_data = std::vector; std::vector uad(nuniq_alpha); for(auto i = 0, iw = 0; i < nuniq_alpha; ++i) { std::vector occ_alpha, vir_alpha; - spin_wfn_traits::state_to_occ_vir(norb, uniq_alpha[i].first, - occ_alpha, vir_alpha); + spin_wfn_traits::state_to_occ_vir(norb, uniq_alpha[i].first, occ_alpha, + vir_alpha); const auto nbeta = uniq_alpha[i].second; uad[i].reserve(nbeta); @@ -289,7 +288,7 @@ asci_contrib_container> asci_contributions_constraint( #endif - //const auto n_occ_alpha = wfn_traits::count(uniq_alpha_wfn[0]); + // const auto n_occ_alpha = wfn_traits::count(uniq_alpha_wfn[0]); const auto n_occ_alpha = spin_wfn_traits::count(uniq_alpha[0].first); const auto n_vir_alpha = norb - n_occ_alpha; const auto n_sing_alpha = n_occ_alpha * n_vir_alpha; @@ -327,9 +326,9 @@ asci_contrib_container> asci_contributions_constraint( } auto gen_c_st = clock_type::now(); - auto constraints = - dist_constraint_general>(asci_settings.constraint_level, norb, n_sing_beta, - n_doub_beta, uniq_alpha_wfn, comm); + auto constraints = dist_constraint_general>( + asci_settings.constraint_level, norb, n_sing_beta, n_doub_beta, + uniq_alpha_wfn, comm); auto gen_c_en = clock_type::now(); duration_type gen_c_dur = gen_c_en - gen_c_st; logger->info(" * GEN_DUR = {:.2e} ms", gen_c_dur.count()); @@ -453,65 +452,65 @@ asci_contrib_container> asci_contributions_constraint( } // Unique Alpha Loop #else - for(size_t i_alpha = 0, iw = 0; i_alpha < nuniq_alpha; ++i_alpha) { - const auto& alpha_det = uniq_alpha[i_alpha].first; - const auto occ_alpha = bits_to_indices(alpha_det); - const bool alpha_satisfies_con = satisfies_constraint(alpha_det, con); - - const auto& bcd = uad[i_alpha]; - const size_t nbeta = bcd.size(); - for(size_t j_beta = 0; j_beta < nbeta; ++j_beta, ++iw) { - const auto w = *(cdets_begin + iw); - const auto c = C[iw]; - const auto& beta_det = bcd[j_beta].beta_string; - const auto h_diag = bcd[j_beta].h_diag; - const auto& occ_beta = bcd[j_beta].occ_beta; - const auto& vir_beta = bcd[j_beta].vir_beta; - const auto& orb_ens_alpha = bcd[j_beta].orb_ens_alpha; - const auto& orb_ens_beta = bcd[j_beta].orb_ens_beta; - - // AA excitations - generate_constraint_singles_contributions_ss( - c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), - T_pq, norb, G_red, norb, V_red, norb, h_el_tol, h_diag, E_ASCI, - ham_gen, asci_pairs); + for(size_t i_alpha = 0, iw = 0; i_alpha < nuniq_alpha; ++i_alpha) { + const auto& alpha_det = uniq_alpha[i_alpha].first; + const auto occ_alpha = bits_to_indices(alpha_det); + const bool alpha_satisfies_con = satisfies_constraint(alpha_det, con); + + const auto& bcd = uad[i_alpha]; + const size_t nbeta = bcd.size(); + for(size_t j_beta = 0; j_beta < nbeta; ++j_beta, ++iw) { + const auto w = *(cdets_begin + iw); + const auto c = C[iw]; + const auto& beta_det = bcd[j_beta].beta_string; + const auto h_diag = bcd[j_beta].h_diag; + const auto& occ_beta = bcd[j_beta].occ_beta; + const auto& vir_beta = bcd[j_beta].vir_beta; + const auto& orb_ens_alpha = bcd[j_beta].orb_ens_alpha; + const auto& orb_ens_beta = bcd[j_beta].orb_ens_beta; + + // AA excitations + generate_constraint_singles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), T_pq, norb, + G_red, norb, V_red, norb, h_el_tol, h_diag, E_ASCI, ham_gen, + asci_pairs); - // AAAA excitations - generate_constraint_doubles_contributions_ss( - c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), - G_pqrs, norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); + // AAAA excitations + generate_constraint_doubles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), G_pqrs, norb, + h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); - // AABB excitations - generate_constraint_doubles_contributions_os( - c, w, con, occ_alpha, occ_beta, vir_beta, - orb_ens_alpha.data(), orb_ens_beta.data(), V_pqrs, norb, h_el_tol, - h_diag, E_ASCI, ham_gen, asci_pairs); + // AABB excitations + generate_constraint_doubles_contributions_os( + c, w, con, occ_alpha, occ_beta, vir_beta, orb_ens_alpha.data(), + orb_ens_beta.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + if(alpha_satisfies_con) { + // BB excitations + append_singles_asci_contributions( + c, w, beta_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), T_pq, norb, G_red, norb, V_red, norb, + h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); - if(alpha_satisfies_con) { - // BB excitations - append_singles_asci_contributions( - c, w, beta_det, occ_beta, vir_beta, occ_alpha, - orb_ens_beta.data(), T_pq, norb, G_red, norb, V_red, norb, h_el_tol, - h_diag, E_ASCI, ham_gen, asci_pairs); - - // BBBB excitations - append_ss_doubles_asci_contributions( - c, w, beta_det, alpha_det, occ_beta, vir_beta, - occ_alpha, orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, - E_ASCI, ham_gen, asci_pairs); - - // No excitation (push inf to remove from list) - asci_pairs.push_back( - {w, std::numeric_limits::infinity(), 1.0}); - } + // BBBB excitations + append_ss_doubles_asci_contributions( + c, w, beta_det, alpha_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + // No excitation (push inf to remove from list) + asci_pairs.push_back( + {w, std::numeric_limits::infinity(), 1.0}); } + } // Prune Down Contributions if(asci_pairs.size() > asci_settings.pair_size_max) { throw std::runtime_error("DIE DIE DIE"); } - } // Unique Alpha Loop + } // Unique Alpha Loop #endif @@ -700,9 +699,9 @@ std::vector> asci_search( // Remove core dets // XXX: This assumes the constraint search for now { - auto inf_ptr = std::partition(asci_pairs.begin(), asci_pairs.end(), - [](auto& p){ return !std::isinf(p.rv()); }); - asci_pairs.erase(inf_ptr, asci_pairs.end()); + auto inf_ptr = std::partition(asci_pairs.begin(), asci_pairs.end(), + [](auto& p) { return !std::isinf(p.rv()); }); + asci_pairs.erase(inf_ptr, asci_pairs.end()); } #endif diff --git a/include/macis/asci/determinant_sort.hpp b/include/macis/asci/determinant_sort.hpp index cf25c239..206e37d3 100644 --- a/include/macis/asci/determinant_sort.hpp +++ b/include/macis/asci/determinant_sort.hpp @@ -40,14 +40,15 @@ template void reorder_ci_on_alpha(WfnIterator begin, WfnIterator end, double* C) { using wfn_type = typename WfnIterator::value_type; using wfn_traits = wavefunction_traits; - using cmp_type = typename wfn_traits::spin_comparator; - const size_t ndets = std::distance(begin,end); + using cmp_type = typename wfn_traits::spin_comparator; + const size_t ndets = std::distance(begin, end); cmp_type comparator{}; std::vector idx(ndets); std::iota(idx.begin(), idx.end(), 0); - std::sort(idx.begin(), idx.end(), - [&](auto i, auto j) { return comparator(*(begin+i),*(begin+j)); }); + std::sort(idx.begin(), idx.end(), [&](auto i, auto j) { + return comparator(*(begin + i), *(begin + j)); + }); std::vector reorder_C(ndets); std::vector reorder_dets(ndets); @@ -58,7 +59,6 @@ void reorder_ci_on_alpha(WfnIterator begin, WfnIterator end, double* C) { std::copy(reorder_dets.begin(), reorder_dets.end(), begin); std::copy(reorder_C.begin(), reorder_C.end(), C); - } template diff --git a/include/macis/asci/iteration.hpp b/include/macis/asci/iteration.hpp index 4f28f475..3eccf964 100644 --- a/include/macis/asci/iteration.hpp +++ b/include/macis/asci/iteration.hpp @@ -25,7 +25,7 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings, size_t nkeep = std::min(asci_settings.ncdets_max, wfn.size()); // Sort kept dets on alpha string - if(wfn.size() > 1) + if(wfn.size() > 1) reorder_ci_on_alpha(wfn.begin(), wfn.begin() + nkeep, X.data()); // Perform the ASCI search @@ -33,9 +33,9 @@ auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings, E0, X, norb, ham_gen.T(), ham_gen.G_red(), ham_gen.V_red(), ham_gen.G(), ham_gen.V(), ham_gen MACIS_MPI_CODE(, comm)); - //std::sort(wfn.begin(), wfn.end(), bitset_less_comparator{}); + // std::sort(wfn.begin(), wfn.end(), bitset_less_comparator{}); using wfn_traits = wavefunction_traits>; - using wfn_comp = typename wfn_traits::spin_comparator; + using wfn_comp = typename wfn_traits::spin_comparator; std::sort(wfn.begin(), wfn.end(), wfn_comp{}); // Rediagonalize diff --git a/include/macis/asci/mask_constraints.hpp b/include/macis/asci/mask_constraints.hpp index 15a96d02..8c2531f6 100644 --- a/include/macis/asci/mask_constraints.hpp +++ b/include/macis/asci/mask_constraints.hpp @@ -629,18 +629,15 @@ auto dist_constraint_general(size_t nlevels, size_t norb, size_t ns_othr, #else template auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, - size_t nd_othr, - const ContainerType& unique_alpha, + size_t nd_othr, const ContainerType& unique_alpha, int world_size) { - using wfn_traits = wavefunction_traits; using constraint_type = alpha_constraint; using string_type = typename constraint_type::constraint_type; - constexpr bool flat_container = std::is_same_v< - std::decay_t, - std::decay_t - >; + constexpr bool flat_container = + std::is_same_v, + std::decay_t>; // Generate triplets + heuristic std::vector> constraint_sizes; @@ -653,12 +650,12 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, size_t nw = 0; for(const auto& alpha : unique_alpha) { - if constexpr (flat_container) + if constexpr(flat_container) nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, nd_othr, constraint); else - nw += alpha.second * - constraint_histogram(alpha.first, ns_othr, nd_othr, constraint); + nw += alpha.second * constraint_histogram(alpha.first, ns_othr, + nd_othr, constraint); } if(nw) constraint_sizes.emplace_back(constraint, nw); total_work += nw; @@ -697,12 +694,12 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, size_t nw = 0; for(const auto& alpha : unique_alpha) { - if constexpr (flat_container) + if constexpr(flat_container) nw += constraint_histogram(wfn_traits::alpha_string(alpha), ns_othr, nd_othr, c_next); else - nw += alpha.second * - constraint_histogram(alpha.first, ns_othr, nd_othr, c_next); + nw += alpha.second * + constraint_histogram(alpha.first, ns_othr, nd_othr, c_next); } if(nw) constraint_sizes.emplace_back(c_next, nw); total_work += nw; @@ -719,10 +716,8 @@ auto gen_constraints_general(size_t nlevels, size_t norb, size_t ns_othr, template auto dist_constraint_general(size_t nlevels, size_t norb, size_t ns_othr, - size_t nd_othr, - const ContainerType& unique_alpha, + size_t nd_othr, const ContainerType& unique_alpha, MPI_Comm comm) { - using wfn_traits = wavefunction_traits; using constraint_type = alpha_constraint; @@ -730,8 +725,8 @@ auto dist_constraint_general(size_t nlevels, size_t norb, size_t ns_othr, auto world_size = comm_size(comm); // Generate constraints subject to expected workload - auto constraint_sizes = gen_constraints_general(nlevels, norb, ns_othr, - nd_othr, unique_alpha, world_size); + auto constraint_sizes = gen_constraints_general( + nlevels, norb, ns_othr, nd_othr, unique_alpha, world_size); // Global workloads std::vector workloads(world_size, 0); @@ -757,7 +752,6 @@ auto dist_constraint_general(size_t nlevels, size_t norb, size_t ns_othr, // workloads[world_rank], total_work); return constraints; - } #endif #endif diff --git a/include/macis/asci/pt2.hpp b/include/macis/asci/pt2.hpp index 163729c4..df86af00 100644 --- a/include/macis/asci/pt2.hpp +++ b/include/macis/asci/pt2.hpp @@ -7,9 +7,9 @@ */ #pragma once +#include #include #include -#include namespace macis { @@ -27,7 +27,7 @@ double asci_pt2_constraint(wavefunction_iterator_t cdets_begin, using wfn_traits = wavefunction_traits>; using spin_wfn_type = spin_wfn_t>; using spin_wfn_traits = wavefunction_traits; - using wfn_comp = typename wfn_traits::spin_comparator; + using wfn_comp = typename wfn_traits::spin_comparator; if(!std::is_sorted(cdets_begin, cdets_end, wfn_comp{})) throw std::runtime_error("PT2 Only Works with Sorted Wfns"); @@ -69,20 +69,19 @@ double asci_pt2_constraint(wavefunction_iterator_t cdets_begin, } }; - auto uniq_alpha = get_unique_alpha(cdets_begin, cdets_end); const size_t nuniq_alpha = uniq_alpha.size(); std::vector> uniq_alpha_wfn(nuniq_alpha); - std::transform(uniq_alpha.begin(), uniq_alpha.end(), uniq_alpha_wfn.begin(), - [](const auto& p) { return wfn_traits::from_spin(p.first,0); }); - + std::transform( + uniq_alpha.begin(), uniq_alpha.end(), uniq_alpha_wfn.begin(), + [](const auto& p) { return wfn_traits::from_spin(p.first, 0); }); using unique_alpha_data = std::vector; std::vector uad(nuniq_alpha); for(auto i = 0, iw = 0; i < nuniq_alpha; ++i) { std::vector occ_alpha, vir_alpha; - spin_wfn_traits::state_to_occ_vir(norb, uniq_alpha[i].first, - occ_alpha, vir_alpha); + spin_wfn_traits::state_to_occ_vir(norb, uniq_alpha[i].first, occ_alpha, + vir_alpha); const auto nbeta = uniq_alpha[i].second; uad[i].reserve(nbeta); @@ -92,14 +91,15 @@ double asci_pt2_constraint(wavefunction_iterator_t cdets_begin, } } - //if(world_rank == 0) { - // std::ofstream ofile("uniq_alpha.txt"); - // for(auto [d, c] : uniq_alpha) { - // ofile << to_canonical_string(wfn_traits::from_spin(d,0)) << " " << c << std::endl; - // } - //} + // if(world_rank == 0) { + // std::ofstream ofile("uniq_alpha.txt"); + // for(auto [d, c] : uniq_alpha) { + // ofile << to_canonical_string(wfn_traits::from_spin(d,0)) << " " << c << + // std::endl; + // } + // } - //const auto n_occ_alpha = wfn_traits::count(uniq_alpha_wfn[0]); + // const auto n_occ_alpha = wfn_traits::count(uniq_alpha_wfn[0]); const auto n_occ_alpha = spin_wfn_traits::count(uniq_alpha[0].first); const auto n_vir_alpha = norb - n_occ_alpha; const auto n_sing_alpha = n_occ_alpha * n_vir_alpha; @@ -113,8 +113,8 @@ double asci_pt2_constraint(wavefunction_iterator_t cdets_begin, logger->info(" * NS = {} ND = {}", n_sing_alpha, n_doub_alpha); auto gen_c_st = clock_type::now(); - //auto constraints = dist_constraint_general>( - // 5, norb, n_sing_beta, n_doub_beta, uniq_alpha, comm); + // auto constraints = dist_constraint_general>( + // 5, norb, n_sing_beta, n_doub_beta, uniq_alpha, comm); auto constraints = gen_constraints_general>( 5, norb, n_sing_beta, n_doub_beta, uniq_alpha, world_size); auto gen_c_en = clock_type::now(); @@ -127,120 +127,120 @@ double asci_pt2_constraint(wavefunction_iterator_t cdets_begin, size_t NPT2 = 0; auto pt2_st = clock_type::now(); std::deque print_points(100); - for(auto i = 0; i < 100; ++i ) { - print_points[i] = constraints.size() * (i/100.); + for(auto i = 0; i < 100; ++i) { + print_points[i] = constraints.size() * (i / 100.); } - //std::mutex print_barrier; + // std::mutex print_barrier; const size_t ncon_total = constraints.size(); duration_type lock_wait_dur(0.0); MPI_Win window; - //MPI_Win_create( &window_count, sizeof(size_t), sizeof(size_t), MPI_INFO_NULL, comm, &window ); + // MPI_Win_create( &window_count, sizeof(size_t), sizeof(size_t), + // MPI_INFO_NULL, comm, &window ); size_t* window_buffer; - MPI_Win_allocate( sizeof(size_t), sizeof(size_t), MPI_INFO_NULL, comm, &window_buffer, &window); + MPI_Win_allocate(sizeof(size_t), sizeof(size_t), MPI_INFO_NULL, comm, + &window_buffer, &window); if(window == MPI_WIN_NULL) throw std::runtime_error("Window failed"); MPI_Win_lock_all(MPI_MODE_NOCHECK, window); -// Process ASCI pair contributions for each constraint -//#pragma omp parallel reduction(+ : EPT2) reduction(+ : NPT2) + // Process ASCI pair contributions for each constraint + // #pragma omp parallel reduction(+ : EPT2) reduction(+ : NPT2) { asci_contrib_container> asci_pairs; asci_pairs.reserve(max_size); -//#pragma omp for - //for(size_t ic = 0; ic < constraints.size(); ++ic) + // #pragma omp for + // for(size_t ic = 0; ic < constraints.size(); ++ic) size_t ic = 0; - while(ic < ncon_total) - { + while(ic < ncon_total) { size_t ntake = 10; MPI_Fetch_and_op(&ntake, &ic, MPI_UINT64_T, 0, 0, MPI_SUM, window); MPI_Win_flush(0, window); // Loop over assigned tasks - const size_t c_end = std::min( ncon_total, ic + ntake); - for(; ic < c_end; ++ic ) { - - const auto& con = constraints[ic].first; - //if(ic >= print_points.front()) { - // //std::lock_guard lock(print_barrier); - // printf("[rank %d] %.1f done\n", world_rank, double(ic)/constraints.size()*100); - // print_points.pop_front(); - //} - printf("[rank %d] %lu / %lu\n", world_rank, ic, ncon_total); - const double h_el_tol = 1e-16; - - for(size_t i_alpha = 0, iw = 0; i_alpha < nuniq_alpha; ++i_alpha) { - const auto& alpha_det = uniq_alpha[i_alpha].first; - const auto occ_alpha = bits_to_indices(alpha_det); - const bool alpha_satisfies_con = satisfies_constraint(alpha_det, con); - - const auto& bcd = uad[i_alpha]; - const size_t nbeta = bcd.size(); - for(size_t j_beta = 0; j_beta < nbeta; ++j_beta, ++iw) { - const auto w = *(cdets_begin + iw); - const auto c = C[iw]; - const auto& beta_det = bcd[j_beta].beta_string; - const auto h_diag = bcd[j_beta].h_diag; - const auto& occ_beta = bcd[j_beta].occ_beta; - const auto& vir_beta = bcd[j_beta].vir_beta; - const auto& orb_ens_alpha = bcd[j_beta].orb_ens_alpha; - const auto& orb_ens_beta = bcd[j_beta].orb_ens_beta; - - // AA excitations - generate_constraint_singles_contributions_ss( - c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), - T_pq, norb, G_red, norb, V_red, norb, h_el_tol, h_diag, E_ASCI, - ham_gen, asci_pairs); - - // AAAA excitations - generate_constraint_doubles_contributions_ss( - c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), - G_pqrs, norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); - - // AABB excitations - generate_constraint_doubles_contributions_os( - c, w, con, occ_alpha, occ_beta, vir_beta, - orb_ens_alpha.data(), orb_ens_beta.data(), V_pqrs, norb, h_el_tol, - h_diag, E_ASCI, ham_gen, asci_pairs); - - if(alpha_satisfies_con) { - // BB excitations - append_singles_asci_contributions( - c, w, beta_det, occ_beta, vir_beta, occ_alpha, - orb_ens_beta.data(), T_pq, norb, G_red, norb, V_red, norb, h_el_tol, - h_diag, E_ASCI, ham_gen, asci_pairs); - - // BBBB excitations - append_ss_doubles_asci_contributions( - c, w, beta_det, alpha_det, occ_beta, vir_beta, - occ_alpha, orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, - E_ASCI, ham_gen, asci_pairs); - - // No excitation (push inf to remove from list) - asci_pairs.push_back( - {w, std::numeric_limits::infinity(), 1.0}); + const size_t c_end = std::min(ncon_total, ic + ntake); + for(; ic < c_end; ++ic) { + const auto& con = constraints[ic].first; + // if(ic >= print_points.front()) { + // //std::lock_guard lock(print_barrier); + // printf("[rank %d] %.1f done\n", world_rank, + // double(ic)/constraints.size()*100); print_points.pop_front(); + // } + printf("[rank %d] %lu / %lu\n", world_rank, ic, ncon_total); + const double h_el_tol = 1e-16; + + for(size_t i_alpha = 0, iw = 0; i_alpha < nuniq_alpha; ++i_alpha) { + const auto& alpha_det = uniq_alpha[i_alpha].first; + const auto occ_alpha = bits_to_indices(alpha_det); + const bool alpha_satisfies_con = satisfies_constraint(alpha_det, con); + + const auto& bcd = uad[i_alpha]; + const size_t nbeta = bcd.size(); + for(size_t j_beta = 0; j_beta < nbeta; ++j_beta, ++iw) { + const auto w = *(cdets_begin + iw); + const auto c = C[iw]; + const auto& beta_det = bcd[j_beta].beta_string; + const auto h_diag = bcd[j_beta].h_diag; + const auto& occ_beta = bcd[j_beta].occ_beta; + const auto& vir_beta = bcd[j_beta].vir_beta; + const auto& orb_ens_alpha = bcd[j_beta].orb_ens_alpha; + const auto& orb_ens_beta = bcd[j_beta].orb_ens_beta; + + // AA excitations + generate_constraint_singles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), T_pq, + norb, G_red, norb, V_red, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + // AAAA excitations + generate_constraint_doubles_contributions_ss( + c, w, con, occ_alpha, occ_beta, orb_ens_alpha.data(), G_pqrs, + norb, h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); + + // AABB excitations + generate_constraint_doubles_contributions_os( + c, w, con, occ_alpha, occ_beta, vir_beta, orb_ens_alpha.data(), + orb_ens_beta.data(), V_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + if(alpha_satisfies_con) { + // BB excitations + append_singles_asci_contributions( + c, w, beta_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), T_pq, norb, G_red, norb, V_red, norb, + h_el_tol, h_diag, E_ASCI, ham_gen, asci_pairs); + + // BBBB excitations + append_ss_doubles_asci_contributions( + c, w, beta_det, alpha_det, occ_beta, vir_beta, occ_alpha, + orb_ens_beta.data(), G_pqrs, norb, h_el_tol, h_diag, E_ASCI, + ham_gen, asci_pairs); + + // No excitation (push inf to remove from list) + asci_pairs.push_back( + {w, std::numeric_limits::infinity(), 1.0}); + } } - } - - } // Unique Alpha Loop - - double EPT2_local = 0.0; - // Local S&A for each quad + update EPT2 - { - auto uit = sort_and_accumulate_asci_pairs(asci_pairs.begin(), - asci_pairs.end()); - for(auto it = asci_pairs.begin(); it != uit; ++it) { - // if(std::find(cdets_begin, cdets_end, it->state) == cdets_end) - if(!std::isinf(it->c_times_matel)) { - EPT2_local += (it->c_times_matel * it->c_times_matel) / it->h_diag; - NPT2++; + } // Unique Alpha Loop + + double EPT2_local = 0.0; + // Local S&A for each quad + update EPT2 + { + auto uit = sort_and_accumulate_asci_pairs(asci_pairs.begin(), + asci_pairs.end()); + for(auto it = asci_pairs.begin(); it != uit; ++it) { + // if(std::find(cdets_begin, cdets_end, it->state) == cdets_end) + if(!std::isinf(it->c_times_matel)) { + EPT2_local += + (it->c_times_matel * it->c_times_matel) / it->h_diag; + NPT2++; + } } + asci_pairs.clear(); } - asci_pairs.clear(); - } - EPT2 += EPT2_local; + EPT2 += EPT2_local; } // Loc constraint loop - } // Constraint Loop + } // Constraint Loop } auto pt2_en = clock_type::now(); @@ -249,10 +249,10 @@ double asci_pt2_constraint(wavefunction_iterator_t cdets_begin, double local_pt2_dur = duration_type(pt2_en - pt2_st).count(); if(world_size > 1) { double total_dur = allreduce(local_pt2_dur, MPI_SUM, comm); - double min_dur = allreduce(local_pt2_dur, MPI_MIN, comm); - double max_dur = allreduce(local_pt2_dur, MPI_MAX, comm); + double min_dur = allreduce(local_pt2_dur, MPI_MIN, comm); + double max_dur = allreduce(local_pt2_dur, MPI_MAX, comm); logger->info("* PT2_DUR MIN = {:.2e}, MAX = {:.2e}, AVG = {:.2e} ms", - min_dur, max_dur, total_dur / world_size); + min_dur, max_dur, total_dur / world_size); } else { logger->info("* PT2_DUR = ${:.2e} ms", local_pt2_dur); } diff --git a/include/macis/hamiltonian_generator/sorted_double_loop.hpp b/include/macis/hamiltonian_generator/sorted_double_loop.hpp index f5254c03..8256c778 100644 --- a/include/macis/hamiltonian_generator/sorted_double_loop.hpp +++ b/include/macis/hamiltonian_generator/sorted_double_loop.hpp @@ -7,15 +7,16 @@ */ #pragma once +#include #include #include #include -#include namespace macis { template -class SortedDoubleLoopHamiltonianGenerator : public HamiltonianGenerator { +class SortedDoubleLoopHamiltonianGenerator + : public HamiltonianGenerator { public: using base_type = HamiltonianGenerator; using full_det_t = typename base_type::full_det_t; @@ -41,40 +42,39 @@ class SortedDoubleLoopHamiltonianGenerator : public HamiltonianGenerator bra_occ_alpha, bra_occ_beta; const bool is_symm = bra_begin == ket_begin and bra_end == ket_end; - // Get unique alpha strings auto setup_st = std::chrono::high_resolution_clock::now(); auto unique_alpha_bra = get_unique_alpha(bra_begin, bra_end); - auto unique_alpha_ket = is_symm ? unique_alpha_bra : get_unique_alpha(ket_begin, ket_end); + auto unique_alpha_ket = + is_symm ? unique_alpha_bra : get_unique_alpha(ket_begin, ket_end); const size_t nuniq_bra = unique_alpha_bra.size(); const size_t nuniq_ket = unique_alpha_ket.size(); // Compute offsets - std::vector unique_alpha_bra_idx(nuniq_bra+1); - std::transform_exclusive_scan(unique_alpha_bra.begin(), - unique_alpha_bra.end(), - unique_alpha_bra_idx.begin(), - 0ul, std::plus{}, - [](auto& x){ return x.second; }); - std::vector unique_alpha_ket_idx(nuniq_ket+1); - if( is_symm ) { + std::vector unique_alpha_bra_idx(nuniq_bra + 1); + std::transform_exclusive_scan( + unique_alpha_bra.begin(), unique_alpha_bra.end(), + unique_alpha_bra_idx.begin(), 0ul, std::plus{}, + [](auto& x) { return x.second; }); + std::vector unique_alpha_ket_idx(nuniq_ket + 1); + if(is_symm) { unique_alpha_ket_idx = unique_alpha_bra_idx; } else { - std::transform_exclusive_scan(unique_alpha_ket.begin(), - unique_alpha_ket.end(), - unique_alpha_ket_idx.begin(), - 0ul, std::plus{}, - [](auto& x){ return x.second; }); + std::transform_exclusive_scan( + unique_alpha_ket.begin(), unique_alpha_ket.end(), + unique_alpha_ket_idx.begin(), 0ul, std::plus{}, + [](auto& x) { return x.second; }); } unique_alpha_bra_idx.back() = nbra_dets; unique_alpha_ket_idx.back() = nket_dets; auto setup_en = std::chrono::high_resolution_clock::now(); - //std::cout << "AVERAGE NBETA = " << - // std::accumulate(unique_alpha_bra.begin(), unique_alpha_bra.end(), - // 0ul, [](auto a, auto b){ return a + b.second; }) / double(nuniq_bra) << std::endl; + // std::cout << "AVERAGE NBETA = " << + // std::accumulate(unique_alpha_bra.begin(), unique_alpha_bra.end(), + // 0ul, [](auto a, auto b){ return a + b.second; }) / double(nuniq_bra) + // << std::endl; // Populate COO matrix locally sparsexx::coo_matrix coo_mat(nbra_dets, nket_dets, 0, 0); @@ -84,108 +84,114 @@ class SortedDoubleLoopHamiltonianGenerator : public HamiltonianGenerator 4) { - skip1++; - continue; - } - - // Precompute all-alpha excitation if it will be used - const double mat_el_4_alpha = (ex_alpha_count == 4) ? - this->matrix_element_4(bra_alpha, ket_alpha, ex_alpha) : 0.0; - if(ex_alpha_count == 4 and std::abs(mat_el_4_alpha) < H_thresh) { - // The only possible matrix element is too-small, skip everyhing - skip2++; - continue; - } - - const size_t beta_st_ket = unique_alpha_ket_idx[ia_ket]; - const size_t beta_en_ket = unique_alpha_ket_idx[ia_ket+1]; - - // Loop over local betas according to their global indices - for(size_t ibra = beta_st_bra; ibra < beta_en_bra; ++ibra) { - const auto bra_beta = wfn_traits::beta_string(*(bra_begin+ibra)); - spin_wfn_traits::state_to_occ(bra_beta, bra_occ_beta); - for(size_t iket = beta_st_ket; iket < beta_en_ket; ++iket) { - if(is_symm and (iket < ibra)) continue; - const auto ket_beta = wfn_traits::beta_string(*(ket_begin+iket)); - - // Compute beta excitation - const auto ex_beta = bra_beta ^ ket_beta; - const auto ex_beta_count = spin_wfn_traits::count(ex_beta); - - if((ex_alpha_count + ex_beta_count) > 4) continue; - - double h_el = 0.0; - if(ex_alpha_count == 4) { - // Use precomputed value - h_el = mat_el_4_alpha; - } else if(ex_beta_count == 4) { - h_el = this->matrix_element_4(bra_beta, ket_beta, ex_beta); - } else if(ex_alpha_count == 2) { - if(ex_beta_count == 2) { - h_el = this->matrix_element_22(bra_alpha, ket_alpha, ex_alpha, bra_beta, - ket_beta, ex_beta); - } else { - h_el = this->matrix_element_2(bra_alpha, ket_alpha, ex_alpha, - bra_occ_alpha, bra_occ_beta); - } - } else if(ex_beta_count == 2) { - h_el = this->matrix_element_2(bra_beta, ket_beta, ex_beta, - bra_occ_beta, bra_occ_alpha); - } else { - // Diagonal matrix element - h_el = this->matrix_element_diag(bra_occ_alpha, bra_occ_beta); - } - - // Insert matrix element - if(std::abs(h_el) > H_thresh) { - coo_mat.template insert(ibra, iket, h_el); - if(is_symm and ibra != iket) { - coo_mat.template insert(iket, ibra, h_el); - } + for(size_t ia_bra = 0; ia_bra < nuniq_bra; ++ia_bra) + if(unique_alpha_bra[ia_bra].first.any()) { + // Extract alpha bra + const auto bra_alpha = unique_alpha_bra[ia_bra].first; + const size_t beta_st_bra = unique_alpha_bra_idx[ia_bra]; + const size_t beta_en_bra = unique_alpha_bra_idx[ia_bra + 1]; + spin_wfn_traits::state_to_occ(bra_alpha, bra_occ_alpha); + + const auto ket_lower = is_symm ? ia_bra : 0; + for(size_t ia_ket = ket_lower; ia_ket < nuniq_ket; ++ia_ket) + if(unique_alpha_ket[ia_ket].first.any()) { + // Extract alpha ket + const auto ket_alpha = unique_alpha_ket[ia_ket].first; + + // Compute alpha excitation + const auto ex_alpha = bra_alpha ^ ket_alpha; + const auto ex_alpha_count = spin_wfn_traits::count(ex_alpha); + + // Early exit + if(ex_alpha_count > 4) { + skip1++; + continue; + } + + // Precompute all-alpha excitation if it will be used + const double mat_el_4_alpha = + (ex_alpha_count == 4) + ? this->matrix_element_4(bra_alpha, ket_alpha, ex_alpha) + : 0.0; + if(ex_alpha_count == 4 and std::abs(mat_el_4_alpha) < H_thresh) { + // The only possible matrix element is too-small, skip everyhing + skip2++; + continue; } - - } // ket beta - } // bra beta - - } // Loop over ket alphas - } // Loop over bra alphas + const size_t beta_st_ket = unique_alpha_ket_idx[ia_ket]; + const size_t beta_en_ket = unique_alpha_ket_idx[ia_ket + 1]; + + // Loop over local betas according to their global indices + for(size_t ibra = beta_st_bra; ibra < beta_en_bra; ++ibra) { + const auto bra_beta = + wfn_traits::beta_string(*(bra_begin + ibra)); + spin_wfn_traits::state_to_occ(bra_beta, bra_occ_beta); + for(size_t iket = beta_st_ket; iket < beta_en_ket; ++iket) { + if(is_symm and (iket < ibra)) continue; + const auto ket_beta = + wfn_traits::beta_string(*(ket_begin + iket)); + + // Compute beta excitation + const auto ex_beta = bra_beta ^ ket_beta; + const auto ex_beta_count = spin_wfn_traits::count(ex_beta); + + if((ex_alpha_count + ex_beta_count) > 4) continue; + + double h_el = 0.0; + if(ex_alpha_count == 4) { + // Use precomputed value + h_el = mat_el_4_alpha; + } else if(ex_beta_count == 4) { + h_el = this->matrix_element_4(bra_beta, ket_beta, ex_beta); + } else if(ex_alpha_count == 2) { + if(ex_beta_count == 2) { + h_el = + this->matrix_element_22(bra_alpha, ket_alpha, ex_alpha, + bra_beta, ket_beta, ex_beta); + } else { + h_el = + this->matrix_element_2(bra_alpha, ket_alpha, ex_alpha, + bra_occ_alpha, bra_occ_beta); + } + } else if(ex_beta_count == 2) { + h_el = this->matrix_element_2(bra_beta, ket_beta, ex_beta, + bra_occ_beta, bra_occ_alpha); + } else { + // Diagonal matrix element + h_el = this->matrix_element_diag(bra_occ_alpha, bra_occ_beta); + } + + // Insert matrix element + if(std::abs(h_el) > H_thresh) { + coo_mat.template insert(ibra, iket, h_el); + if(is_symm and ibra != iket) { + coo_mat.template insert(iket, ibra, h_el); + } + } + + } // ket beta + } // bra beta + + } // Loop over ket alphas + } // Loop over bra alphas auto pop_en = std::chrono::high_resolution_clock::now(); // Sort for CSR Conversion auto sort_st = std::chrono::high_resolution_clock::now(); - coo_mat.sort_by_row_index(); + coo_mat.sort_by_row_index(); auto sort_en = std::chrono::high_resolution_clock::now(); auto conv_st = std::chrono::high_resolution_clock::now(); - sparse_matrix_type csr_mat(coo_mat); // Convert to CSR Matrix + sparse_matrix_type csr_mat(coo_mat); // Convert to CSR Matrix auto conv_en = std::chrono::high_resolution_clock::now(); printf("Setup %.2e Pop %.2e Sort %.2e Conv %.2e S1 %lu S2 %lu\n", - std::chrono::duration(setup_en - setup_st).count(), - std::chrono::duration(pop_en - pop_st).count(), - std::chrono::duration(sort_en - sort_st).count(), - std::chrono::duration(conv_en - conv_st).count(),skip1,skip2); + std::chrono::duration(setup_en - setup_st).count(), + std::chrono::duration(pop_en - pop_st).count(), + std::chrono::duration(sort_en - sort_st).count(), + std::chrono::duration(conv_en - conv_st).count(), skip1, + skip2); return csr_mat; } @@ -209,7 +215,7 @@ class SortedDoubleLoopHamiltonianGenerator : public HamiltonianGenerator; const size_t nbra_dets = std::distance(bra_begin, bra_end); const size_t nket_dets = std::distance(ket_begin, ket_end); @@ -260,7 +266,7 @@ class SortedDoubleLoopHamiltonianGenerator : public HamiltonianGenerator - SortedDoubleLoopHamiltonianGenerator(Args &&...args) + SortedDoubleLoopHamiltonianGenerator(Args&&... args) : HamiltonianGenerator(std::forward(args)...) {} }; diff --git a/include/macis/sd_operations.hpp b/include/macis/sd_operations.hpp index f90f8ff3..4f4e9781 100644 --- a/include/macis/sd_operations.hpp +++ b/include/macis/sd_operations.hpp @@ -369,7 +369,6 @@ inline auto doubles_sign(WfnType bra, WfnType ket, WfnType ex) { return sign; } - template auto get_unique_alpha(WfnIterator begin, WfnIterator end) { using wfn_type = typename std::iterator_traits::value_type; @@ -378,7 +377,7 @@ auto get_unique_alpha(WfnIterator begin, WfnIterator end) { std::vector> unique_alpha; unique_alpha.push_back({wfn_traits::alpha_string(*begin), 1}); - for(auto it = begin+1; it != end; ++it) { + for(auto it = begin + 1; it != end; ++it) { auto& [cur_alpha, cur_count] = unique_alpha.back(); auto alpha_i = wfn_traits::alpha_string(*it); if(alpha_i == cur_alpha) { diff --git a/src/sparsexx/include/sparsexx/matrix_types/coo_matrix.hpp b/src/sparsexx/include/sparsexx/matrix_types/coo_matrix.hpp index 0d371a8c..e0342cf2 100644 --- a/src/sparsexx/include/sparsexx/matrix_types/coo_matrix.hpp +++ b/src/sparsexx/include/sparsexx/matrix_types/coo_matrix.hpp @@ -217,10 +217,9 @@ class coo_matrix { static_assert(not Check, "insert check NYI"); rowind_.emplace_back(i); colind_.emplace_back(j); - nzval_ .emplace_back(v); + nzval_.emplace_back(v); nnz_++; } - #ifdef SPARSEXX_ENABLE_CEREAL template diff --git a/src/sparsexx/include/sparsexx/matrix_types/coo_matrix_ops.hpp b/src/sparsexx/include/sparsexx/matrix_types/coo_matrix_ops.hpp index 210d2b52..d04693df 100644 --- a/src/sparsexx/include/sparsexx/matrix_types/coo_matrix_ops.hpp +++ b/src/sparsexx/include/sparsexx/matrix_types/coo_matrix_ops.hpp @@ -110,7 +110,7 @@ void coo_matrix::expand_from_triangle() { std::cout << "UT " << upper_triangle << std::endl; if(diagonal or full_matrix) return; - //std::cout << "Performing Expansion..." << std::endl; + // std::cout << "Performing Expansion..." << std::endl; size_t new_nnz = 2 * nnz_ - n_; rowind_.reserve(new_nnz); colind_.reserve(new_nnz); diff --git a/src/sparsexx/tests/test_types.cxx b/src/sparsexx/tests/test_types.cxx index 97ee3463..09b18c03 100644 --- a/src/sparsexx/tests/test_types.cxx +++ b/src/sparsexx/tests/test_types.cxx @@ -6,9 +6,8 @@ * See LICENSE.txt for details */ - -#include #include +#include #include "catch2/catch.hpp" @@ -17,17 +16,19 @@ TEST_CASE("COO Matrix", "[matrix_types]") { SECTION("Insert") { size_t n = 6; - mat_type A(n,n,0,0); // empty matrix zero-indexing + mat_type A(n, n, 0, 0); // empty matrix zero-indexing for(int i = 0; i < n; ++i) { - A.insert(i,i, 2); - if(i < n-1) A.insert(i,i+1, 1); - if(i > 0) A.insert(i,i-1, 1); + A.insert(i, i, 2); + if(i < n - 1) A.insert(i, i + 1, 1); + if(i > 0) A.insert(i, i - 1, 1); } - A.sort_by_row_index(); // Put into canonical order + A.sort_by_row_index(); // Put into canonical order REQUIRE(A.nnz() == 16); - REQUIRE(A.rowind() == std::vector{0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5}); - REQUIRE(A.colind() == std::vector{0,1,0,1,2,1,2,3,2,3,4,3,4,5,4,5}); - REQUIRE(A.nzval() == std::vector {2,1,1,2,1,1,2,1,1,2,1,1,2,1,1,2}); + REQUIRE(A.rowind() == std::vector{0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, + 4, 4, 4, 5, 5}); + REQUIRE(A.colind() == std::vector{0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, + 3, 4, 5, 4, 5}); + REQUIRE(A.nzval() == std::vector{2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, + 2, 1, 1, 2}); } - } diff --git a/tests/csr_hamiltonian.cxx b/tests/csr_hamiltonian.cxx index 444f4161..5be3cbed 100644 --- a/tests/csr_hamiltonian.cxx +++ b/tests/csr_hamiltonian.cxx @@ -17,7 +17,9 @@ #include "ut_common.hpp" using wfn_type = macis::wfn_t<64>; -TEMPLATE_TEST_CASE("CSR Hamiltonian","[ham_gen]", macis::DoubleLoopHamiltonianGenerator, macis::SortedDoubleLoopHamiltonianGenerator ) { +TEMPLATE_TEST_CASE("CSR Hamiltonian", "[ham_gen]", + macis::DoubleLoopHamiltonianGenerator, + macis::SortedDoubleLoopHamiltonianGenerator) { ROOT_ONLY(MPI_COMM_WORLD); size_t norb = macis::read_fcidump_norb(water_ccpvdz_fcidump); @@ -50,23 +52,27 @@ TEMPLATE_TEST_CASE("CSR Hamiltonian","[ham_gen]", macis::DoubleLoopHamiltonianGe auto H = macis::make_csr_hamiltonian_block( dets.begin(), dets.end(), dets.begin(), dets.end(), ham_gen, 1e-16); auto en = std::chrono::high_resolution_clock::now(); - std::cout << std::chrono::duration(en - st).count() << std::endl; + std::cout << std::chrono::duration(en - st).count() + << std::endl; -//#define GENERATE_TESTS +// #define GENERATE_TESTS #ifdef GENERATE_TESTS std::string tmp_rowptr_fname = "tmp_rowptr.bin"; std::string tmp_colind_fname = "tmp_colind.bin"; - std::string tmp_nzval_fname = "tmp_nzval.bin"; + std::string tmp_nzval_fname = "tmp_nzval.bin"; std::ofstream rowptr_file(tmp_rowptr_fname, std::ios::binary); - rowptr_file.write((char*)H.rowptr().data(), H.rowptr().size() * sizeof(int32_t)); + rowptr_file.write((char*)H.rowptr().data(), + H.rowptr().size() * sizeof(int32_t)); std::ofstream colind_file(tmp_colind_fname, std::ios::binary); - colind_file.write((char*)H.colind().data(), H.colind().size() * sizeof(int32_t)); + colind_file.write((char*)H.colind().data(), + H.colind().size() * sizeof(int32_t)); std::ofstream nzval_file(tmp_nzval_fname, std::ios::binary); nzval_file.write((char*)H.nzval().data(), H.nzval().size() * sizeof(double)); #else - //std::cout << "NEW H " << H.m() << " " << H.nnz() << " " << H.indexing() << std::endl; + // std::cout << "NEW H " << H.m() << " " << H.nnz() << " " << H.indexing() << + // std::endl; // Read reference data std::vector ref_rowptr(H.rowptr().size()), @@ -95,7 +101,9 @@ TEMPLATE_TEST_CASE("CSR Hamiltonian","[ham_gen]", macis::DoubleLoopHamiltonianGe } #ifdef MACIS_ENABLE_MPI -TEMPLATE_TEST_CASE("Distributed CSR Hamiltonian","[ham_gen]", macis::DoubleLoopHamiltonianGenerator, macis::SortedDoubleLoopHamiltonianGenerator ) { +TEMPLATE_TEST_CASE("Distributed CSR Hamiltonian", "[ham_gen]", + macis::DoubleLoopHamiltonianGenerator, + macis::SortedDoubleLoopHamiltonianGenerator) { MPI_Barrier(MPI_COMM_WORLD); size_t norb = macis::read_fcidump_norb(water_ccpvdz_fcidump); size_t nocc = 5; diff --git a/tests/standalone_driver.cxx b/tests/standalone_driver.cxx index a025074e..867a0dee 100644 --- a/tests/standalone_driver.cxx +++ b/tests/standalone_driver.cxx @@ -67,7 +67,11 @@ int main(int argc, char** argv) { using wfn_type = macis::wfn_t; using wfn_traits = macis::wavefunction_traits; - MACIS_MPI_CODE(MPI_Init(&argc, &argv);) + MACIS_MPI_CODE( + int dummy; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &dummy); + if(dummy != MPI_THREAD_MULTIPLE) throw std::runtime_error("MPI Thread Init Failed"); + ) #ifdef MACIS_ENABLE_MPI auto world_rank = macis::comm_rank(MPI_COMM_WORLD);