diff --git a/include/htool/basic_types/matrix.hpp b/include/htool/basic_types/matrix.hpp index 50d53c8e..bee3f254 100644 --- a/include/htool/basic_types/matrix.hpp +++ b/include/htool/basic_types/matrix.hpp @@ -109,6 +109,12 @@ class Matrix { return m_data; } + T *release() { + T *result = m_data; + m_data = nullptr; + return result; + } + void assign(int number_of_rows, int number_of_cols, T *ptr, bool owning_data) { if (m_number_of_rows * m_number_of_cols > 0 && m_is_owning_data) delete[] m_data; diff --git a/include/htool/solvers/ddm.hpp b/include/htool/solvers/ddm.hpp index 65133ced..ebdf0881 100644 --- a/include/htool/solvers/ddm.hpp +++ b/include/htool/solvers/ddm.hpp @@ -6,8 +6,11 @@ #include "../misc/logger.hpp" #include "../misc/misc.hpp" #include "../wrappers/wrapper_hpddm.hpp" +#include "../wrappers/wrapper_lapack.hpp" #include "../wrappers/wrapper_mpi.hpp" -#include "coarse_space.hpp" +#include "./geneo/coarse_operator_builder.hpp" +#include "./interfaces/virtual_coarse_operator_builder.hpp" +#include "./interfaces/virtual_coarse_space_builder.hpp" namespace htool { @@ -21,16 +24,10 @@ class DDM { int n; int n_inside; - std::vector neighbors; - std::vector renum_to_global; - // const std::vector cluster_to_ovr_subdomain; - std::vector> intersections; - std::vector vec_ovr; std::unique_ptr> hpddm_op; Matrix &mat_loc; std::vector D; int nevi; - int size_E; bool one_level; bool two_level; mutable std::map infos; @@ -48,7 +45,7 @@ class DDM { hpddm_op.reset(); } - DDM(const DistributedOperator &distributed_operator, Matrix &local_dense_matrix, const std::vector &neighbors0, const std::vector> &intersections0) : n(local_dense_matrix.nb_rows()), n_inside(distributed_operator.get_target_partition().get_size_of_partition(get_rankWorld(distributed_operator.get_comm()))), neighbors(neighbors0), intersections(intersections0), vec_ovr(n), hpddm_op(std::make_unique>(&distributed_operator)), mat_loc(local_dense_matrix), D(n), one_level(0), two_level(0) { + DDM(const DistributedOperator &distributed_operator, Matrix &local_dense_matrix, const std::vector &neighbors, const std::vector> &intersections) : n(local_dense_matrix.nb_rows()), n_inside(distributed_operator.get_target_partition().get_size_of_partition(get_rankWorld(distributed_operator.get_comm()))), hpddm_op(std::make_unique>(&distributed_operator)), mat_loc(local_dense_matrix), D(n), one_level(0), two_level(0) { // Timing double mytime, maxtime; double time = MPI_Wtime(); @@ -68,16 +65,6 @@ class DDM { } } - // TODO: add symmetric eigensolver - if (sym) { - for (int j = 0; j < n; j++) { - for (int i = 0; i < j; i++) { - - mat_loc(i, j) = mat_loc(j, i); - } - } - } - hpddm_op->initialize(n, sym, mat_loc.data(), neighbors, intersections); fill(D.begin(), D.begin() + n_inside, 1); @@ -105,253 +92,255 @@ class DDM { one_level = 1; } - // TODO: take local VirtualHMatrix instead - // void build_coarse_space(Matrix &Mi, VirtualGenerator &generator_Bi, const std::vector &x) { - // // Timing - // double mytime, maxtime; - // double time = MPI_Wtime(); - - // // - // int info; - - // // Building Neumann matrix - // htool::VirtualHMatrix *const Bi(generator_Bi, hpddm_op->HA.get_cluster_tree_t().get_local_cluster_tree(), x, -1, MPI_COMM_SELF); - // Matrix Bi(n, n); - - // // Building Bi - // bool sym = false; - // const std::vector *> &MyLocalFarFieldMats = HBi.get_MyFarFieldMats(); - // const std::vector *> &MyLocalNearFieldMats = HBi.get_MyNearFieldMats(); - - // // Internal dense blocks - // for (int i = 0; i < MyLocalNearFieldMats.size(); i++) { - // const SubMatrix &submat = *(MyLocalNearFieldMats[i]); - // int local_nr = submat.nb_rows(); - // int local_nc = submat.nb_cols(); - // int offset_i = submat.get_offset_i() - hpddm_op->HA.get_local_offset(); - // int offset_j = submat.get_offset_j() - hpddm_op->HA.get_local_offset(); - // for (int i = 0; i < local_nc; i++) { - // std::copy_n(&(submat(0, i)), local_nr, Bi.data() + offset_i + (offset_j + i) * n); - // } - // } - - // // Internal compressed block - // Matrix FarFielBlock(n, n); - // for (int i = 0; i < MyLocalFarFieldMats.size(); i++) { - // const LowRankMatrix &lmat = *(MyLocalFarFieldMats[i]); - // int local_nr = lmat.nb_rows(); - // int local_nc = lmat.nb_cols(); - // int offset_i = lmat.get_offset_i() - hpddm_op->HA.get_local_offset(); - // int offset_j = lmat.get_offset_j() - hpddm_op->HA.get_local_offset(); - // ; - // FarFielBlock.resize(local_nr, local_nc); - // lmat.get_whole_matrix(&(FarFielBlock(0, 0))); - // for (int i = 0; i < local_nc; i++) { - // std::copy_n(&(FarFielBlock(0, i)), local_nr, Bi.data() + offset_i + (offset_j + i) * n); - // } - // } + void build_coarse_space(VirtualCoarseSpaceBuilder &coarse_space_builder, VirtualCoarseOperatorBuilder &coarse_operator_builder) { - // // Overlap - // std::vector horizontal_block(n - n_inside, n_inside), vertical_block(n, n - n_inside); - // horizontal_block = generator_Bi.get_submatrix(std::vector(renum_to_global.begin() + n_inside, renum_to_global.end()), std::vector(renum_to_global.begin(), renum_to_global.begin() + n_inside)).get_mat(); - // vertical_block = generator_Bi.get_submatrix(renum_to_global, std::vector(renum_to_global.begin() + n_inside, renum_to_global.end())).get_mat(); - // for (int j = 0; j < n_inside; j++) { - // std::copy_n(horizontal_block.begin() + j * (n - n_inside), n - n_inside, Bi.data() + n_inside + j * n); - // } - // for (int j = n_inside; j < n; j++) { - // std::copy_n(vertical_block.begin() + (j - n_inside) * n, n, Bi.data() + j * n); - // } + // Timing + std::vector mytime(3), maxtime(3); - // // LU facto for mass matrix - // int lda = n; - // std::vector _ipiv_mass(n); - // HPDDM::Lapack::getrf(&n, &n, Mi.data(), &lda, _ipiv_mass.data(), &info); + // Coarse space build + double time = MPI_Wtime(); + Matrix Z = coarse_space_builder.build_coarse_space(); + mytime[0] = MPI_Wtime() - time; - // // Partition of unity - // Matrix DAiD(n, n); - // for (int i = 0; i < n_inside; i++) { - // std::copy_n(&(mat_loc[i * n]), n_inside, &(DAiD(0, i))); - // } + nevi = Z.nb_cols(); - // // M^-1 - // const char l = 'N'; - // lda = n; - // int ldb = n; - // HPDDM::Lapack::getrs(&l, &n, &n, Mi.data(), &lda, _ipiv_mass.data(), DAiD.data(), &ldb, &info); - // HPDDM::Lapack::getrs(&l, &n, &n, Mi.data(), &lda, _ipiv_mass.data(), Bi.data(), &ldb, &info); + HPDDM::Option &opt = *HPDDM::Option::get(); + opt["geneo_nu"] = nevi; - // // Build local eigenvalue problem - // Matrix evp(n, n); - // Bi.mvprod(DAiD.data(), evp.data(), n); + // int rankWorld; + // MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + // Z.csv_save("test_" + NbrToStr(rankWorld)); + CoefficientPrecision **Z_ptr_ptr = new CoefficientPrecision *[nevi]; + CoefficientPrecision *Z_ptr = Z.release(); + for (int i = 0; i < nevi; i++) { + Z_ptr_ptr[i] = Z_ptr + i * n; + } + hpddm_op->setVectors(Z_ptr_ptr); - // // eigenvalue problem - // hpddm_op->solveEVP(evp.data()); - // T *const *Z = const_cast(hpddm_op->getVectors()); - // HPDDM::Option &opt = *HPDDM::Option::get(); - // nevi = opt.val("geneo_nu", 2); + // + // int rankWorld; + // MPI_Comm_rank(hpddm_op->HA->get_comm(), &rankWorld); + int sizeWorld; + MPI_Comm_size(hpddm_op->HA->get_comm(), &sizeWorld); - // // timing - // mytime = MPI_Wtime() - time; - // time = MPI_Wtime(); - // MPI_Reduce(&(mytime), &(maxtime), 1, MPI_DOUBLE, MPI_MAX, 0, hpddm_op->HA->get_comm()); - // infos["DDM_geev_max"] = NbrToStr(maxtime); + time = MPI_Wtime(); + Matrix coarse_operator = coarse_operator_builder.build_coarse_operator(Z.nb_rows(), Z.nb_cols(), Z_ptr_ptr); + mytime[1] = MPI_Wtime() - time; - // // - // build_E(Z); - // } + time = MPI_Wtime(); + hpddm_op->buildTwo(MPI_COMM_WORLD, coarse_operator.release()); + mytime[2] = MPI_Wtime() - time; - void build_coarse_space(Matrix &Ki) { // Timing - double mytime, maxtime; - double time = MPI_Wtime(); + MPI_Reduce(&(mytime[0]), &(maxtime[0]), mytime.size(), MPI_DOUBLE, MPI_MAX, 0, hpddm_op->HA->get_comm()); - // - int info; + infos["DDM_geev_max"] = NbrToStr(maxtime[0]); + infos["DDM_setup_ZtAZ_max"] = NbrToStr(maxtime[1]); + infos["DDM_facto_ZtAZ_max"] = NbrToStr(maxtime[2]); + infos["GenEO_coarse_size"] = NbrToStr(coarse_operator.nb_cols()); + two_level = 1; + } - // Partition of unity - Matrix DAiD(n, n); - for (int i = 0; i < n_inside; i++) { - std::copy_n(mat_loc.data() + i * n, n_inside, &(DAiD(0, i))); - } + // void build_coarse_space(Matrix &Ki) { + // // Timing + // double mytime, maxtime; + // double time = MPI_Wtime(); - // Build local eigenvalue problem - int ldvl = n, ldvr = n, lwork = -1; - int lda = n, ldb = n; - std::vector alphar(n), alphai((is_complex() ? 0 : n)), beta(n); - std::vector work(n); - std::vector rwork(8 * n); - std::vector vl(n * n), vr(n * n); - std::vector index(n, 0); + // // + // int info; - int rankWorld; - MPI_Comm_rank(hpddm_op->HA->get_comm(), &rankWorld); - // if (rankWorld == 0) { - // DAiD.csv_save("DAiD_" + NbrToStr(rankWorld)); - // Ki.csv_save("Ki_" + NbrToStr(rankWorld)); - // // std::cout << vr << "\n"; - // } - // MPI_Barrier(comm); - // if (rankWorld == 1) { - // std::cout << DAiD.nb_rows() << " " << DAiD.nb_cols() << "\n"; - // std::cout << "taille " << alphai.size() << "\n"; - // std::cout << Ki.nb_rows() << " " << Ki.nb_cols() << "\n"; - // DAiD.csv_save("DAiD_" + NbrToStr(rankWorld)); - // Ki.csv_save("Ki_" + NbrToStr(rankWorld)); - // // std::cout << vr << "\n"; - // } + // // Partition of unity + // Matrix DAiD(n, n); + // for (int i = 0; i < n_inside; i++) { + // std::copy_n(mat_loc.data() + i * n, n_inside, &(DAiD(0, i))); + // } - HPDDM::Lapack::ggev("N", "V", &n, DAiD.data(), &lda, Ki.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info); - lwork = (int)std::real(work[0]); - work.resize(lwork); - HPDDM::Lapack::ggev("N", "V", &n, DAiD.data(), &lda, Ki.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info); + // // Build local eigenvalue problem + // int ldvl = n, ldvr = n, lwork = -1; + // int lda = n, ldb = n; + // std::vector work(n); + // std::vector rwork; + // std::vector index(n, 0); + // std::iota(index.begin(), index.end(), int(0)); + + // // int rankWorld; + // // MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + // // DAiD.csv_save("DAiD_" + NbrToStr(rankWorld)); + // // Ki.csv_save("Ki_" + NbrToStr(rankWorld)); + + // if (hpddm_op->HA->get_symmetry_type() == 'S' || hpddm_op->HA->get_symmetry_type() == 'H') { + // char uplo = hpddm_op->HA->get_storage_type(); + // int itype = 1; + // std::vector> w(n); + // if (is_complex()) { + // rwork.resize(3 * n - 2); + // } - for (int i = 0; i != index.size(); i++) { - index[i] = i; - } - std::sort(index.begin(), index.end(), [&](const int &a, const int &b) { - return ((std::abs(beta[a]) < 1e-15 || (std::abs(alphar[a] / beta[a]) > std::abs(alphar[b] / beta[b]))) && !(std::abs(beta[b]) < 1e-15)); - }); - - // if (rankWorld == 0) { - // for (int i = 0; i < index.size(); i++) { - // std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " "; - // } - // std::cout << "\n"; - // // std::cout << vr << "\n"; - // } - // MPI_Barrier(comm); - // if (rankWorld == 1) { - // std::cout << "alphar : " << alphar << "\n"; - // std::cout << "alphai : " << alphai << "\n"; - // std::cout << "beta : " << beta << "\n"; - // std::cout << "info: " << info << "\n"; - // for (int i = 0; i < index.size(); i++) { - // std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " "; - // } - // std::cout << "\n"; - // // std::cout << vr << "\n"; - // } - HPDDM::Option &opt = *HPDDM::Option::get(); - nevi = 0; - double threshold = opt.val("geneo_threshold", -1.0); - if (threshold > 0.0) { - while (std::abs(beta[index[nevi]]) < 1e-15 || (std::abs(alphar[index[nevi]] / beta[index[nevi]]) > threshold && nevi < index.size())) { - nevi++; - } + // Lapack::gv(&itype, "V", &uplo, &n, DAiD.data(), &lda, Ki.data(), &ldb, w.data(), work.data(), &lwork, rwork.data(), &info); + // lwork = (int)std::real(work[0]); + // work.resize(lwork); + // Lapack::gv(&itype, "V", &uplo, &n, DAiD.data(), &lda, Ki.data(), &ldb, w.data(), work.data(), &lwork, rwork.data(), &info); + + // std::sort(index.begin(), index.end(), [&](const int &a, const int &b) { + // return (std::abs(w[a]) > std::abs(w[b])); + // }); + + // // if (rankWorld == 0) { + // // for (int i = 0; i < index.size(); i++) { + // // std::cout << std::abs(w[index[i]]) << " "; + // // } + // // std::cout << "\n"; + // // // std::cout << vr << "\n"; + // // } + // // MPI_Barrier(hpddm_op->HA->get_comm()); + // // if (rankWorld == 1) { + // // std::cout << "w : " << w << "\n"; + // // std::cout << "info: " << info << "\n"; + // // for (int i = 0; i < index.size(); i++) { + // // std::cout << std::abs(w[index[i]]) << " "; + // // } + // // std::cout << "\n"; + // // // std::cout << vr << "\n"; + // // } + + // HPDDM::Option &opt = *HPDDM::Option::get(); + // nevi = 0; + // double threshold = opt.val("geneo_threshold", -1.0); + // if (threshold > 0.0) { + // while (std::abs(w[index[nevi]]) > threshold && nevi < index.size()) { + // nevi++; + // } + + // } else { + // nevi = opt.val("geneo_nu", 2); + // } - } else { - nevi = opt.val("geneo_nu", 2); - } + // opt["geneo_nu"] = nevi; + // m_Z = new CoefficientPrecision *[nevi]; + // *m_Z = new CoefficientPrecision[nevi * n]; + // for (int i = 0; i < nevi; i++) { + // m_Z[i] = *m_Z + i * n; + // std::copy_n(DAiD.data() + index[i] * n, n_inside, m_Z[i]); + // for (int j = n_inside; j < n; j++) { - opt["geneo_nu"] = nevi; - m_Z = new CoefficientPrecision *[nevi]; - *m_Z = new CoefficientPrecision[nevi * n]; - for (int i = 0; i < nevi; i++) { - m_Z[i] = *m_Z + i * n; - std::copy_n(vr.data() + index[i] * n, n_inside, m_Z[i]); - for (int j = n_inside; j < n; j++) { + // m_Z[i][j] = 0; + // } + // } + // } else { + // if (is_complex()) { + // rwork.resize(8 * n); + // } + // std::vector alphar(n), alphai((is_complex() ? 0 : n)), beta(n); + // std::vector vl(n * n), vr(n * n); + + // Lapack::ggev("N", "V", &n, DAiD.data(), &lda, Ki.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info); + // lwork = (int)std::real(work[0]); + // work.resize(lwork); + // Lapack::ggev("N", "V", &n, DAiD.data(), &lda, Ki.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info); + + // std::sort(index.begin(), index.end(), [&](const int &a, const int &b) { + // return ((std::abs(beta[a]) < 1e-15 || (std::abs(alphar[a] / beta[a]) > std::abs(alphar[b] / beta[b]))) && !(std::abs(beta[b]) < 1e-15)); + // }); + + // // if (rankWorld == 0) { + // // // for (int i = 0; i < index.size(); i++) { + // // // std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " "; + // // // } + // // // std::cout << "\n"; + // // std::cout << vr << "\n"; + // // } + // // MPI_Barrier(hpddm_op->HA->get_comm()); + // // if (rankWorld == 1) { + // // // std::cout << "alphar : " << alphar << "\n"; + // // // std::cout << "alphai : " << alphai << "\n"; + // // // std::cout << "beta : " << beta << "\n"; + // // // std::cout << "info: " << info << "\n"; + // // // for (int i = 0; i < index.size(); i++) { + // // // std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " "; + // // // } + // // // std::cout << "\n"; + // // std::cout << vr << "\n"; + // // } + + // HPDDM::Option &opt = *HPDDM::Option::get(); + // nevi = 0; + // double threshold = opt.val("geneo_threshold", -1.0); + // if (threshold > 0.0) { + // while (std::abs(beta[index[nevi]]) < 1e-15 || (std::abs(alphar[index[nevi]] / beta[index[nevi]]) > threshold && nevi < index.size())) { + // nevi++; + // } + + // } else { + // nevi = opt.val("geneo_nu", 2); + // } - m_Z[i][j] = 0; - } - } + // opt["geneo_nu"] = nevi; + // m_Z = new CoefficientPrecision *[nevi]; + // *m_Z = new CoefficientPrecision[nevi * n]; + // for (int i = 0; i < nevi; i++) { + // m_Z[i] = *m_Z + i * n; + // std::copy_n(vr.data() + index[i] * n, n_inside, m_Z[i]); + // for (int j = n_inside; j < n; j++) { - hpddm_op->setVectors(m_Z); + // m_Z[i][j] = 0; + // } + // } + // } - // timing - mytime = MPI_Wtime() - time; - MPI_Barrier(hpddm_op->HA->get_comm()); - time = MPI_Wtime(); - MPI_Reduce(&(mytime), &(maxtime), 1, MPI_DOUBLE, MPI_MAX, 0, hpddm_op->HA->get_comm()); - infos["DDM_geev_max"] = NbrToStr(maxtime); + // // Matrix Z_test(n, nevi); + // // Z_test.assign(n, nevi, m_Z[0], false); + // // // int rankWorld; + // // MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + // // Z_test.csv_save("test_2_" + NbrToStr(rankWorld)); - // - build_E(m_Z); - } + // hpddm_op->setVectors(m_Z); - void build_E(CoefficientPrecision *const *Z) { - // - int sizeWorld; - MPI_Comm_size(hpddm_op->HA->get_comm(), &sizeWorld); + // // timing + // mytime = MPI_Wtime() - time; + // MPI_Barrier(hpddm_op->HA->get_comm()); + // time = MPI_Wtime(); + // MPI_Reduce(&(mytime), &(maxtime), 1, MPI_DOUBLE, MPI_MAX, 0, hpddm_op->HA->get_comm()); + // infos["DDM_geev_max"] = NbrToStr(maxtime); - // Timing - std::vector mytime(2), maxtime(2); - double time = MPI_Wtime(); + // // + // build_E(m_Z); + // } - // Allgather - std::vector recvcounts(sizeWorld); - std::vector displs(sizeWorld); - MPI_Allgather(&nevi, 1, MPI_INT, recvcounts.data(), 1, MPI_INT, hpddm_op->HA->get_comm()); + // void build_E(CoefficientPrecision *const *Z) { + // // + // int sizeWorld; + // MPI_Comm_size(hpddm_op->HA->get_comm(), &sizeWorld); - displs[0] = 0; - for (int i = 1; i < sizeWorld; i++) { - displs[i] = displs[i - 1] + recvcounts[i - 1]; - } + // // Timing + // std::vector mytime(2), maxtime(2); + // double time = MPI_Wtime(); - std::vector E; - build_coarse_space_outside(hpddm_op->HA, nevi, n, Z, E); - size_E = sqrt(E.size()); - mytime[0] = MPI_Wtime() - time; - MPI_Barrier(hpddm_op->HA->get_comm()); - time = MPI_Wtime(); + // // Allgather + // std::vector recvcounts(sizeWorld); + // std::vector displs(sizeWorld); + // MPI_Allgather(&nevi, 1, MPI_INT, recvcounts.data(), 1, MPI_INT, hpddm_op->HA->get_comm()); - int rankWorld; - MPI_Comm_rank(hpddm_op->HA->get_comm(), &rankWorld); - if (rankWorld == 0) { - Matrix E_mat(size_E, size_E); - E_mat.assign(size_E, size_E, E.data(), false); - } + // displs[0] = 0; + // for (int i = 1; i < sizeWorld; i++) { + // displs[i] = displs[i - 1] + recvcounts[i - 1]; + // } - hpddm_op->buildTwo(MPI_COMM_WORLD, E.data()); + // Matrix E; + // htool::build_geneo_coarse_operator(*hpddm_op->HA, nevi, n, Z, E); + // mytime[0] = MPI_Wtime() - time; - mytime[1] = MPI_Wtime() - time; + // time = MPI_Wtime(); + // hpddm_op->buildTwo(MPI_COMM_WORLD, E.release()); + // mytime[1] = MPI_Wtime() - time; - // Timing - MPI_Reduce(&(mytime[0]), &(maxtime[0]), 2, MPI_DOUBLE, MPI_MAX, 0, hpddm_op->HA->get_comm()); + // // Timing + // MPI_Reduce(&(mytime[0]), &(maxtime[0]), 2, MPI_DOUBLE, MPI_MAX, 0, hpddm_op->HA->get_comm()); - infos["DDM_setup_ZtAZ_max"] = NbrToStr(maxtime[0]); - infos["DDM_facto_ZtAZ_max"] = NbrToStr(maxtime[1]); - two_level = 1; - } + // infos["DDM_setup_ZtAZ_max"] = NbrToStr(maxtime[0]); + // infos["DDM_facto_ZtAZ_max"] = NbrToStr(maxtime[1]); + // infos["GenEO_coarse_size"] = NbrToStr(E.nb_cols()); + // two_level = 1; + // } void solve(const CoefficientPrecision *const rhs, CoefficientPrecision *const x, const int &mu = 1) { // Check facto @@ -522,10 +511,9 @@ class DDM { infos["DDM_local_coarse_size_max"] = "0"; infos["DDM_local_coarse_size_min"] = "0"; } else { - infos["GenEO_coarse_size"] = NbrToStr(size_E); - int nevi_mean = nevi; - int nevi_max = nevi; - int nevi_min = nevi; + int nevi_mean = nevi; + int nevi_max = nevi; + int nevi_min = nevi; if (rankWorld == 0) { MPI_Reduce(MPI_IN_PLACE, &(nevi_mean), 1, MPI_INT, MPI_SUM, 0, hpddm_op->HA->get_comm()); @@ -617,9 +605,7 @@ class DDM { int get_local_size() const { return n; } - const std::vector &get_local_to_global_numbering() const { - return renum_to_global; - } + // MPI_Comm get_comm() const { // return comm; // } diff --git a/include/htool/solvers/coarse_space.hpp b/include/htool/solvers/geneo/coarse_operator_builder.hpp similarity index 51% rename from include/htool/solvers/coarse_space.hpp rename to include/htool/solvers/geneo/coarse_operator_builder.hpp index 88019695..3debd55d 100644 --- a/include/htool/solvers/coarse_space.hpp +++ b/include/htool/solvers/geneo/coarse_operator_builder.hpp @@ -1,26 +1,28 @@ -#ifndef HTOOL_COARSE_SPACE_HPP -#define HTOOL_COARSE_SPACE_HPP +#ifndef HTOOL_GENEO_COARSE_OPERATOR_BUILDER_HPP +#define HTOOL_GENEO_COARSE_OPERATOR_BUILDER_HPP -#include "../distributed_operator/distributed_operator.hpp" -#include "../misc/misc.hpp" +#include "../../distributed_operator/distributed_operator.hpp" +#include "../../misc/misc.hpp" +#include "../interfaces/virtual_coarse_operator_builder.hpp" #include #include namespace htool { -template -void build_coarse_space_outside(const DistributedOperator *const HA, int nevi, int n, const T *const *Z, std::vector &E) { + +template +void build_geneo_coarse_operator(const DistributedOperator &HA, int nevi, int n, const CoefficientPrecision *const *Z, Matrix &E) { // - MPI_Comm comm = HA->get_comm(); + MPI_Comm comm = HA.get_comm(); int sizeWorld, rankWorld; MPI_Comm_rank(comm, &rankWorld); MPI_Comm_size(comm, &sizeWorld); - int n_inside = HA->get_target_partition().get_size_of_partition(rankWorld); + int n_inside = HA.get_target_partition().get_size_of_partition(rankWorld); // Allgather std::vector recvcounts(sizeWorld); std::vector displs(sizeWorld); - MPI_Allgather(&nevi, 1, MPI_INT, recvcounts.data(), 1, MPI_INT, HA->get_comm()); + MPI_Allgather(&nevi, 1, MPI_INT, recvcounts.data(), 1, MPI_INT, HA.get_comm()); displs[0] = 0; @@ -30,25 +32,26 @@ void build_coarse_space_outside(const DistributedOperator *const HA, int nevi int nevi_max = *std::max_element(recvcounts.begin(), recvcounts.end()); int size_E = std::accumulate(recvcounts.begin(), recvcounts.end(), 0); - E.resize(size_E * size_E, 0); - std::vector evi(nevi * n, 0); + E.resize(size_E, size_E, 0); + std::vector evi(nevi * n, 0); for (int i = 0; i < nevi; i++) { + // std::cout << i << "\n"; std::copy_n(Z[i], n_inside, evi.data() + i * n); } // There should not be any bloc larger than the size of the subdmain, so margin should be useless - // std::pair max_local_size = HA->get_max_size_blocks(); + // std::pair max_local_size = HA.get_max_size_blocks(); // int local_max_size_i = max_local_size.first; // int local_max_size_j = max_local_size.second; // int local_max_size = std::max(local_max_size_i, local_max_size_j); // int margin = local_max_size_j; - // if (HA->get_symmetry_type() != 'N') { + // if (HA.get_symmetry_type() != 'N') { // margin = local_max_size; // } int margin = 0; - std::vector AZ(nevi_max * n_inside, 0); - std::vector vec_ovr(n); + std::vector AZ(nevi_max * n_inside, 0); + std::vector vec_ovr(n); // if (rankWorld == 0) { // for (int i = 0; i < nevi; i++) { @@ -71,7 +74,7 @@ void build_coarse_space_outside(const DistributedOperator *const HA, int nevi for (int i = 0; i < sizeWorld; i++) { if (recvcounts[i] == 0) continue; - std::vector buffer((HA->get_target_partition().get_size_of_partition(i) + 2 * margin) * recvcounts[i], 0); + std::vector buffer((HA.get_target_partition().get_size_of_partition(i) + 2 * margin) * recvcounts[i], 0); std::fill_n(AZ.data(), recvcounts[i] * n_inside, 0); if (rankWorld == i) { @@ -83,14 +86,14 @@ void build_coarse_space_outside(const DistributedOperator *const HA, int nevi } } // if (rankWorld == 0) { - // // std::cout << buffer.size() << " " << HA->get_target_partition()->get_size_of_partition(i) << " " << recvcounts[i] << "\n"; + // // std::cout << buffer.size() << " " << HA.get_target_partition()->get_size_of_partition(i) << " " << recvcounts[i] << "\n"; // std::cout << buffer << "\n"; // } - MPI_Bcast(buffer.data() + margin * recvcounts[i], HA->get_target_partition().get_size_of_partition(i) * recvcounts[i], wrapper_mpi::mpi_type(), i, HA->get_comm()); - if (HA->get_symmetry_type() == 'H') { + MPI_Bcast(buffer.data() + margin * recvcounts[i], HA.get_target_partition().get_size_of_partition(i) * recvcounts[i], wrapper_mpi::mpi_type(), i, HA.get_comm()); + if (HA.get_symmetry_type() == 'H') { conj_if_complex(buffer.data(), buffer.size()); } - HA->internal_sub_matrix_product_to_local(buffer.data(), AZ.data(), recvcounts[i], HA->get_target_partition().get_offset_of_partition(i), HA->get_target_partition().get_size_of_partition(i)); + HA.internal_sub_matrix_product_to_local(buffer.data(), AZ.data(), recvcounts[i], HA.get_target_partition().get_offset_of_partition(i), HA.get_target_partition().get_size_of_partition(i)); // if (rankWorld == 0) { // std::cout << "AZ " << i << "\n"; @@ -98,7 +101,7 @@ void build_coarse_space_outside(const DistributedOperator *const HA, int nevi // } // Removed because complex scalar product afterward - // if (HA->get_symmetry_type() == 'H') { + // if (HA.get_symmetry_type() == 'H') { // conj_if_complex(AZ.data(), AZ.size()); // } @@ -109,18 +112,33 @@ void build_coarse_space_outside(const DistributedOperator *const HA, int nevi // Parce que partition de l'unité... // synchronize(true); for (int jj = 0; jj < nevi; jj++) { - int coord_E_i = displs[i] + j; - int coord_E_j = displs[rankWorld] + jj; - E[coord_E_i + coord_E_j * size_E] = std::inner_product(evi.data() + jj * n, evi.data() + jj * n + n_inside, vec_ovr.data(), T(0), std::plus(), [](T u, T v) { return u * v; }); + int coord_E_i = displs[i] + j; + int coord_E_j = displs[rankWorld] + jj; + E(coord_E_i, coord_E_j) = std::inner_product(evi.data() + jj * n, evi.data() + jj * n + n_inside, vec_ovr.data(), CoefficientPrecision(0), std::plus(), [](CoefficientPrecision u, CoefficientPrecision v) { return u * v; }); } } } if (rankWorld == 0) - MPI_Reduce(MPI_IN_PLACE, E.data(), size_E * size_E, wrapper_mpi::mpi_type(), MPI_SUM, 0, HA->get_comm()); + MPI_Reduce(MPI_IN_PLACE, E.data(), size_E * size_E, wrapper_mpi::mpi_type(), MPI_SUM, 0, HA.get_comm()); else - MPI_Reduce(E.data(), E.data(), size_E * size_E, wrapper_mpi::mpi_type(), MPI_SUM, 0, HA->get_comm()); + MPI_Reduce(E.data(), E.data(), size_E * size_E, wrapper_mpi::mpi_type(), MPI_SUM, 0, HA.get_comm()); } +template +class GeneoCoarseOperatorBuilder : public VirtualCoarseOperatorBuilder { + private: + const DistributedOperator &m_distributed_operator; + + public: + GeneoCoarseOperatorBuilder(const DistributedOperator &HA) : m_distributed_operator(HA) {} + + Matrix build_coarse_operator(int n, int nevi, CoefficientPrecision **coarse_space) override { + Matrix coarse_operator; + htool::build_geneo_coarse_operator(m_distributed_operator, nevi, n, coarse_space, coarse_operator); + return coarse_operator; + } +}; + } // namespace htool #endif diff --git a/include/htool/solvers/geneo/coarse_space_builder.hpp b/include/htool/solvers/geneo/coarse_space_builder.hpp new file mode 100644 index 00000000..74229920 --- /dev/null +++ b/include/htool/solvers/geneo/coarse_space_builder.hpp @@ -0,0 +1,155 @@ +#ifndef HTOOL_GENEO_COARSE_SPACE_BUILDER_HPP +#define HTOOL_GENEO_COARSE_SPACE_BUILDER_HPP + +#include "../../basic_types/matrix.hpp" +// #include "../../wrappers/wrapper_hpddm.hpp" +#include "../../wrappers/wrapper_lapack.hpp" +// #include "../hmatrix/hmatrix.hpp" +#include "../interfaces/virtual_coarse_space_builder.hpp" +namespace htool { + +template +class GeneoCoarseSpaceDenseBuilder : public VirtualCoarseSpaceBuilder { + + protected: + int m_size_wo_overlap; + int m_size_with_overlap; + Matrix m_DAiD; + Matrix m_Bi; + char m_symmetry = 'N'; + char m_uplo = 'N'; + int m_geneo_nu = 2; + htool::underlying_type m_geneo_threshold = -1.; + explicit GeneoCoarseSpaceDenseBuilder(int size_wo_overlap, const Matrix &Ai, Matrix &Bi, char symmetry, char uplo, int geneo_nu, htool::underlying_type geneo_threshold) : m_size_wo_overlap(size_wo_overlap), m_size_with_overlap(Ai.nb_cols()), m_DAiD(m_size_with_overlap, m_size_with_overlap), m_Bi(Bi), m_symmetry(symmetry), m_uplo(uplo), m_geneo_nu(geneo_nu), m_geneo_threshold(geneo_threshold) { + for (int i = 0; i < m_size_wo_overlap; i++) { + std::copy_n(Ai.data() + i * m_size_with_overlap, m_size_wo_overlap, &(m_DAiD(0, i))); + } + } + + public: + static GeneoCoarseSpaceDenseBuilder GeneoWithNu(int size_wo_overlap, const Matrix &Ai, Matrix &Bi, char symmetry, char uplo, int geneo_nu) { return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, Ai, Bi, symmetry, uplo, geneo_nu, -1}; } + + static GeneoCoarseSpaceDenseBuilder GeneoWithThreshold(int size_wo_overlap, const Matrix &Ai, Matrix &Bi, char symmetry, char uplo, htool::underlying_type geneo_threshold) { return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, Ai, Bi, symmetry, uplo, 0, geneo_threshold}; } + + virtual Matrix build_coarse_space() override { + + int n = m_size_with_overlap; + int ldvl = n, ldvr = n, lwork = -1; + int lda = n, ldb = n; + std::vector work(n); + std::vector rwork; + std::vector index(n, 0); + std::iota(index.begin(), index.end(), int(0)); + int nevi = m_geneo_nu; + int info; + // int rankWorld; + // MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + // m_DAiD.csv_save("m_DAiD_" + NbrToStr(rankWorld)); + // std::cout << "?????? " << m_Bi.nb_rows() << " " << m_Bi.nb_cols() << "\n"; + // m_Bi.csv_save("m_Bi_" + NbrToStr(rankWorld)); + // std::cout << "BOUH " << m_geneo_nu << " " << m_geneo_threshold << "\n"; + if (m_symmetry == 'S' || m_symmetry == 'H') { + int itype = 1; + std::vector> w(n); + if (is_complex()) { + rwork.resize(3 * n - 2); + } + + // std::cout << "OUAAAAH\n"; + Lapack::gv(&itype, "V", &m_uplo, &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, w.data(), work.data(), &lwork, rwork.data(), &info); + lwork = (int)std::real(work[0]); + work.resize(lwork); + Lapack::gv(&itype, "V", &m_uplo, &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, w.data(), work.data(), &lwork, rwork.data(), &info); + + // std::cout << "OUAAAAH 2\n"; + std::sort(index.begin(), index.end(), [&](const int &a, const int &b) { + return (std::abs(w[a]) > std::abs(w[b])); + }); + if (m_geneo_threshold > 0.0) { + nevi = 0; + while (std::abs(w[index[nevi]]) > m_geneo_threshold && nevi < index.size()) { + nevi++; + } + } + + // if (rankWorld == 0) { + // for (int i = 0; i < index.size(); i++) { + // std::cout << std::abs(w[index[i]]) << " "; + // } + // std::cout << "\n"; + // // std::cout << vr << "\n"; + // } + // MPI_Barrier(MPI_COMM_WORLD); + // if (rankWorld == 1) { + // std::cout << "w : " << w << "\n"; + // std::cout << "info: " << info << "\n"; + // for (int i = 0; i < index.size(); i++) { + // std::cout << std::abs(w[index[i]]) << " "; + // } + // std::cout << "\n"; + // // std::cout << vr << "\n"; + // } + + Matrix Z(n, nevi); + for (int i = 0; i < m_size_wo_overlap; i++) { + for (int j = 0; j < nevi; j++) { + Z(i, j) = m_DAiD(i, index[j]); + } + } + return Z; + } + + if (is_complex()) { + rwork.resize(8 * n); + } + std::vector alphar(n), alphai((is_complex() ? 0 : n)), beta(n); + std::vector vl(n * n), vr(n * n); + + Lapack::ggev("N", "V", &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info); + lwork = (int)std::real(work[0]); + work.resize(lwork); + Lapack::ggev("N", "V", &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info); + + std::sort(index.begin(), index.end(), [&](const int &a, const int &b) { + return ((std::abs(beta[a]) < 1e-15 || (std::abs(alphar[a] / beta[a]) > std::abs(alphar[b] / beta[b]))) && !(std::abs(beta[b]) < 1e-15)); + }); + if (m_geneo_threshold > 0.0) { + nevi = 0; + while (std::abs(beta[index[nevi]]) < 1e-15 || (std::abs(alphar[index[nevi]] / beta[index[nevi]]) > m_geneo_threshold && nevi < index.size())) { + nevi++; + } + } + + // if (rankWorld == 0) { + // for (int i = 0; i < index.size(); i++) { + // std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " "; + // } + // std::cout << "\n"; + // // std::cout << vr << "\n"; + // } + // MPI_Barrier(MPI_COMM_WORLD); + // if (rankWorld == 1) { + // // std::cout << "alphar : " << alphar << "\n"; + // // std::cout << "alphai : " << alphai << "\n"; + // // std::cout << "beta : " << beta << "\n"; + // // std::cout << "info: " << info << "\n"; + // for (int i = 0; i < index.size(); i++) { + // std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " "; + // } + // std::cout << "\n"; + // // std::cout << vr << "\n"; + // } + + Matrix Z(n, nevi); + for (int i = 0; i < m_size_wo_overlap; i++) { + for (int j = 0; j < nevi; j++) { + Z(i, j) = vr[index[j] * n + i]; + } + } + return Z; + } +}; + +} // namespace htool + +#endif diff --git a/include/htool/solvers/interfaces/virtual_coarse_operator_builder.hpp b/include/htool/solvers/interfaces/virtual_coarse_operator_builder.hpp new file mode 100644 index 00000000..33a1c0fc --- /dev/null +++ b/include/htool/solvers/interfaces/virtual_coarse_operator_builder.hpp @@ -0,0 +1,17 @@ +#ifndef HTOOL_VIRTUAL_OPERATOR_SPACE_BUILDER_HPP +#define HTOOL_VIRTUAL_OPERATOR_SPACE_BUILDER_HPP + +#include "../../basic_types/matrix.hpp" + +namespace htool { + +template +class VirtualCoarseOperatorBuilder { + public: + virtual Matrix build_coarse_operator(int nb_rows, int nb_cols, CoefficientPrecision **Z) = 0; + virtual ~VirtualCoarseOperatorBuilder() {} +}; + +} // namespace htool + +#endif diff --git a/include/htool/solvers/interfaces/virtual_coarse_space_builder.hpp b/include/htool/solvers/interfaces/virtual_coarse_space_builder.hpp new file mode 100644 index 00000000..306cb4a9 --- /dev/null +++ b/include/htool/solvers/interfaces/virtual_coarse_space_builder.hpp @@ -0,0 +1,15 @@ +#ifndef HTOOL_VIRTUAL_COARSE_SPACE_BUILDER_HPP +#define HTOOL_VIRTUAL_COARSE_SPACE_BUILDER_HPP + +namespace htool { + +template +class VirtualCoarseSpaceBuilder { + public: + virtual Matrix build_coarse_space() = 0; + virtual ~VirtualCoarseSpaceBuilder() {} +}; + +} // namespace htool + +#endif diff --git a/include/htool/solvers/utility.hpp b/include/htool/solvers/utility.hpp index e7c7280e..5fd12cb7 100644 --- a/include/htool/solvers/utility.hpp +++ b/include/htool/solvers/utility.hpp @@ -105,13 +105,11 @@ class DefaultDDMSolverBuilderAddingOverlap { }; std::vector> m_intersections; - private: - Matrix m_block_diagonal_dense_matrix; - public: + Matrix block_diagonal_dense_matrix; DDM solver; - DefaultDDMSolverBuilderAddingOverlap(DistributedOperator &distributed_operator, const HMatrix *block_diagonal_hmatrix, const VirtualGeneratorWithPermutation &generator, const std::vector &ovr_subdomain_to_global, const std::vector &cluster_to_ovr_subdomain, const std::vector &neighbors, const std::vector> &intersections) : m_local_numbering(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections), local_to_global_numbering(m_local_numbering.local_to_global_numbering), m_block_diagonal_dense_matrix(initialize_diagonal_block(distributed_operator, block_diagonal_hmatrix, generator)), solver(distributed_operator, m_block_diagonal_dense_matrix, neighbors, m_local_numbering.intersections) {} + DefaultDDMSolverBuilderAddingOverlap(DistributedOperator &distributed_operator, const HMatrix *block_diagonal_hmatrix, const VirtualGeneratorWithPermutation &generator, const std::vector &ovr_subdomain_to_global, const std::vector &cluster_to_ovr_subdomain, const std::vector &neighbors, const std::vector> &intersections) : m_local_numbering(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections), local_to_global_numbering(m_local_numbering.local_to_global_numbering), block_diagonal_dense_matrix(initialize_diagonal_block(distributed_operator, block_diagonal_hmatrix, generator)), solver(distributed_operator, block_diagonal_dense_matrix, neighbors, m_local_numbering.intersections) {} }; template @@ -154,13 +152,11 @@ class DefaultDDMSolverBuilder { return permuted_block_diagonal_dense_matrix_with_overlap; }; - private: - Matrix m_block_diagonal_dense_matrix; - public: + Matrix block_diagonal_dense_matrix; DDM solver; - DefaultDDMSolverBuilder(DistributedOperator &distributed_operator, const HMatrix &block_diagonal_hmatrix, const std::vector &neighbors, const std::vector> &intersections) : m_block_diagonal_dense_matrix(initialize_diagonal_block(block_diagonal_hmatrix)), solver(distributed_operator, m_block_diagonal_dense_matrix, neighbors, intersections) {} + DefaultDDMSolverBuilder(DistributedOperator &distributed_operator, const HMatrix &block_diagonal_hmatrix, const std::vector &neighbors, const std::vector> &intersections) : block_diagonal_dense_matrix(initialize_diagonal_block(block_diagonal_hmatrix)), solver(distributed_operator, block_diagonal_dense_matrix, neighbors, intersections) {} }; } // namespace htool diff --git a/include/htool/wrappers/wrapper_lapack.hpp b/include/htool/wrappers/wrapper_lapack.hpp index c6b86d18..dc0ecbf1 100644 --- a/include/htool/wrappers/wrapper_lapack.hpp +++ b/include/htool/wrappers/wrapper_lapack.hpp @@ -9,9 +9,17 @@ # define HTOOL_LAPACK_F77(func) func##_ #endif -#define HTOOL_GENERATE_EXTERN_LAPACK_COMPLEX(C, T, B, U) \ - void HTOOL_LAPACK_F77(B##gesvd)(const char *, const char *, const int *, const int *, U *, const int *, U *, U *, const int *, U *, const int *, U *, const int *, int *); \ - void HTOOL_LAPACK_F77(C##gesvd)(const char *, const char *, const int *, const int *, T *, const int *, U *, T *, const int *, T *, const int *, T *, const int *, U *, int *); +#define HTOOL_GENERATE_EXTERN_LAPACK(C, T, U, SYM, ORT) \ + void HTOOL_LAPACK_F77(C##SYM##gv)(const int *, const char *, const char *, const int *, T *, const int *, T *, const int *, T *, T *, const int *, int *); + +// const int *itype, const char *jobz, const char *UPLO, const int *n, T *A, const int *lda, T *B, const int *ldb, T *W, T *work, const int *lwork, U *, int *info) +#define HTOOL_GENERATE_EXTERN_LAPACK_COMPLEX(C, T, B, U) \ + void HTOOL_LAPACK_F77(B##gesvd)(const char *, const char *, const int *, const int *, U *, const int *, U *, U *, const int *, U *, const int *, U *, const int *, int *); \ + void HTOOL_LAPACK_F77(C##gesvd)(const char *, const char *, const int *, const int *, T *, const int *, U *, T *, const int *, T *, const int *, T *, const int *, U *, int *); \ + void HTOOL_LAPACK_F77(B##ggev)(const char *, const char *, const int *, U *, const int *, U *, const int *, U *, U *, U *, U *, const int *, U *, const int *, U *, const int *, int *); \ + void HTOOL_LAPACK_F77(C##ggev)(const char *, const char *, const int *, T *, const int *, T *, const int *, T *, T *, T *, const int *, T *, const int *, T *, const int *, U *, int *); \ + void HTOOL_LAPACK_F77(B##sygv)(const int *, const char *, const char *, const int *, U *, const int *, U *, const int *, U *, U *, const int *, int *); \ + void HTOOL_LAPACK_F77(C##hegv)(const int *, const char *, const char *, const int *, T *, const int *, T *, const int *, U *, T *, const int *, U *, int *); #if !defined(PETSC_HAVE_BLASLAPACK) # ifndef _MKL_H_ @@ -40,18 +48,44 @@ struct Lapack { /* Function: gesvd * computes the singular value decomposition (SVD). */ static void gesvd(const char *, const char *, const int *, const int *, K *, const int *, underlying_type *, K *, const int *, K *, const int *, K *, const int *, underlying_type *, int *); + /* Function: ggev + * Computes the eigenvalues and (optionally) the eigenvectors of a nonsymmetric generalized eigenvalue problem. */ + static void ggev(const char *, const char *, const int *, K *, const int *, K *, const int *, K *, K *, K *, K *, const int *, K *, const int *, K *, const int *, underlying_type *, int *); + /* Function: gv + * Computes the eigenvalues and (optionally) the eigenvectors of a hermitian/symetric generalized eigenvalue problem. */ + static void gv(const int *, const char *, const char *, const int *, K *, const int *, K *, const int *, underlying_type *, K *, const int *, underlying_type *, int *); }; -# define HTOOL_GENERATE_LAPACK_COMPLEX(C, T, B, U) \ - template <> \ - inline void Lapack::gesvd(const char *jobu, const char *jobvt, const int *m, const int *n, U *a, const int *lda, U *s, U *u, const int *ldu, U *vt, const int *ldvt, U *work, const int *lwork, U *, int *info) { \ - HTOOL_LAPACK_F77(B##gesvd) \ - (jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info); \ - } \ - template <> \ - inline void Lapack::gesvd(const char *jobu, const char *jobvt, const int *m, const int *n, T *a, const int *lda, U *s, T *u, const int *ldu, T *vt, const int *ldvt, T *work, const int *lwork, U *rwork, int *info) { \ - HTOOL_LAPACK_F77(C##gesvd) \ - (jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, info); \ +# define HTOOL_GENERATE_LAPACK_COMPLEX(C, T, B, U) \ + template <> \ + inline void Lapack::gesvd(const char *jobu, const char *jobvt, const int *m, const int *n, U *a, const int *lda, U *s, U *u, const int *ldu, U *vt, const int *ldvt, U *work, const int *lwork, U *, int *info) { \ + HTOOL_LAPACK_F77(B##gesvd) \ + (jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info); \ + } \ + template <> \ + inline void Lapack::gesvd(const char *jobu, const char *jobvt, const int *m, const int *n, T *a, const int *lda, U *s, T *u, const int *ldu, T *vt, const int *ldvt, T *work, const int *lwork, U *rwork, int *info) { \ + HTOOL_LAPACK_F77(C##gesvd) \ + (jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, info); \ + } \ + template <> \ + inline void Lapack::ggev(const char *jobvl, const char *jobvr, const int *n, U *a, const int *lda, U *b, const int *ldb, U *alphar, U *alphai, U *beta, U *vl, const int *ldvl, U *vr, const int *ldvr, U *work, const int *lwork, U *, int *info) { \ + HTOOL_LAPACK_F77(B##ggev) \ + (jobvl, jobvr, n, a, lda, b, ldb, alphar, alphai, beta, vl, ldvl, vr, ldvr, work, lwork, info); \ + } \ + template <> \ + inline void Lapack::ggev(const char *jobvl, const char *jobvr, const int *n, T *a, const int *lda, T *b, const int *ldb, T *alpha, T *, T *beta, T *vl, const int *ldvl, T *vr, const int *ldvr, T *work, const int *lwork, U *rwork, int *info) { \ + HTOOL_LAPACK_F77(C##ggev) \ + (jobvl, jobvr, n, a, lda, b, ldb, alpha, beta, vl, ldvl, vr, ldvr, work, lwork, rwork, info); \ + } \ + template <> \ + inline void Lapack::gv(const int *itype, const char *jobz, const char *uplo, const int *n, U *a, const int *lda, U *b, const int *ldb, U *w, U *work, const int *lwork, U *, int *info) { \ + HTOOL_LAPACK_F77(B##sygv) \ + (itype, jobz, uplo, n, a, lda, b, ldb, w, work, lwork, info); \ + } \ + template <> \ + inline void Lapack::gv(const int *itype, const char *jobz, const char *uplo, const int *n, T *a, const int *lda, T *b, const int *ldb, U *w, T *work, const int *lwork, U *rwork, int *info) { \ + HTOOL_LAPACK_F77(C##hegv) \ + (itype, jobz, uplo, n, a, lda, b, ldb, w, work, lwork, rwork, info); \ } HTOOL_GENERATE_LAPACK_COMPLEX(c, std::complex, s, float) diff --git a/tests/functional_tests/solvers/test_solver.cpp b/tests/functional_tests/solvers/test_solver.cpp index d1d51229..f49d5d2c 100644 --- a/tests/functional_tests/solvers/test_solver.cpp +++ b/tests/functional_tests/solvers/test_solver.cpp @@ -16,18 +16,22 @@ int main(int argc, char *argv[]) { bool is_error = false; - for (auto nb_rhs : {1, 10}) { - for (auto symmetry : {'S'}) { - std::string datapath_final = symmetry == 'S' ? datapath + "/output_sym/" : datapath + "/output_non_sym/"; - - is_error = is_error || test_solver_wo_overlap(argc, argv, nb_rhs, symmetry, datapath_final); - is_error = is_error || test_solver_ddm_adding_overlap(argc, argv, nb_rhs, symmetry, datapath_final); - is_error = is_error || test_solver_ddm(argc, argv, nb_rhs, symmetry, datapath_final); + for (auto nb_rhs : {1, 5}) { + for (auto data_symmetry : {'N', 'S'}) { + std::string datapath_final = data_symmetry == 'S' ? datapath + "/output_sym/" : datapath + "/output_non_sym/"; + std::vector symmetries = {'N'}; + if (data_symmetry == 'S') { + symmetries.push_back('S'); + } + for (auto symmetry : symmetries) { + + is_error = is_error || test_solver_wo_overlap(argc, argv, nb_rhs, symmetry, datapath_final); + is_error = is_error || test_solver_ddm_adding_overlap(argc, argv, nb_rhs, data_symmetry, symmetry, datapath_final); + is_error = is_error || test_solver_ddm(argc, argv, nb_rhs, data_symmetry, symmetry, datapath_final); + } } } - // test = test || test_solver_ddm(argc, argv, 1, 'S', true); - // Finalize the MPI environment. MPI_Finalize(); diff --git a/tests/functional_tests/solvers/test_solver_ddm.hpp b/tests/functional_tests/solvers/test_solver_ddm.hpp index 390a90c4..eb18d4c0 100644 --- a/tests/functional_tests/solvers/test_solver_ddm.hpp +++ b/tests/functional_tests/solvers/test_solver_ddm.hpp @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include #include @@ -7,7 +9,7 @@ using namespace std; using namespace htool; -int test_solver_ddm(int argc, char *argv[], int mu, char symmetric, std::string datapath) { +int test_solver_ddm(int argc, char *argv[], int mu, char data_symmetry, char symmetric, std::string datapath) { // Get the number of processes int size; @@ -94,6 +96,8 @@ int test_solver_ddm(int argc, char *argv[], int mu, char symmetric, std::string LocalNumberingBuilder local_numbering_builder(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections); auto &renumbered_intersections = local_numbering_builder.intersections; auto &local_to_global_numbering = local_numbering_builder.local_to_global_numbering; + int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); + // int local_size_with_overlap = local_to_global_numbering.size(); std::vector geometry(n), local_geometry(local_to_global_numbering.size() * 3); bytes_to_vector(geometry, datapath + "/geometry.bin"); @@ -144,11 +148,17 @@ int test_solver_ddm(int argc, char *argv[], int mu, char symmetric, std::string MPI_Barrier(MPI_COMM_WORLD); opt.parse("-hpddm_schwarz_method asm "); Matrix> Ki; - if (symmetric == 'S' && size > 1) { + + if (data_symmetry == 'S' && size > 1) { opt.remove("geneo_threshold"); opt.parse("-hpddm_geneo_nu 2"); Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); - ddm_with_overlap.build_coarse_space(Ki); + // ddm_with_overlap.build_coarse_space(Ki); + + auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithNu(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 2); + // geneo_coarse_space_dense_builder.set_geneo_nu(2); + GeneoCoarseOperatorBuilder> geneo_coarse_operator_builder(Operator); + ddm_with_overlap.build_coarse_space(geneo_coarse_space_dense_builder, geneo_coarse_operator_builder); } ddm_with_overlap.facto_one_level(); @@ -181,7 +191,7 @@ int test_solver_ddm(int argc, char *argv[], int mu, char symmetric, std::string x_global = 0; // DDM two level ASM with overlap - if (symmetric == 'S' && size > 1) { + if (data_symmetry == 'S' && size > 1) { if (rank == 0) std::cout << "ASM two level with overlap:" << std::endl; MPI_Barrier(MPI_COMM_WORLD); @@ -216,12 +226,16 @@ int test_solver_ddm(int argc, char *argv[], int mu, char symmetric, std::string // DDM solver with threshold if (rank == 0) std::cout << "RAS two level with overlap and threshold:" << std::endl; - opt.parse("-hpddm_schwarz_method ras -hpddm_schwarz_coarse_correction additive -hpddm_geneo_threshold 100"); + opt.parse("-hpddm_schwarz_method ras -hpddm_schwarz_coarse_correction additive"); // DDM> ddm_with_overlap_threshold(Generator, &Operator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); DefaultDDMSolverBuilder, double> default_ddm_solver_with_threshold(Operator, local_hmatrix, neighbors, renumbered_intersections); DDM> &ddm_with_overlap_threshold = default_ddm_solver_with_threshold.solver; // build_ddm_solver(Operator, local_block_diagonal_hmatrix, Generator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); - ddm_with_overlap_threshold.build_coarse_space(Ki); + // ddm_with_overlap_threshold.build_coarse_space(Ki); + auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithThreshold(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 100); + // geneo_coarse_space_dense_builder.set_geneo_threshold(100); + GeneoCoarseOperatorBuilder> geneo_coarse_operator_builder(Operator); + ddm_with_overlap_threshold.build_coarse_space(geneo_coarse_space_dense_builder, geneo_coarse_operator_builder); ddm_with_overlap_threshold.facto_one_level(); ddm_with_overlap_threshold.solve(f_global.data(), x_global.data(), mu); ddm_with_overlap_threshold.print_infos(); diff --git a/tests/functional_tests/solvers/test_solver_ddm_adding_overlap.hpp b/tests/functional_tests/solvers/test_solver_ddm_adding_overlap.hpp index 84368643..77b01525 100644 --- a/tests/functional_tests/solvers/test_solver_ddm_adding_overlap.hpp +++ b/tests/functional_tests/solvers/test_solver_ddm_adding_overlap.hpp @@ -7,7 +7,7 @@ using namespace std; using namespace htool; -int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char symmetric, std::string datapath) { +int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char data_symmetry, char symmetric, std::string datapath) { // Get the number of processes int size; @@ -42,13 +42,6 @@ int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char symmetri std::cout << "Creating cluster tree" << std::endl; Cluster target_cluster = read_cluster_tree(datapath + "/cluster_" + NbrToStr(size) + "_cluster_tree_properties.csv", datapath + "/cluster_" + NbrToStr(size) + "_cluster_tree.csv"); - // std::vectortab(n); - // std::iota(tab.begin(),tab.end(),int(0)); - // t->build(p,std::vector(n,0),tab,std::vector(n,1),2); - // std::vector permutation_test(n); - // bytes_to_vector(permutation_test,datapath+"permutation.bin"); - // t->save(p,"test_cluster",{0,1,2,3}); - // Matrix if (rank == 0) std::cout << "Creating generators" << std::endl; @@ -130,11 +123,17 @@ int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char symmetri MPI_Barrier(MPI_COMM_WORLD); opt.parse("-hpddm_schwarz_method asm "); Matrix> Ki; - if (symmetric == 'S' && size > 1) { + if (data_symmetry == 'S' && size > 1) { opt.remove("geneo_threshold"); opt.parse("-hpddm_geneo_nu 2"); Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); - ddm_with_overlap.build_coarse_space(Ki); + // ddm_with_overlap.build_coarse_space(Ki); + int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); + // int local_size_with_overlap = ovr_subdomain_to_global.size(); + auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithNu(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 2); + // geneo_coarse_space_dense_builder.set_geneo_nu(4); + GeneoCoarseOperatorBuilder> geneo_coarse_operator_builder(Operator); + ddm_with_overlap.build_coarse_space(geneo_coarse_space_dense_builder, geneo_coarse_operator_builder); } ddm_with_overlap.facto_one_level(); @@ -167,7 +166,7 @@ int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char symmetri x_global = 0; // DDM two level ASM with overlap - if (symmetric == 'S' && size > 1) { + if (data_symmetry == 'S' && size > 1) { if (rank == 0) std::cout << "ASM two level with overlap:" << std::endl; MPI_Barrier(MPI_COMM_WORLD); @@ -207,7 +206,12 @@ int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char symmetri DefaultDDMSolverBuilderAddingOverlap, double> default_ddm_solver_with_threshold(Operator, local_block_diagonal_hmatrix, Generator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); DDM> &ddm_with_overlap_threshold = default_ddm_solver_with_threshold.solver; // build_ddm_solver(Operator, local_block_diagonal_hmatrix, Generator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); - ddm_with_overlap_threshold.build_coarse_space(Ki); + // ddm_with_overlap_threshold.build_coarse_space(Ki); + int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); + auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithThreshold(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 100); + // geneo_coarse_space_dense_builder.set_geneo_threshold(100); + GeneoCoarseOperatorBuilder> geneo_coarse_operator_builder(Operator); + ddm_with_overlap_threshold.build_coarse_space(geneo_coarse_space_dense_builder, geneo_coarse_operator_builder); ddm_with_overlap_threshold.facto_one_level(); ddm_with_overlap_threshold.solve(f_global.data(), x_global.data(), mu); ddm_with_overlap_threshold.print_infos();