Skip to content

Commit

Permalink
add interface for local eigensolver
Browse files Browse the repository at this point in the history
  • Loading branch information
PierreMarchand20 committed Jan 29, 2024
1 parent 715a495 commit e2105b0
Show file tree
Hide file tree
Showing 11 changed files with 563 additions and 314 deletions.
6 changes: 6 additions & 0 deletions include/htool/basic_types/matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,12 @@ class Matrix {
return m_data;
}

T *release() {
T *result = m_data;
m_data = nullptr;
return result;
}

void assign(int number_of_rows, int number_of_cols, T *ptr, bool owning_data) {
if (m_number_of_rows * m_number_of_cols > 0 && m_is_owning_data)
delete[] m_data;
Expand Down
466 changes: 226 additions & 240 deletions include/htool/solvers/ddm.hpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
#ifndef HTOOL_COARSE_SPACE_HPP
#define HTOOL_COARSE_SPACE_HPP
#ifndef HTOOL_GENEO_COARSE_OPERATOR_BUILDER_HPP
#define HTOOL_GENEO_COARSE_OPERATOR_BUILDER_HPP

#include "../distributed_operator/distributed_operator.hpp"
#include "../misc/misc.hpp"
#include "../../distributed_operator/distributed_operator.hpp"
#include "../../misc/misc.hpp"
#include "../interfaces/virtual_coarse_operator_builder.hpp"
#include <algorithm>
#include <numeric>

namespace htool {
template <typename T>
void build_coarse_space_outside(const DistributedOperator<T> *const HA, int nevi, int n, const T *const *Z, std::vector<T> &E) {

template <typename CoefficientPrecision>
void build_geneo_coarse_operator(const DistributedOperator<CoefficientPrecision> &HA, int nevi, int n, const CoefficientPrecision *const *Z, Matrix<CoefficientPrecision> &E) {
//

MPI_Comm comm = HA->get_comm();
MPI_Comm comm = HA.get_comm();
int sizeWorld, rankWorld;
MPI_Comm_rank(comm, &rankWorld);
MPI_Comm_size(comm, &sizeWorld);
int n_inside = HA->get_target_partition().get_size_of_partition(rankWorld);
int n_inside = HA.get_target_partition().get_size_of_partition(rankWorld);

// Allgather
std::vector<int> recvcounts(sizeWorld);
std::vector<int> displs(sizeWorld);
MPI_Allgather(&nevi, 1, MPI_INT, recvcounts.data(), 1, MPI_INT, HA->get_comm());
MPI_Allgather(&nevi, 1, MPI_INT, recvcounts.data(), 1, MPI_INT, HA.get_comm());

displs[0] = 0;

Expand All @@ -30,25 +32,26 @@ void build_coarse_space_outside(const DistributedOperator<T> *const HA, int nevi

int nevi_max = *std::max_element(recvcounts.begin(), recvcounts.end());
int size_E = std::accumulate(recvcounts.begin(), recvcounts.end(), 0);
E.resize(size_E * size_E, 0);
std::vector<T> evi(nevi * n, 0);
E.resize(size_E, size_E, 0);
std::vector<CoefficientPrecision> evi(nevi * n, 0);
for (int i = 0; i < nevi; i++) {
// std::cout << i << "\n";
std::copy_n(Z[i], n_inside, evi.data() + i * n);
}

// There should not be any bloc larger than the size of the subdmain, so margin should be useless
// std::pair<int, int> max_local_size = HA->get_max_size_blocks();
// std::pair<int, int> max_local_size = HA.get_max_size_blocks();
// int local_max_size_i = max_local_size.first;
// int local_max_size_j = max_local_size.second;
// int local_max_size = std::max(local_max_size_i, local_max_size_j);
// int margin = local_max_size_j;
// if (HA->get_symmetry_type() != 'N') {
// if (HA.get_symmetry_type() != 'N') {
// margin = local_max_size;
// }
int margin = 0;

std::vector<T> AZ(nevi_max * n_inside, 0);
std::vector<T> vec_ovr(n);
std::vector<CoefficientPrecision> AZ(nevi_max * n_inside, 0);
std::vector<CoefficientPrecision> vec_ovr(n);

// if (rankWorld == 0) {
// for (int i = 0; i < nevi; i++) {
Expand All @@ -71,7 +74,7 @@ void build_coarse_space_outside(const DistributedOperator<T> *const HA, int nevi
for (int i = 0; i < sizeWorld; i++) {
if (recvcounts[i] == 0)
continue;
std::vector<T> buffer((HA->get_target_partition().get_size_of_partition(i) + 2 * margin) * recvcounts[i], 0);
std::vector<CoefficientPrecision> buffer((HA.get_target_partition().get_size_of_partition(i) + 2 * margin) * recvcounts[i], 0);
std::fill_n(AZ.data(), recvcounts[i] * n_inside, 0);

if (rankWorld == i) {
Expand All @@ -83,22 +86,22 @@ void build_coarse_space_outside(const DistributedOperator<T> *const HA, int nevi
}
}
// if (rankWorld == 0) {
// // std::cout << buffer.size() << " " << HA->get_target_partition()->get_size_of_partition(i) << " " << recvcounts[i] << "\n";
// // std::cout << buffer.size() << " " << HA.get_target_partition()->get_size_of_partition(i) << " " << recvcounts[i] << "\n";
// std::cout << buffer << "\n";
// }
MPI_Bcast(buffer.data() + margin * recvcounts[i], HA->get_target_partition().get_size_of_partition(i) * recvcounts[i], wrapper_mpi<T>::mpi_type(), i, HA->get_comm());
if (HA->get_symmetry_type() == 'H') {
MPI_Bcast(buffer.data() + margin * recvcounts[i], HA.get_target_partition().get_size_of_partition(i) * recvcounts[i], wrapper_mpi<CoefficientPrecision>::mpi_type(), i, HA.get_comm());
if (HA.get_symmetry_type() == 'H') {
conj_if_complex(buffer.data(), buffer.size());
}
HA->internal_sub_matrix_product_to_local(buffer.data(), AZ.data(), recvcounts[i], HA->get_target_partition().get_offset_of_partition(i), HA->get_target_partition().get_size_of_partition(i));
HA.internal_sub_matrix_product_to_local(buffer.data(), AZ.data(), recvcounts[i], HA.get_target_partition().get_offset_of_partition(i), HA.get_target_partition().get_size_of_partition(i));

// if (rankWorld == 0) {
// std::cout << "AZ " << i << "\n";
// std::cout << AZ << "\n";
// }

// Removed because complex scalar product afterward
// if (HA->get_symmetry_type() == 'H') {
// if (HA.get_symmetry_type() == 'H') {
// conj_if_complex(AZ.data(), AZ.size());
// }

Expand All @@ -109,18 +112,33 @@ void build_coarse_space_outside(const DistributedOperator<T> *const HA, int nevi
// Parce que partition de l'unité...
// synchronize(true);
for (int jj = 0; jj < nevi; jj++) {
int coord_E_i = displs[i] + j;
int coord_E_j = displs[rankWorld] + jj;
E[coord_E_i + coord_E_j * size_E] = std::inner_product(evi.data() + jj * n, evi.data() + jj * n + n_inside, vec_ovr.data(), T(0), std::plus<T>(), [](T u, T v) { return u * v; });
int coord_E_i = displs[i] + j;
int coord_E_j = displs[rankWorld] + jj;
E(coord_E_i, coord_E_j) = std::inner_product(evi.data() + jj * n, evi.data() + jj * n + n_inside, vec_ovr.data(), CoefficientPrecision(0), std::plus<CoefficientPrecision>(), [](CoefficientPrecision u, CoefficientPrecision v) { return u * v; });
}
}
}

if (rankWorld == 0)
MPI_Reduce(MPI_IN_PLACE, E.data(), size_E * size_E, wrapper_mpi<T>::mpi_type(), MPI_SUM, 0, HA->get_comm());
MPI_Reduce(MPI_IN_PLACE, E.data(), size_E * size_E, wrapper_mpi<CoefficientPrecision>::mpi_type(), MPI_SUM, 0, HA.get_comm());
else
MPI_Reduce(E.data(), E.data(), size_E * size_E, wrapper_mpi<T>::mpi_type(), MPI_SUM, 0, HA->get_comm());
MPI_Reduce(E.data(), E.data(), size_E * size_E, wrapper_mpi<CoefficientPrecision>::mpi_type(), MPI_SUM, 0, HA.get_comm());
}

template <typename CoefficientPrecision>
class GeneoCoarseOperatorBuilder : public VirtualCoarseOperatorBuilder<CoefficientPrecision> {
private:
const DistributedOperator<CoefficientPrecision> &m_distributed_operator;

public:
GeneoCoarseOperatorBuilder(const DistributedOperator<CoefficientPrecision> &HA) : m_distributed_operator(HA) {}

Matrix<CoefficientPrecision> build_coarse_operator(int n, int nevi, CoefficientPrecision **coarse_space) override {
Matrix<CoefficientPrecision> coarse_operator;
htool::build_geneo_coarse_operator(m_distributed_operator, nevi, n, coarse_space, coarse_operator);
return coarse_operator;
}
};

} // namespace htool
#endif
155 changes: 155 additions & 0 deletions include/htool/solvers/geneo/coarse_space_builder.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#ifndef HTOOL_GENEO_COARSE_SPACE_BUILDER_HPP
#define HTOOL_GENEO_COARSE_SPACE_BUILDER_HPP

#include "../../basic_types/matrix.hpp"
// #include "../../wrappers/wrapper_hpddm.hpp"
#include "../../wrappers/wrapper_lapack.hpp"
// #include "../hmatrix/hmatrix.hpp"
#include "../interfaces/virtual_coarse_space_builder.hpp"
namespace htool {

template <typename CoefficientPrecision>
class GeneoCoarseSpaceDenseBuilder : public VirtualCoarseSpaceBuilder<CoefficientPrecision> {

protected:
int m_size_wo_overlap;
int m_size_with_overlap;
Matrix<CoefficientPrecision> m_DAiD;
Matrix<CoefficientPrecision> m_Bi;
char m_symmetry = 'N';
char m_uplo = 'N';
int m_geneo_nu = 2;
htool::underlying_type<CoefficientPrecision> m_geneo_threshold = -1.;
explicit GeneoCoarseSpaceDenseBuilder(int size_wo_overlap, const Matrix<CoefficientPrecision> &Ai, Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, int geneo_nu, htool::underlying_type<CoefficientPrecision> geneo_threshold) : m_size_wo_overlap(size_wo_overlap), m_size_with_overlap(Ai.nb_cols()), m_DAiD(m_size_with_overlap, m_size_with_overlap), m_Bi(Bi), m_symmetry(symmetry), m_uplo(uplo), m_geneo_nu(geneo_nu), m_geneo_threshold(geneo_threshold) {
for (int i = 0; i < m_size_wo_overlap; i++) {
std::copy_n(Ai.data() + i * m_size_with_overlap, m_size_wo_overlap, &(m_DAiD(0, i)));
}
}

public:
static GeneoCoarseSpaceDenseBuilder GeneoWithNu(int size_wo_overlap, const Matrix<CoefficientPrecision> &Ai, Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, int geneo_nu) { return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, Ai, Bi, symmetry, uplo, geneo_nu, -1}; }

static GeneoCoarseSpaceDenseBuilder GeneoWithThreshold(int size_wo_overlap, const Matrix<CoefficientPrecision> &Ai, Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, htool::underlying_type<CoefficientPrecision> geneo_threshold) { return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, Ai, Bi, symmetry, uplo, 0, geneo_threshold}; }

virtual Matrix<CoefficientPrecision> build_coarse_space() override {

int n = m_size_with_overlap;
int ldvl = n, ldvr = n, lwork = -1;
int lda = n, ldb = n;
std::vector<CoefficientPrecision> work(n);
std::vector<double> rwork;
std::vector<int> index(n, 0);
std::iota(index.begin(), index.end(), int(0));
int nevi = m_geneo_nu;
int info;
// int rankWorld;
// MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld);
// m_DAiD.csv_save("m_DAiD_" + NbrToStr(rankWorld));
// std::cout << "?????? " << m_Bi.nb_rows() << " " << m_Bi.nb_cols() << "\n";
// m_Bi.csv_save("m_Bi_" + NbrToStr(rankWorld));
// std::cout << "BOUH " << m_geneo_nu << " " << m_geneo_threshold << "\n";
if (m_symmetry == 'S' || m_symmetry == 'H') {
int itype = 1;
std::vector<underlying_type<CoefficientPrecision>> w(n);
if (is_complex<CoefficientPrecision>()) {
rwork.resize(3 * n - 2);
}

// std::cout << "OUAAAAH\n";
Lapack<CoefficientPrecision>::gv(&itype, "V", &m_uplo, &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, w.data(), work.data(), &lwork, rwork.data(), &info);
lwork = (int)std::real(work[0]);
work.resize(lwork);
Lapack<CoefficientPrecision>::gv(&itype, "V", &m_uplo, &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, w.data(), work.data(), &lwork, rwork.data(), &info);

// std::cout << "OUAAAAH 2\n";
std::sort(index.begin(), index.end(), [&](const int &a, const int &b) {
return (std::abs(w[a]) > std::abs(w[b]));
});
if (m_geneo_threshold > 0.0) {
nevi = 0;
while (std::abs(w[index[nevi]]) > m_geneo_threshold && nevi < index.size()) {
nevi++;
}
}

// if (rankWorld == 0) {
// for (int i = 0; i < index.size(); i++) {
// std::cout << std::abs(w[index[i]]) << " ";
// }
// std::cout << "\n";
// // std::cout << vr << "\n";
// }
// MPI_Barrier(MPI_COMM_WORLD);
// if (rankWorld == 1) {
// std::cout << "w : " << w << "\n";
// std::cout << "info: " << info << "\n";
// for (int i = 0; i < index.size(); i++) {
// std::cout << std::abs(w[index[i]]) << " ";
// }
// std::cout << "\n";
// // std::cout << vr << "\n";
// }

Matrix<CoefficientPrecision> Z(n, nevi);
for (int i = 0; i < m_size_wo_overlap; i++) {
for (int j = 0; j < nevi; j++) {
Z(i, j) = m_DAiD(i, index[j]);
}
}
return Z;
}

if (is_complex<CoefficientPrecision>()) {
rwork.resize(8 * n);
}
std::vector<CoefficientPrecision> alphar(n), alphai((is_complex<CoefficientPrecision>() ? 0 : n)), beta(n);
std::vector<CoefficientPrecision> vl(n * n), vr(n * n);

Lapack<CoefficientPrecision>::ggev("N", "V", &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info);
lwork = (int)std::real(work[0]);
work.resize(lwork);
Lapack<CoefficientPrecision>::ggev("N", "V", &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info);

std::sort(index.begin(), index.end(), [&](const int &a, const int &b) {
return ((std::abs(beta[a]) < 1e-15 || (std::abs(alphar[a] / beta[a]) > std::abs(alphar[b] / beta[b]))) && !(std::abs(beta[b]) < 1e-15));
});
if (m_geneo_threshold > 0.0) {
nevi = 0;
while (std::abs(beta[index[nevi]]) < 1e-15 || (std::abs(alphar[index[nevi]] / beta[index[nevi]]) > m_geneo_threshold && nevi < index.size())) {
nevi++;
}
}

// if (rankWorld == 0) {
// for (int i = 0; i < index.size(); i++) {
// std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " ";
// }
// std::cout << "\n";
// // std::cout << vr << "\n";
// }
// MPI_Barrier(MPI_COMM_WORLD);
// if (rankWorld == 1) {
// // std::cout << "alphar : " << alphar << "\n";
// // std::cout << "alphai : " << alphai << "\n";
// // std::cout << "beta : " << beta << "\n";
// // std::cout << "info: " << info << "\n";
// for (int i = 0; i < index.size(); i++) {
// std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " ";
// }
// std::cout << "\n";
// // std::cout << vr << "\n";
// }

Matrix<CoefficientPrecision> Z(n, nevi);
for (int i = 0; i < m_size_wo_overlap; i++) {
for (int j = 0; j < nevi; j++) {
Z(i, j) = vr[index[j] * n + i];
}
}
return Z;
}
};

} // namespace htool

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef HTOOL_VIRTUAL_OPERATOR_SPACE_BUILDER_HPP
#define HTOOL_VIRTUAL_OPERATOR_SPACE_BUILDER_HPP

#include "../../basic_types/matrix.hpp"

namespace htool {

template <typename CoefficientPrecision>
class VirtualCoarseOperatorBuilder {
public:
virtual Matrix<CoefficientPrecision> build_coarse_operator(int nb_rows, int nb_cols, CoefficientPrecision **Z) = 0;
virtual ~VirtualCoarseOperatorBuilder() {}
};

} // namespace htool

#endif
15 changes: 15 additions & 0 deletions include/htool/solvers/interfaces/virtual_coarse_space_builder.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef HTOOL_VIRTUAL_COARSE_SPACE_BUILDER_HPP
#define HTOOL_VIRTUAL_COARSE_SPACE_BUILDER_HPP

namespace htool {

template <typename CoefficientPrecision>
class VirtualCoarseSpaceBuilder {
public:
virtual Matrix<CoefficientPrecision> build_coarse_space() = 0;
virtual ~VirtualCoarseSpaceBuilder() {}
};

} // namespace htool

#endif
12 changes: 4 additions & 8 deletions include/htool/solvers/utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,11 @@ class DefaultDDMSolverBuilderAddingOverlap {
};
std::vector<std::vector<int>> m_intersections;

private:
Matrix<CoefficientPrecision> m_block_diagonal_dense_matrix;

public:
Matrix<CoefficientPrecision> block_diagonal_dense_matrix;
DDM<CoefficientPrecision> solver;

DefaultDDMSolverBuilderAddingOverlap(DistributedOperator<CoefficientPrecision> &distributed_operator, const HMatrix<CoefficientPrecision, CoordinatePrecision> *block_diagonal_hmatrix, const VirtualGeneratorWithPermutation<CoefficientPrecision> &generator, const std::vector<int> &ovr_subdomain_to_global, const std::vector<int> &cluster_to_ovr_subdomain, const std::vector<int> &neighbors, const std::vector<std::vector<int>> &intersections) : m_local_numbering(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections), local_to_global_numbering(m_local_numbering.local_to_global_numbering), m_block_diagonal_dense_matrix(initialize_diagonal_block(distributed_operator, block_diagonal_hmatrix, generator)), solver(distributed_operator, m_block_diagonal_dense_matrix, neighbors, m_local_numbering.intersections) {}
DefaultDDMSolverBuilderAddingOverlap(DistributedOperator<CoefficientPrecision> &distributed_operator, const HMatrix<CoefficientPrecision, CoordinatePrecision> *block_diagonal_hmatrix, const VirtualGeneratorWithPermutation<CoefficientPrecision> &generator, const std::vector<int> &ovr_subdomain_to_global, const std::vector<int> &cluster_to_ovr_subdomain, const std::vector<int> &neighbors, const std::vector<std::vector<int>> &intersections) : m_local_numbering(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections), local_to_global_numbering(m_local_numbering.local_to_global_numbering), block_diagonal_dense_matrix(initialize_diagonal_block(distributed_operator, block_diagonal_hmatrix, generator)), solver(distributed_operator, block_diagonal_dense_matrix, neighbors, m_local_numbering.intersections) {}
};

template <typename CoefficientPrecision, typename CoordinatePrecision>
Expand Down Expand Up @@ -154,13 +152,11 @@ class DefaultDDMSolverBuilder {
return permuted_block_diagonal_dense_matrix_with_overlap;
};

private:
Matrix<CoefficientPrecision> m_block_diagonal_dense_matrix;

public:
Matrix<CoefficientPrecision> block_diagonal_dense_matrix;
DDM<CoefficientPrecision> solver;

DefaultDDMSolverBuilder(DistributedOperator<CoefficientPrecision> &distributed_operator, const HMatrix<CoefficientPrecision, CoordinatePrecision> &block_diagonal_hmatrix, const std::vector<int> &neighbors, const std::vector<std::vector<int>> &intersections) : m_block_diagonal_dense_matrix(initialize_diagonal_block(block_diagonal_hmatrix)), solver(distributed_operator, m_block_diagonal_dense_matrix, neighbors, intersections) {}
DefaultDDMSolverBuilder(DistributedOperator<CoefficientPrecision> &distributed_operator, const HMatrix<CoefficientPrecision, CoordinatePrecision> &block_diagonal_hmatrix, const std::vector<int> &neighbors, const std::vector<std::vector<int>> &intersections) : block_diagonal_dense_matrix(initialize_diagonal_block(block_diagonal_hmatrix)), solver(distributed_operator, block_diagonal_dense_matrix, neighbors, intersections) {}
};

} // namespace htool
Expand Down
Loading

0 comments on commit e2105b0

Please sign in to comment.