add interface for local eigensolver

htool-ddm · Jan 29, 2024 · e2105b0 · e2105b0
1 parent 715a495
commit e2105b0
Show file tree

Hide file tree

Showing 11 changed files with 563 additions and 314 deletions.
diff --git a/include/htool/basic_types/matrix.hpp b/include/htool/basic_types/matrix.hpp
@@ -109,6 +109,12 @@ class Matrix {
         return m_data;
     }
 
+    T *release() {
+        T *result = m_data;
+        m_data    = nullptr;
+        return result;
+    }
+
     void assign(int number_of_rows, int number_of_cols, T *ptr, bool owning_data) {
         if (m_number_of_rows * m_number_of_cols > 0 && m_is_owning_data)
             delete[] m_data;

diff --git a/include/htool/solvers/ddm.hpp b/include/htool/solvers/ddm.hpp
diff --git a/include/htool/solvers/coarse_space.hpp → ...solvers/geneo/coarse_operator_builder.hpp b/include/htool/solvers/coarse_space.hpp → ...solvers/geneo/coarse_operator_builder.hpp
@@ -1,26 +1,28 @@
-#ifndef HTOOL_COARSE_SPACE_HPP
-#define HTOOL_COARSE_SPACE_HPP
+#ifndef HTOOL_GENEO_COARSE_OPERATOR_BUILDER_HPP
+#define HTOOL_GENEO_COARSE_OPERATOR_BUILDER_HPP
 
-#include "../distributed_operator/distributed_operator.hpp"
-#include "../misc/misc.hpp"
+#include "../../distributed_operator/distributed_operator.hpp"
+#include "../../misc/misc.hpp"
+#include "../interfaces/virtual_coarse_operator_builder.hpp"
 #include <algorithm>
 #include <numeric>
 
 namespace htool {
-template <typename T>
-void build_coarse_space_outside(const DistributedOperator<T> *const HA, int nevi, int n, const T *const *Z, std::vector<T> &E) {
+
+template <typename CoefficientPrecision>
+void build_geneo_coarse_operator(const DistributedOperator<CoefficientPrecision> &HA, int nevi, int n, const CoefficientPrecision *const *Z, Matrix<CoefficientPrecision> &E) {
     //
 
-    MPI_Comm comm = HA->get_comm();
+    MPI_Comm comm = HA.get_comm();
     int sizeWorld, rankWorld;
     MPI_Comm_rank(comm, &rankWorld);
     MPI_Comm_size(comm, &sizeWorld);
-    int n_inside = HA->get_target_partition().get_size_of_partition(rankWorld);
+    int n_inside = HA.get_target_partition().get_size_of_partition(rankWorld);
 
     // Allgather
     std::vector<int> recvcounts(sizeWorld);
     std::vector<int> displs(sizeWorld);
-    MPI_Allgather(&nevi, 1, MPI_INT, recvcounts.data(), 1, MPI_INT, HA->get_comm());
+    MPI_Allgather(&nevi, 1, MPI_INT, recvcounts.data(), 1, MPI_INT, HA.get_comm());
 
     displs[0] = 0;
 
@@ -30,25 +32,26 @@ void build_coarse_space_outside(const DistributedOperator<T> *const HA, int nevi
 
     int nevi_max = *std::max_element(recvcounts.begin(), recvcounts.end());
     int size_E   = std::accumulate(recvcounts.begin(), recvcounts.end(), 0);
-    E.resize(size_E * size_E, 0);
-    std::vector<T> evi(nevi * n, 0);
+    E.resize(size_E, size_E, 0);
+    std::vector<CoefficientPrecision> evi(nevi * n, 0);
     for (int i = 0; i < nevi; i++) {
+        // std::cout << i << "\n";
         std::copy_n(Z[i], n_inside, evi.data() + i * n);
     }
 
     // There should not be any bloc larger than the size of the subdmain, so margin should be useless
-    // std::pair<int, int> max_local_size = HA->get_max_size_blocks();
+    // std::pair<int, int> max_local_size = HA.get_max_size_blocks();
     // int local_max_size_i               = max_local_size.first;
     // int local_max_size_j               = max_local_size.second;
     // int local_max_size                 = std::max(local_max_size_i, local_max_size_j);
     // int margin                         = local_max_size_j;
-    // if (HA->get_symmetry_type() != 'N') {
+    // if (HA.get_symmetry_type() != 'N') {
     //     margin = local_max_size;
     // }
     int margin = 0;
 
-    std::vector<T> AZ(nevi_max * n_inside, 0);
-    std::vector<T> vec_ovr(n);
+    std::vector<CoefficientPrecision> AZ(nevi_max * n_inside, 0);
+    std::vector<CoefficientPrecision> vec_ovr(n);
 
     // if (rankWorld == 0) {
     //     for (int i = 0; i < nevi; i++) {
@@ -71,7 +74,7 @@ void build_coarse_space_outside(const DistributedOperator<T> *const HA, int nevi
     for (int i = 0; i < sizeWorld; i++) {
         if (recvcounts[i] == 0)
             continue;
-        std::vector<T> buffer((HA->get_target_partition().get_size_of_partition(i) + 2 * margin) * recvcounts[i], 0);
+        std::vector<CoefficientPrecision> buffer((HA.get_target_partition().get_size_of_partition(i) + 2 * margin) * recvcounts[i], 0);
         std::fill_n(AZ.data(), recvcounts[i] * n_inside, 0);
 
         if (rankWorld == i) {
@@ -83,22 +86,22 @@ void build_coarse_space_outside(const DistributedOperator<T> *const HA, int nevi
             }
         }
         // if (rankWorld == 0) {
-        //     // std::cout << buffer.size() << " " << HA->get_target_partition()->get_size_of_partition(i) << " " << recvcounts[i] << "\n";
+        //     // std::cout << buffer.size() << " " << HA.get_target_partition()->get_size_of_partition(i) << " " << recvcounts[i] << "\n";
         //     std::cout << buffer << "\n";
         // }
-        MPI_Bcast(buffer.data() + margin * recvcounts[i], HA->get_target_partition().get_size_of_partition(i) * recvcounts[i], wrapper_mpi<T>::mpi_type(), i, HA->get_comm());
-        if (HA->get_symmetry_type() == 'H') {
+        MPI_Bcast(buffer.data() + margin * recvcounts[i], HA.get_target_partition().get_size_of_partition(i) * recvcounts[i], wrapper_mpi<CoefficientPrecision>::mpi_type(), i, HA.get_comm());
+        if (HA.get_symmetry_type() == 'H') {
             conj_if_complex(buffer.data(), buffer.size());
         }
-        HA->internal_sub_matrix_product_to_local(buffer.data(), AZ.data(), recvcounts[i], HA->get_target_partition().get_offset_of_partition(i), HA->get_target_partition().get_size_of_partition(i));
+        HA.internal_sub_matrix_product_to_local(buffer.data(), AZ.data(), recvcounts[i], HA.get_target_partition().get_offset_of_partition(i), HA.get_target_partition().get_size_of_partition(i));
 
         // if (rankWorld == 0) {
         //     std::cout << "AZ " << i << "\n";
         //     std::cout << AZ << "\n";
         // }
 
         // Removed because complex scalar product afterward
-        // if (HA->get_symmetry_type() == 'H') {
+        // if (HA.get_symmetry_type() == 'H') {
         //     conj_if_complex(AZ.data(), AZ.size());
         // }
 
@@ -109,18 +112,33 @@ void build_coarse_space_outside(const DistributedOperator<T> *const HA, int nevi
             // Parce que partition de l'unité...
             // synchronize(true);
             for (int jj = 0; jj < nevi; jj++) {
-                int coord_E_i                     = displs[i] + j;
-                int coord_E_j                     = displs[rankWorld] + jj;
-                E[coord_E_i + coord_E_j * size_E] = std::inner_product(evi.data() + jj * n, evi.data() + jj * n + n_inside, vec_ovr.data(), T(0), std::plus<T>(), [](T u, T v) { return u * v; });
+                int coord_E_i           = displs[i] + j;
+                int coord_E_j           = displs[rankWorld] + jj;
+                E(coord_E_i, coord_E_j) = std::inner_product(evi.data() + jj * n, evi.data() + jj * n + n_inside, vec_ovr.data(), CoefficientPrecision(0), std::plus<CoefficientPrecision>(), [](CoefficientPrecision u, CoefficientPrecision v) { return u * v; });
             }
         }
     }
 
     if (rankWorld == 0)
-        MPI_Reduce(MPI_IN_PLACE, E.data(), size_E * size_E, wrapper_mpi<T>::mpi_type(), MPI_SUM, 0, HA->get_comm());
+        MPI_Reduce(MPI_IN_PLACE, E.data(), size_E * size_E, wrapper_mpi<CoefficientPrecision>::mpi_type(), MPI_SUM, 0, HA.get_comm());
     else
-        MPI_Reduce(E.data(), E.data(), size_E * size_E, wrapper_mpi<T>::mpi_type(), MPI_SUM, 0, HA->get_comm());
+        MPI_Reduce(E.data(), E.data(), size_E * size_E, wrapper_mpi<CoefficientPrecision>::mpi_type(), MPI_SUM, 0, HA.get_comm());
 }
 
+template <typename CoefficientPrecision>
+class GeneoCoarseOperatorBuilder : public VirtualCoarseOperatorBuilder<CoefficientPrecision> {
+  private:
+    const DistributedOperator<CoefficientPrecision> &m_distributed_operator;
+
+  public:
+    GeneoCoarseOperatorBuilder(const DistributedOperator<CoefficientPrecision> &HA) : m_distributed_operator(HA) {}
+
+    Matrix<CoefficientPrecision> build_coarse_operator(int n, int nevi, CoefficientPrecision **coarse_space) override {
+        Matrix<CoefficientPrecision> coarse_operator;
+        htool::build_geneo_coarse_operator(m_distributed_operator, nevi, n, coarse_space, coarse_operator);
+        return coarse_operator;
+    }
+};
+
 } // namespace htool
 #endif
diff --git a/include/htool/solvers/geneo/coarse_space_builder.hpp b/include/htool/solvers/geneo/coarse_space_builder.hpp
@@ -0,0 +1,155 @@
+#ifndef HTOOL_GENEO_COARSE_SPACE_BUILDER_HPP
+#define HTOOL_GENEO_COARSE_SPACE_BUILDER_HPP
+
+#include "../../basic_types/matrix.hpp"
+// #include "../../wrappers/wrapper_hpddm.hpp"
+#include "../../wrappers/wrapper_lapack.hpp"
+// #include "../hmatrix/hmatrix.hpp"
+#include "../interfaces/virtual_coarse_space_builder.hpp"
+namespace htool {
+
+template <typename CoefficientPrecision>
+class GeneoCoarseSpaceDenseBuilder : public VirtualCoarseSpaceBuilder<CoefficientPrecision> {
+
+  protected:
+    int m_size_wo_overlap;
+    int m_size_with_overlap;
+    Matrix<CoefficientPrecision> m_DAiD;
+    Matrix<CoefficientPrecision> m_Bi;
+    char m_symmetry                                                = 'N';
+    char m_uplo                                                    = 'N';
+    int m_geneo_nu                                                 = 2;
+    htool::underlying_type<CoefficientPrecision> m_geneo_threshold = -1.;
+    explicit GeneoCoarseSpaceDenseBuilder(int size_wo_overlap, const Matrix<CoefficientPrecision> &Ai, Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, int geneo_nu, htool::underlying_type<CoefficientPrecision> geneo_threshold) : m_size_wo_overlap(size_wo_overlap), m_size_with_overlap(Ai.nb_cols()), m_DAiD(m_size_with_overlap, m_size_with_overlap), m_Bi(Bi), m_symmetry(symmetry), m_uplo(uplo), m_geneo_nu(geneo_nu), m_geneo_threshold(geneo_threshold) {
+        for (int i = 0; i < m_size_wo_overlap; i++) {
+            std::copy_n(Ai.data() + i * m_size_with_overlap, m_size_wo_overlap, &(m_DAiD(0, i)));
+        }
+    }
+
+  public:
+    static GeneoCoarseSpaceDenseBuilder GeneoWithNu(int size_wo_overlap, const Matrix<CoefficientPrecision> &Ai, Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, int geneo_nu) { return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, Ai, Bi, symmetry, uplo, geneo_nu, -1}; }
+
+    static GeneoCoarseSpaceDenseBuilder GeneoWithThreshold(int size_wo_overlap, const Matrix<CoefficientPrecision> &Ai, Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, htool::underlying_type<CoefficientPrecision> geneo_threshold) { return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, Ai, Bi, symmetry, uplo, 0, geneo_threshold}; }
+
+    virtual Matrix<CoefficientPrecision> build_coarse_space() override {
+
+        int n    = m_size_with_overlap;
+        int ldvl = n, ldvr = n, lwork = -1;
+        int lda = n, ldb = n;
+        std::vector<CoefficientPrecision> work(n);
+        std::vector<double> rwork;
+        std::vector<int> index(n, 0);
+        std::iota(index.begin(), index.end(), int(0));
+        int nevi = m_geneo_nu;
+        int info;
+        // int rankWorld;
+        // MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld);
+        // m_DAiD.csv_save("m_DAiD_" + NbrToStr(rankWorld));
+        // std::cout << "?????? " << m_Bi.nb_rows() << " " << m_Bi.nb_cols() << "\n";
+        // m_Bi.csv_save("m_Bi_" + NbrToStr(rankWorld));
+        // std::cout << "BOUH " << m_geneo_nu << " " << m_geneo_threshold << "\n";
+        if (m_symmetry == 'S' || m_symmetry == 'H') {
+            int itype = 1;
+            std::vector<underlying_type<CoefficientPrecision>> w(n);
+            if (is_complex<CoefficientPrecision>()) {
+                rwork.resize(3 * n - 2);
+            }
+
+            // std::cout << "OUAAAAH\n";
+            Lapack<CoefficientPrecision>::gv(&itype, "V", &m_uplo, &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, w.data(), work.data(), &lwork, rwork.data(), &info);
+            lwork = (int)std::real(work[0]);
+            work.resize(lwork);
+            Lapack<CoefficientPrecision>::gv(&itype, "V", &m_uplo, &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, w.data(), work.data(), &lwork, rwork.data(), &info);
+
+            // std::cout << "OUAAAAH 2\n";
+            std::sort(index.begin(), index.end(), [&](const int &a, const int &b) {
+                return (std::abs(w[a]) > std::abs(w[b]));
+            });
+            if (m_geneo_threshold > 0.0) {
+                nevi = 0;
+                while (std::abs(w[index[nevi]]) > m_geneo_threshold && nevi < index.size()) {
+                    nevi++;
+                }
+            }
+
+            // if (rankWorld == 0) {
+            //     for (int i = 0; i < index.size(); i++) {
+            //         std::cout << std::abs(w[index[i]]) << " ";
+            //     }
+            //     std::cout << "\n";
+            //     // std::cout << vr << "\n";
+            // }
+            // MPI_Barrier(MPI_COMM_WORLD);
+            // if (rankWorld == 1) {
+            //     std::cout << "w : " << w << "\n";
+            //     std::cout << "info: " << info << "\n";
+            //     for (int i = 0; i < index.size(); i++) {
+            //         std::cout << std::abs(w[index[i]]) << " ";
+            //     }
+            //     std::cout << "\n";
+            //     // std::cout << vr << "\n";
+            // }
+
+            Matrix<CoefficientPrecision> Z(n, nevi);
+            for (int i = 0; i < m_size_wo_overlap; i++) {
+                for (int j = 0; j < nevi; j++) {
+                    Z(i, j) = m_DAiD(i, index[j]);
+                }
+            }
+            return Z;
+        }
+
+        if (is_complex<CoefficientPrecision>()) {
+            rwork.resize(8 * n);
+        }
+        std::vector<CoefficientPrecision> alphar(n), alphai((is_complex<CoefficientPrecision>() ? 0 : n)), beta(n);
+        std::vector<CoefficientPrecision> vl(n * n), vr(n * n);
+
+        Lapack<CoefficientPrecision>::ggev("N", "V", &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info);
+        lwork = (int)std::real(work[0]);
+        work.resize(lwork);
+        Lapack<CoefficientPrecision>::ggev("N", "V", &n, m_DAiD.data(), &lda, m_Bi.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info);
+
+        std::sort(index.begin(), index.end(), [&](const int &a, const int &b) {
+            return ((std::abs(beta[a]) < 1e-15 || (std::abs(alphar[a] / beta[a]) > std::abs(alphar[b] / beta[b]))) && !(std::abs(beta[b]) < 1e-15));
+        });
+        if (m_geneo_threshold > 0.0) {
+            nevi = 0;
+            while (std::abs(beta[index[nevi]]) < 1e-15 || (std::abs(alphar[index[nevi]] / beta[index[nevi]]) > m_geneo_threshold && nevi < index.size())) {
+                nevi++;
+            }
+        }
+
+        // if (rankWorld == 0) {
+        //     for (int i = 0; i < index.size(); i++) {
+        //         std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " ";
+        //     }
+        //     std::cout << "\n";
+        //     // std::cout << vr << "\n";
+        // }
+        // MPI_Barrier(MPI_COMM_WORLD);
+        // if (rankWorld == 1) {
+        //     // std::cout << "alphar : " << alphar << "\n";
+        //     // std::cout << "alphai : " << alphai << "\n";
+        //     // std::cout << "beta : " << beta << "\n";
+        //     // std::cout << "info: " << info << "\n";
+        //     for (int i = 0; i < index.size(); i++) {
+        //         std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " ";
+        //     }
+        //     std::cout << "\n";
+        //     // std::cout << vr << "\n";
+        // }
+
+        Matrix<CoefficientPrecision> Z(n, nevi);
+        for (int i = 0; i < m_size_wo_overlap; i++) {
+            for (int j = 0; j < nevi; j++) {
+                Z(i, j) = vr[index[j] * n + i];
+            }
+        }
+        return Z;
+    }
+};
+
+} // namespace htool
+
+#endif
diff --git a/include/htool/solvers/interfaces/virtual_coarse_operator_builder.hpp b/include/htool/solvers/interfaces/virtual_coarse_operator_builder.hpp
@@ -0,0 +1,17 @@
+#ifndef HTOOL_VIRTUAL_OPERATOR_SPACE_BUILDER_HPP
+#define HTOOL_VIRTUAL_OPERATOR_SPACE_BUILDER_HPP
+
+#include "../../basic_types/matrix.hpp"
+
+namespace htool {
+
+template <typename CoefficientPrecision>
+class VirtualCoarseOperatorBuilder {
+  public:
+    virtual Matrix<CoefficientPrecision> build_coarse_operator(int nb_rows, int nb_cols, CoefficientPrecision **Z) = 0;
+    virtual ~VirtualCoarseOperatorBuilder() {}
+};
+
+} // namespace htool
+
+#endif
diff --git a/include/htool/solvers/interfaces/virtual_coarse_space_builder.hpp b/include/htool/solvers/interfaces/virtual_coarse_space_builder.hpp
@@ -0,0 +1,15 @@
+#ifndef HTOOL_VIRTUAL_COARSE_SPACE_BUILDER_HPP
+#define HTOOL_VIRTUAL_COARSE_SPACE_BUILDER_HPP
+
+namespace htool {
+
+template <typename CoefficientPrecision>
+class VirtualCoarseSpaceBuilder {
+  public:
+    virtual Matrix<CoefficientPrecision> build_coarse_space() = 0;
+    virtual ~VirtualCoarseSpaceBuilder() {}
+};
+
+} // namespace htool
+
+#endif
diff --git a/include/htool/solvers/utility.hpp b/include/htool/solvers/utility.hpp
@@ -105,13 +105,11 @@ class DefaultDDMSolverBuilderAddingOverlap {
     };
     std::vector<std::vector<int>> m_intersections;
 
-  private:
-    Matrix<CoefficientPrecision> m_block_diagonal_dense_matrix;
-
   public:
+    Matrix<CoefficientPrecision> block_diagonal_dense_matrix;
     DDM<CoefficientPrecision> solver;
 
-    DefaultDDMSolverBuilderAddingOverlap(DistributedOperator<CoefficientPrecision> &distributed_operator, const HMatrix<CoefficientPrecision, CoordinatePrecision> *block_diagonal_hmatrix, const VirtualGeneratorWithPermutation<CoefficientPrecision> &generator, const std::vector<int> &ovr_subdomain_to_global, const std::vector<int> &cluster_to_ovr_subdomain, const std::vector<int> &neighbors, const std::vector<std::vector<int>> &intersections) : m_local_numbering(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections), local_to_global_numbering(m_local_numbering.local_to_global_numbering), m_block_diagonal_dense_matrix(initialize_diagonal_block(distributed_operator, block_diagonal_hmatrix, generator)), solver(distributed_operator, m_block_diagonal_dense_matrix, neighbors, m_local_numbering.intersections) {}
+    DefaultDDMSolverBuilderAddingOverlap(DistributedOperator<CoefficientPrecision> &distributed_operator, const HMatrix<CoefficientPrecision, CoordinatePrecision> *block_diagonal_hmatrix, const VirtualGeneratorWithPermutation<CoefficientPrecision> &generator, const std::vector<int> &ovr_subdomain_to_global, const std::vector<int> &cluster_to_ovr_subdomain, const std::vector<int> &neighbors, const std::vector<std::vector<int>> &intersections) : m_local_numbering(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections), local_to_global_numbering(m_local_numbering.local_to_global_numbering), block_diagonal_dense_matrix(initialize_diagonal_block(distributed_operator, block_diagonal_hmatrix, generator)), solver(distributed_operator, block_diagonal_dense_matrix, neighbors, m_local_numbering.intersections) {}
 };
 
 template <typename CoefficientPrecision, typename CoordinatePrecision>
@@ -154,13 +152,11 @@ class DefaultDDMSolverBuilder {
         return permuted_block_diagonal_dense_matrix_with_overlap;
     };
 
-  private:
-    Matrix<CoefficientPrecision> m_block_diagonal_dense_matrix;
-
   public:
+    Matrix<CoefficientPrecision> block_diagonal_dense_matrix;
     DDM<CoefficientPrecision> solver;
 
-    DefaultDDMSolverBuilder(DistributedOperator<CoefficientPrecision> &distributed_operator, const HMatrix<CoefficientPrecision, CoordinatePrecision> &block_diagonal_hmatrix, const std::vector<int> &neighbors, const std::vector<std::vector<int>> &intersections) : m_block_diagonal_dense_matrix(initialize_diagonal_block(block_diagonal_hmatrix)), solver(distributed_operator, m_block_diagonal_dense_matrix, neighbors, intersections) {}
+    DefaultDDMSolverBuilder(DistributedOperator<CoefficientPrecision> &distributed_operator, const HMatrix<CoefficientPrecision, CoordinatePrecision> &block_diagonal_hmatrix, const std::vector<int> &neighbors, const std::vector<std::vector<int>> &intersections) : block_diagonal_dense_matrix(initialize_diagonal_block(block_diagonal_hmatrix)), solver(distributed_operator, block_diagonal_dense_matrix, neighbors, intersections) {}
 };
 
 } // namespace htool