diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index f57ae6ef..a229c146 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -132,28 +132,31 @@ jobs: # ref: 14d013835eceb940b9b51dd90462fe67834db777 fetch-depth: 0 # to get tags - name: Checkout hpddm - uses: actions/checkout@v3 - with: - path: "hpddm" - repository: hpddm/hpddm - ref: afb9562c9c6673d9ff760985b104ef8d4caa3621 + run: | + git clone https://github.com/hpddm/hpddm.git hpddm + # cd hpddm & git checkout afb9562c9c6673d9ff760985b104ef8d4caa3621 + # uses: actions/checkout@v3 + # with: + # path: "hpddm" + # repository: hpddm/hpddm + # ref: afb9562c9c6673d9ff760985b104ef8d4caa3621 - name: Build tests run: | cd htool && mkdir build && cd build cmake -DHTOOL_WITH_EXAMPLES=1 -DMPIEXEC_PREFLAGS="${{ matrix.MPIEXEC_PREFLAGS }}" -DUSE_SANITIZER=${{ matrix.USE_SANITIZER }} -DCMAKE_BUILD_TYPE=${{ matrix.CMAKE_BUILD_TYPE }} -DCODE_COVERAGE=${{ matrix.CODE_COVERAGE }} ../ - make -j2 build-tests + make -j 4 build-tests - name: Run tests run: | cd htool/build export OMP_NUM_THREADS=2 - ${{ matrix.ASAN_OPTIONS }} ctest --output-on-failure + ${{ matrix.ASAN_OPTIONS }} ctest -j 4 --output-on-failure - name: Build examples run: | cd htool/build - make -j2 build-examples + make -j 4 build-examples - name: Build documentation run: | diff --git a/include/htool/distributed_operator/utility.hpp b/include/htool/distributed_operator/utility.hpp index 76cea87a..3ffdd2c6 100644 --- a/include/htool/distributed_operator/utility.hpp +++ b/include/htool/distributed_operator/utility.hpp @@ -26,7 +26,7 @@ class DefaultApproximationBuilder { DistributedOperator distributed_operator; const HMatrix *block_diagonal_hmatrix{nullptr}; - DefaultApproximationBuilder(const VirtualGenerator &generator, const Cluster &target_cluster, const Cluster &source_cluster, htool::underlying_type epsilon, htool::underlying_type eta, char symmetry, char UPLO, MPI_Comm communicator) : target_partition(target_cluster), 
source_partition(source_cluster), hmatrix(HMatrixTreeBuilder(target_cluster, source_cluster, epsilon, eta, symmetry, UPLO, -1, get_rankWorld(communicator)).build(generator)), local_hmatrix(hmatrix, target_cluster.get_cluster_on_partition(get_rankWorld(communicator)), source_cluster, symmetry, UPLO, false, false), distributed_operator(target_partition, source_partition, symmetry, UPLO, communicator) { + DefaultApproximationBuilder(const VirtualGenerator &generator, const Cluster &target_cluster, const Cluster &source_cluster, htool::underlying_type epsilon, htool::underlying_type eta, char symmetry, char UPLO, MPI_Comm communicator) : target_partition(target_cluster), source_partition(source_cluster), hmatrix(HMatrixTreeBuilder(target_cluster, source_cluster, epsilon, eta, symmetry, UPLO, -1, get_rankWorld(communicator), get_rankWorld(communicator)).build(generator)), local_hmatrix(hmatrix, target_cluster.get_cluster_on_partition(get_rankWorld(communicator)), source_cluster, symmetry, UPLO, false, false), distributed_operator(target_partition, source_partition, symmetry, UPLO, communicator) { distributed_operator.add_local_operator(&local_hmatrix); block_diagonal_hmatrix = hmatrix.get_sub_hmatrix(target_cluster.get_cluster_on_partition(get_rankWorld(communicator)), source_cluster.get_cluster_on_partition(get_rankWorld(communicator))); } @@ -52,7 +52,7 @@ class DefaultLocalApproximationBuilder { const HMatrix *block_diagonal_hmatrix{nullptr}; public: - DefaultLocalApproximationBuilder(const VirtualGenerator &generator, const Cluster &target_cluster, const Cluster &source_cluster, htool::underlying_type epsilon, htool::underlying_type eta, char symmetry, char UPLO, MPI_Comm communicator) : target_partition(target_cluster), source_partition(source_cluster), hmatrix(HMatrixTreeBuilder(target_cluster.get_cluster_on_partition(get_rankWorld(communicator)), source_cluster.get_cluster_on_partition(get_rankWorld(communicator)), epsilon, eta, symmetry, UPLO, -1, 
-1).build(generator)), local_hmatrix(hmatrix, target_cluster.get_cluster_on_partition(get_rankWorld(communicator)), source_cluster.get_cluster_on_partition(get_rankWorld(communicator)), symmetry, UPLO, false, false), distributed_operator(target_partition, source_partition, symmetry, UPLO, communicator) { + DefaultLocalApproximationBuilder(const VirtualGenerator &generator, const Cluster &target_cluster, const Cluster &source_cluster, htool::underlying_type epsilon, htool::underlying_type eta, char symmetry, char UPLO, MPI_Comm communicator) : target_partition(target_cluster), source_partition(source_cluster), hmatrix(HMatrixTreeBuilder(target_cluster.get_cluster_on_partition(get_rankWorld(communicator)), source_cluster.get_cluster_on_partition(get_rankWorld(communicator)), epsilon, eta, symmetry, UPLO, -1, -1, -1).build(generator)), local_hmatrix(hmatrix, target_cluster.get_cluster_on_partition(get_rankWorld(communicator)), source_cluster.get_cluster_on_partition(get_rankWorld(communicator)), symmetry, UPLO, false, false), distributed_operator(target_partition, source_partition, symmetry, UPLO, communicator) { distributed_operator.add_local_operator(&local_hmatrix); block_diagonal_hmatrix = hmatrix.get_sub_hmatrix(target_cluster.get_cluster_on_partition(get_rankWorld(communicator)), source_cluster.get_cluster_on_partition(get_rankWorld(communicator))); } diff --git a/include/htool/hmatrix/hmatrix.hpp b/include/htool/hmatrix/hmatrix.hpp index e054c60d..848ea2f3 100644 --- a/include/htool/hmatrix/hmatrix.hpp +++ b/include/htool/hmatrix/hmatrix.hpp @@ -20,6 +20,12 @@ namespace htool { // Class template > class HMatrix : public TreeNode, HMatrixTreeData> { + public: + enum class StorageType { + Dense, + LowRank, + Hierarchical + }; private: // Data members @@ -42,11 +48,6 @@ class HMatrix : public TreeNode m_leaves_in_diagonal_block{}; - enum class StorageType { - Dense, - LowRank, - Hierarchical - }; StorageType m_storage_type{StorageType::Hierarchical}; void 
set_leaves_in_cache() const { @@ -91,9 +92,51 @@ class HMatrix : public TreeNode *target_cluster, const Cluster *source_cluster) : TreeNode>(parent), m_target_cluster(target_cluster), m_source_cluster(source_cluster) {} - // no copy - HMatrix(const HMatrix &) = delete; - HMatrix &operator=(const HMatrix &) = delete; + HMatrix(const HMatrix &rhs) : TreeNode, HMatrixTreeData>(rhs), m_target_cluster(rhs.m_target_cluster), m_source_cluster(rhs.m_source_cluster), m_symmetry(rhs.m_symmetry), m_UPLO(rhs.m_UPLO), m_leaves(), m_leaves_for_symmetry(), m_symmetry_type_for_leaves(), m_storage_type(rhs.m_storage_type) { + Logger::get_instance().log(LogLevel::INFO, "Deep copy of HMatrix"); + this->m_depth = rhs.m_depth; + this->m_is_root = rhs.m_is_root; + this->m_tree_data = std::make_shared>(*rhs.m_tree_data); + this->m_children.clear(); + for (auto &child : rhs.m_children) { + this->m_children.emplace_back(std::make_unique>(*child)); + } + if (rhs.m_dense_data) { + m_dense_data = std::make_unique>(*rhs.m_dense_data); + } + if (rhs.m_low_rank_data) { + m_low_rank_data = std::make_unique>(*rhs.m_low_rank_data); + } + } + HMatrix &operator=(const HMatrix &rhs) { + Logger::get_instance().log(LogLevel::INFO, "Deep copy of HMatrix"); + if (&rhs == this) { + return *this; + } + this->m_depth = rhs.m_depth; + this->m_is_root = rhs.m_is_root; + this->m_tree_data = std::make_shared>(*rhs.m_tree_data); + this->m_children.clear(); + for (auto &child : rhs.m_children) { + this->m_children.emplace_back(std::make_unique>(*child)); + } + m_target_cluster = rhs.m_target_cluster; + m_source_cluster = rhs.m_source_cluster; + m_symmetry = rhs.m_symmetry; + m_UPLO = rhs.m_UPLO; + m_storage_type = rhs.m_storage_type; + + if (rhs.m_dense_data) { + m_dense_data = std::make_unique>(*rhs.m_dense_data); + } + if (rhs.m_low_rank_data) { + m_low_rank_data = std::make_unique>(*rhs.m_low_rank_data); + } + m_leaves.clear(); + m_leaves_for_symmetry.clear(); + return *this; + } + HMatrix(HMatrix &&) noexcept 
= default; HMatrix &operator=(HMatrix &&) noexcept = default; virtual ~HMatrix() = default; @@ -101,6 +144,33 @@ class HMatrix : public TreeNode &get_target_cluster() const { return *m_target_cluster; } const Cluster &get_source_cluster() const { return *m_source_cluster; } + int nb_cols() const { return m_source_cluster->get_size(); } + int nb_rows() const { return m_target_cluster->get_size(); } + + HMatrix *get_child_or_this(const Cluster &required_target_cluster, const Cluster &required_source_cluster) { + if (*m_target_cluster == required_target_cluster and *m_source_cluster == required_source_cluster) { + return this; + } + for (auto &child : this->m_children) { + if (child->get_target_cluster() == required_target_cluster and child->get_source_cluster() == required_source_cluster) { + return child.get(); + } + } + return nullptr; + } + + const HMatrix *get_child_or_this(const Cluster &required_target_cluster, const Cluster &required_source_cluster) const { + if (*m_target_cluster == required_target_cluster and *m_source_cluster == required_source_cluster) { + return this; + } + for (auto &child : this->m_children) { + if (child->get_target_cluster() == required_target_cluster and child->get_source_cluster() == required_source_cluster) { + return child.get(); + } + } + return nullptr; + } + int get_rank() const { return m_storage_type == StorageType::LowRank ? 
m_low_rank_data->rank_of() : -1; } @@ -113,7 +183,9 @@ class HMatrix : public TreeNode *get_dense_data() const { return m_dense_data.get(); } + Matrix *get_dense_data() { return m_dense_data.get(); } const LowRankMatrix *get_low_rank_data() const { return m_low_rank_data.get(); } + LowRankMatrix *get_low_rank_data() { return m_low_rank_data.get(); } char get_symmetry() const { return m_symmetry; } char get_UPLO() const { return m_UPLO; } const HMatrixTreeData *get_hmatrix_tree_data() const { return this->m_tree_data.get(); } @@ -136,6 +208,26 @@ class HMatrix : public TreeNode *get_sub_hmatrix(const Cluster &target_cluster, const Cluster &source_cluster) { + std::queue *> hmatrix_queue; + hmatrix_queue.push(this); + + while (!hmatrix_queue.empty()) { + HMatrix *current_hmatrix = hmatrix_queue.front(); + hmatrix_queue.pop(); + + if (target_cluster == current_hmatrix->get_target_cluster() && source_cluster == current_hmatrix->get_source_cluster()) { + return current_hmatrix; + } + + auto &children = current_hmatrix->get_children(); + for (auto &child : children) { + hmatrix_queue.push(child.get()); + } + } + return nullptr; + } + StorageType get_storage_type() const { return m_storage_type; } // HMatrix node setters void set_symmetry(char symmetry) { m_symmetry = symmetry; } @@ -145,6 +237,7 @@ class HMatrix : public TreeNodem_tree_data->m_eta = eta; } @@ -186,6 +279,15 @@ class HMatrix : public TreeNode &dense_matrix) { + this->delete_children(); + m_leaves.clear(); + m_leaves_for_symmetry.clear(); + m_dense_data = std::make_unique>(); + m_dense_data->assign(dense_matrix.nb_rows(), dense_matrix.nb_cols(), dense_matrix.release(), true); + m_storage_type = StorageType::Dense; + } + // Linear algebra void add_vector_product(char trans, CoefficientPrecision alpha, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out) const; void add_matrix_product_row_major(char trans, CoefficientPrecision alpha, const CoefficientPrecision *in, 
CoefficientPrecision beta, CoefficientPrecision *out, int mu) const; diff --git a/include/htool/hmatrix/interfaces/virtual_lrmat_generator.hpp b/include/htool/hmatrix/interfaces/virtual_lrmat_generator.hpp index d25f1a48..81b02eee 100644 --- a/include/htool/hmatrix/interfaces/virtual_lrmat_generator.hpp +++ b/include/htool/hmatrix/interfaces/virtual_lrmat_generator.hpp @@ -1,7 +1,7 @@ #ifndef HTOOL_VIRTUAL_LRMAT_GENERATOR_HPP #define HTOOL_VIRTUAL_LRMAT_GENERATOR_HPP -#include "../../basic_types/matrix.hpp" +#include "../../matrix/matrix.hpp" #include "../../misc/misc.hpp" #include "virtual_generator.hpp" #include diff --git a/include/htool/hmatrix/linalg/add_hmatrix_hmatrix_product.hpp b/include/htool/hmatrix/linalg/add_hmatrix_hmatrix_product.hpp new file mode 100644 index 00000000..522e3d56 --- /dev/null +++ b/include/htool/hmatrix/linalg/add_hmatrix_hmatrix_product.hpp @@ -0,0 +1,527 @@ +#ifndef HTOOL_HMATRIX_LINALG_ADD_HMATRIX_HMATRIX_PRODUCT_HPP +#define HTOOL_HMATRIX_LINALG_ADD_HMATRIX_HMATRIX_PRODUCT_HPP + +#include "../../matrix/linalg/add_matrix_matrix_product.hpp" +#include "../hmatrix.hpp" +#include "../lrmat/linalg/add_lrmat_lrmat.hpp" +#include "scale.hpp" + +namespace htool { + +template +void add_hmatrix_hmatrix_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const HMatrix &B, CoefficientPrecision beta, LowRankMatrix &C) { + if (beta != CoefficientPrecision(1)) { + scale(beta, C); + } + + if (A.is_hierarchical() and B.is_hierarchical()) { + + bool block_tree_not_consistent = (A.get_target_cluster().get_rank() < 0 || A.get_source_cluster().get_rank() < 0 || B.get_target_cluster().get_rank() < 0 || B.get_source_cluster().get_rank() < 0); + + std::vector> + low_rank_matrices; + std::vector *> output_clusters, middle_clusters, input_clusters; + auto get_output_cluster_A{&HMatrix::get_target_cluster}; + auto get_input_cluster_A{&HMatrix::get_source_cluster}; + + if (transa != 'N') { + get_input_cluster_A = 
&HMatrix::get_target_cluster; + get_output_cluster_A = &HMatrix::get_source_cluster; + } + const Cluster &output_cluster = (A.*get_output_cluster_A)(); + const Cluster &middle_cluster = (A.*get_input_cluster_A)(); + const Cluster &input_cluster = B.get_source_cluster(); + + if (output_cluster.is_leaf() || (block_tree_not_consistent and output_cluster.get_rank() >= 0)) { + output_clusters.push_back(&output_cluster); + } else if (block_tree_not_consistent) { + for (auto &output_cluster_child : output_cluster.get_clusters_on_partition()) { + output_clusters.push_back(output_cluster_child); + } + } else { + for (auto &output_cluster_child : output_cluster.get_children()) { + output_clusters.push_back(output_cluster_child.get()); + } + } + + if (input_cluster.is_leaf() || (block_tree_not_consistent and input_cluster.get_rank() >= 0)) { + input_clusters.push_back(&input_cluster); + } else if (block_tree_not_consistent) { + for (auto &input_cluster_child : input_cluster.get_clusters_on_partition()) { + input_clusters.push_back(input_cluster_child); + } + } else { + for (auto &input_cluster_child : input_cluster.get_children()) { + input_clusters.push_back(input_cluster_child.get()); + } + } + + if (middle_cluster.is_leaf() || (block_tree_not_consistent and middle_cluster.get_rank() >= 0)) { + middle_clusters.push_back(&middle_cluster); + } else if (block_tree_not_consistent) { + for (auto &middle_cluster_child : middle_cluster.get_clusters_on_partition()) { + middle_clusters.push_back(middle_cluster_child); + } + } else { + for (auto &middle_cluster_child : middle_cluster.get_children()) { + middle_clusters.push_back(middle_cluster_child.get()); + } + } + + for (auto &output_cluster_child : output_clusters) { + for (auto &input_cluster_child : input_clusters) { + low_rank_matrices.emplace_back(C.get_epsilon()); + low_rank_matrices.back().get_U().resize(output_cluster_child->get_size(), 0); + low_rank_matrices.back().get_V().resize(0, input_cluster_child->get_size()); + 
for (auto &middle_cluster_child : middle_clusters) { + const HMatrix *A_child = (transa == 'N') ? A.get_sub_hmatrix(*output_cluster_child, *middle_cluster_child) : A.get_sub_hmatrix(*middle_cluster_child, *output_cluster_child); + const HMatrix *B_child = B.get_sub_hmatrix(*middle_cluster_child, *input_cluster_child); + if (A_child->get_symmetry() == 'N') { + add_hmatrix_hmatrix_product(transa, transb, alpha, *A_child, *B_child, CoefficientPrecision(1), low_rank_matrices.back()); + } else { + add_hmatrix_hmatrix_product_symmetry('L', transa, transb, alpha, *A_child, *B_child, CoefficientPrecision(1), low_rank_matrices.back(), A_child->get_UPLO(), A_child->get_symmetry()); + } + } + } + } + int index = 0; + for (auto &output_cluster_child : output_clusters) { + for (auto &input_cluster_child : input_clusters) { + add_lrmat_lrmat(C, output_cluster, input_cluster, low_rank_matrices[index], *output_cluster_child, *input_cluster_child); + index++; + } + } + } else { + if (A.is_dense()) { + if (B.is_low_rank()) { + add_matrix_lrmat_product(transa, transb, alpha, *A.get_dense_data(), *B.get_low_rank_data(), CoefficientPrecision(1), C); + } else if (B.is_dense()) { + add_matrix_matrix_product(transa, transb, alpha, *A.get_dense_data(), *B.get_dense_data(), CoefficientPrecision(1), C); + } else { + add_matrix_hmatrix_product(transa, transb, alpha, *A.get_dense_data(), B, CoefficientPrecision(1), C); + } + } else if (A.is_low_rank()) { + if (B.is_dense()) { + add_lrmat_matrix_product(transa, transb, alpha, *A.get_low_rank_data(), *B.get_dense_data(), CoefficientPrecision(1), C); + } else if (B.is_low_rank()) { + add_lrmat_lrmat_product(transa, transb, alpha, *A.get_low_rank_data(), *B.get_low_rank_data(), CoefficientPrecision(1), C); + } else { + add_lrmat_hmatrix_product(transa, transb, alpha, *A.get_low_rank_data(), B, CoefficientPrecision(1), C); + } + } else { + if (B.is_low_rank()) { + add_hmatrix_lrmat_product(transa, transb, alpha, A, *B.get_low_rank_data(), 
CoefficientPrecision(1), C); + } else if (B.is_dense()) { + add_hmatrix_matrix_product(transa, transb, alpha, A, *B.get_dense_data(), CoefficientPrecision(1), C); + } + } + } +} + +template +void add_hmatrix_hmatrix_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const HMatrix &B, CoefficientPrecision beta, Matrix &C) { + + if (beta != CoefficientPrecision(1)) { + scale(beta, C); + } + + if (A.get_target_cluster().get_rank() < 0 || A.get_source_cluster().get_rank() < 0 || B.get_target_cluster().get_rank() < 0 || B.get_source_cluster().get_rank() < 0) { + + Matrix B_dense(B.nb_rows(), B.nb_cols()); + copy_to_dense(B, B_dense.data()); + add_hmatrix_matrix_product(transa, 'N', alpha, A, B_dense, CoefficientPrecision(1), C); // It could be optimized, but it should not happen often... + // std::vector *> output_clusters, middle_clusters, input_clusters; + // auto get_output_cluster_A{&HMatrix::get_target_cluster}; + // auto get_input_cluster_A{&HMatrix::get_source_cluster}; + + // if (transa != 'N') { + // get_input_cluster_A = &HMatrix::get_target_cluster; + // get_output_cluster_A = &HMatrix::get_source_cluster; + // } + // const Cluster &output_cluster = (A.*get_output_cluster_A)(); + // const Cluster &middle_cluster = (A.*get_input_cluster_A)(); + // const Cluster &input_cluster = B.get_source_cluster(); + + // if (output_cluster.get_rank() >= 0) { + // output_clusters.push_back(&output_cluster); + // } else { + // for (auto &output_cluster_child : output_cluster.get_clusters_on_partition()) { + // output_clusters.push_back(output_cluster_child); + // } + // } + // if (input_cluster.get_rank() >= 0) { + // input_clusters.push_back(&input_cluster); + // } else { + // for (auto &input_cluster_child : input_cluster.get_clusters_on_partition()) { + // input_clusters.push_back(input_cluster_child); + // } + // } + + // if (middle_cluster.get_rank() >= 0) { + // middle_clusters.push_back(&middle_cluster); + // } else { + // for (auto 
&middle_cluster_child : middle_cluster.get_clusters_on_partition()) { + // middle_clusters.push_back(middle_cluster_child); + // } + // } + + // for (auto &output_cluster_child : output_clusters) { + // for (auto &input_cluster_child : input_clusters) { + // for (auto &middle_cluster_child : middle_clusters) { + // const HMatrix *A_child = (transa == 'N') ? A.get_sub_hmatrix(*output_cluster_child, *middle_cluster_child) : A.get_sub_hmatrix(*middle_cluster_child, *output_cluster_child); + // const HMatrix *B_child = B.get_sub_hmatrix(*middle_cluster_child, *input_cluster_child); + // add_hmatrix_hmatrix_product(transa, transb, alpha, *A_child, *B_child, CoefficientPrecision(1), C); + // } + // } + // } + } else { + if (A.is_dense()) { + if (B.is_low_rank()) { + add_matrix_lrmat_product(transa, transb, alpha, *A.get_dense_data(), *B.get_low_rank_data(), CoefficientPrecision(1), C); + } else if (B.is_dense()) { + add_matrix_matrix_product(transa, transb, alpha, *A.get_dense_data(), *B.get_dense_data(), CoefficientPrecision(1), C); + } else { + add_matrix_hmatrix_product(transa, transb, alpha, *A.get_dense_data(), B, CoefficientPrecision(1), C); + } + } else if (A.is_low_rank()) { + if (B.is_dense()) { + add_lrmat_matrix_product(transa, transb, alpha, *A.get_low_rank_data(), *B.get_dense_data(), CoefficientPrecision(1), C); + } else if (B.is_low_rank()) { + add_lrmat_lrmat_product(transa, transb, alpha, *A.get_low_rank_data(), *B.get_low_rank_data(), CoefficientPrecision(1), C); + } else { + add_lrmat_hmatrix_product(transa, transb, alpha, *A.get_low_rank_data(), B, CoefficientPrecision(1), C); + } + } else { + if (B.is_low_rank()) { + add_hmatrix_lrmat_product(transa, transb, alpha, A, *B.get_low_rank_data(), CoefficientPrecision(1), C); + } else if (B.is_dense()) { + add_hmatrix_matrix_product(transa, transb, alpha, A, *B.get_dense_data(), CoefficientPrecision(1), C); + } else { + Matrix B_dense(B.get_target_cluster().get_size(), B.get_source_cluster().get_size()); + 
copy_to_dense(B, B_dense.data()); + add_hmatrix_matrix_product(transa, transb, alpha, A, B_dense, CoefficientPrecision(1), C); + } + } + } +} + +template +void add_hmatrix_hmatrix_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const HMatrix &B, CoefficientPrecision beta, HMatrix &C) { + + if (C.is_dense()) { + add_hmatrix_hmatrix_product(transa, transb, alpha, A, B, beta, *C.get_dense_data()); + } else if (C.is_low_rank()) { + add_hmatrix_hmatrix_product(transa, transb, alpha, A, B, beta, *C.get_low_rank_data()); + } else { + if (A.is_hierarchical() and B.is_hierarchical()) { + if (beta != CoefficientPrecision(1)) { + sequential_scale(beta, C); + } + bool block_tree_not_consistent = (A.get_target_cluster().get_rank() < 0 || A.get_source_cluster().get_rank() < 0 || B.get_target_cluster().get_rank() < 0 || B.get_source_cluster().get_rank() < 0); + + std::vector *> output_clusters, middle_clusters, input_clusters; + auto get_output_cluster_A{&HMatrix::get_target_cluster}; + auto get_input_cluster_A{&HMatrix::get_source_cluster}; + + if (transa != 'N') { + get_input_cluster_A = &HMatrix::get_target_cluster; + get_output_cluster_A = &HMatrix::get_source_cluster; + } + const Cluster &output_cluster = (A.*get_output_cluster_A)(); + const Cluster &middle_cluster = (A.*get_input_cluster_A)(); + const Cluster &input_cluster = B.get_source_cluster(); + + if (output_cluster.is_leaf() || (block_tree_not_consistent and output_cluster.get_rank() >= 0)) { + output_clusters.push_back(&output_cluster); + } else if (block_tree_not_consistent) { + for (auto &output_cluster_child : output_cluster.get_clusters_on_partition()) { + output_clusters.push_back(output_cluster_child); + } + } else { + for (auto &output_cluster_child : output_cluster.get_children()) { + output_clusters.push_back(output_cluster_child.get()); + } + } + + if (input_cluster.is_leaf() || (block_tree_not_consistent and input_cluster.get_rank() >= 0)) { + 
input_clusters.push_back(&input_cluster); + } else if (block_tree_not_consistent) { + for (auto &input_cluster_child : input_cluster.get_clusters_on_partition()) { + input_clusters.push_back(input_cluster_child); + } + } else { + for (auto &input_cluster_child : input_cluster.get_children()) { + input_clusters.push_back(input_cluster_child.get()); + } + } + + if (middle_cluster.is_leaf() || (block_tree_not_consistent and middle_cluster.get_rank() >= 0)) { + middle_clusters.push_back(&middle_cluster); + } else if (block_tree_not_consistent) { + for (auto &middle_cluster_child : middle_cluster.get_clusters_on_partition()) { + middle_clusters.push_back(middle_cluster_child); + } + } else { + for (auto &middle_cluster_child : middle_cluster.get_children()) { + middle_clusters.push_back(middle_cluster_child.get()); + } + } + + for (auto &output_cluster_child : output_clusters) { + for (auto &input_cluster_child : input_clusters) { + for (auto &middle_cluster_child : middle_clusters) { + auto &&A_child = (transa == 'N') ? 
A.get_child_or_this(*output_cluster_child, *middle_cluster_child) : A.get_child_or_this(*middle_cluster_child, *output_cluster_child); + if (A_child->get_symmetry() == 'N') { + add_hmatrix_hmatrix_product(transa, transb, alpha, *A_child, *B.get_child_or_this(*middle_cluster_child, *input_cluster_child), CoefficientPrecision(1), *C.get_child_or_this(*output_cluster_child, *input_cluster_child)); + } else { + add_hmatrix_hmatrix_product_symmetry('L', transa, transb, alpha, *A_child, *B.get_child_or_this(*middle_cluster_child, *input_cluster_child), CoefficientPrecision(1), *C.get_child_or_this(*output_cluster_child, *input_cluster_child), A_child->get_UPLO(), A_child->get_symmetry()); + } + } + } + } + } else { + Matrix C_dense(C.get_target_cluster().get_size(), C.get_source_cluster().get_size()); + copy_to_dense(C, C_dense.data()); + if (A.is_dense()) { + if (B.is_dense()) { + add_matrix_matrix_product(transa, transb, alpha, *A.get_dense_data(), *B.get_dense_data(), beta, C_dense); + } else if (B.is_low_rank()) { + add_matrix_lrmat_product(transa, transb, alpha, *A.get_dense_data(), *B.get_low_rank_data(), beta, C_dense); + } else { + add_matrix_hmatrix_product(transa, transb, alpha, *A.get_dense_data(), B, beta, C_dense); + } + } else if (A.is_low_rank()) { + if (B.is_dense()) { + add_lrmat_matrix_product(transa, transb, alpha, *A.get_low_rank_data(), *B.get_dense_data(), beta, C_dense); + } else if (B.is_low_rank()) { + add_lrmat_lrmat_product(transa, transb, alpha, *A.get_low_rank_data(), *B.get_low_rank_data(), beta, C_dense); + } else { + add_lrmat_hmatrix_product(transa, transb, alpha, *A.get_low_rank_data(), B, beta, C_dense); + } + } else if (A.is_hierarchical()) { + if (B.is_dense()) { + add_hmatrix_matrix_product(transa, transb, alpha, A, *B.get_dense_data(), beta, C_dense); + } else if (B.is_low_rank()) { + add_hmatrix_lrmat_product(transa, transb, alpha, A, *B.get_low_rank_data(), beta, C_dense); + } + } + C.set_dense_data(C_dense); + } + } +} + 
+template +void add_hmatrix_hmatrix_product_symmetry(char side, char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const HMatrix &B, CoefficientPrecision beta, LowRankMatrix &C, char UPLO, char symm) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_hmatrix_hmatrix_product_symmetric (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + if (side != 'L') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_hmatrix_hmatrix_product_symmetric (side=" + std::string(1, side) + ")"); // LCOV_EXCL_LINE + } + if (beta != CoefficientPrecision(1)) { + scale(beta, C); + } + + if (A.is_hierarchical() and B.is_hierarchical()) { + bool block_tree_not_consistent = (A.get_target_cluster().get_rank() < 0 || A.get_source_cluster().get_rank() < 0 || B.get_target_cluster().get_rank() < 0 || B.get_source_cluster().get_rank() < 0); + + std::vector> + low_rank_matrices; + std::vector *> output_clusters, middle_clusters, input_clusters; + auto get_output_cluster_A{&HMatrix::get_target_cluster}; + auto get_input_cluster_A{&HMatrix::get_source_cluster}; + + const Cluster &output_cluster = (A.*get_output_cluster_A)(); + const Cluster &middle_cluster = (A.*get_input_cluster_A)(); + const Cluster &input_cluster = B.get_source_cluster(); + + if (output_cluster.is_leaf() || (block_tree_not_consistent and output_cluster.get_rank() >= 0)) { + output_clusters.push_back(&output_cluster); + } else if (block_tree_not_consistent) { + for (auto &output_cluster_child : output_cluster.get_clusters_on_partition()) { + output_clusters.push_back(output_cluster_child); + } + } else { + for (auto &output_cluster_child : output_cluster.get_children()) { + output_clusters.push_back(output_cluster_child.get()); + } + } + + if (input_cluster.is_leaf() || (block_tree_not_consistent and input_cluster.get_rank() >= 0)) { + input_clusters.push_back(&input_cluster); + } else if 
(block_tree_not_consistent) { + for (auto &input_cluster_child : input_cluster.get_clusters_on_partition()) { + input_clusters.push_back(input_cluster_child); + } + } else { + for (auto &input_cluster_child : input_cluster.get_children()) { + input_clusters.push_back(input_cluster_child.get()); + } + } + + if (middle_cluster.is_leaf() || (block_tree_not_consistent and middle_cluster.get_rank() >= 0)) { + middle_clusters.push_back(&middle_cluster); + } else if (block_tree_not_consistent) { + for (auto &middle_cluster_child : middle_cluster.get_clusters_on_partition()) { + middle_clusters.push_back(middle_cluster_child); + } + } else { + for (auto &middle_cluster_child : middle_cluster.get_children()) { + middle_clusters.push_back(middle_cluster_child.get()); + } + } + + for (auto &output_cluster_child : output_clusters) { + for (auto &input_cluster_child : input_clusters) { + low_rank_matrices.emplace_back(C.get_epsilon()); + low_rank_matrices.back().get_U().resize(output_cluster_child->get_size(), 0); + low_rank_matrices.back().get_V().resize(0, input_cluster_child->get_size()); + for (auto &middle_cluster_child : middle_clusters) { + const HMatrix *A_child, *B_child; + char actual_transa; + if (output_cluster_child == middle_cluster_child) { + A_child = (transa == 'N') ? A.get_sub_hmatrix(*output_cluster_child, *middle_cluster_child) : A.get_sub_hmatrix(*middle_cluster_child, *output_cluster_child); + B_child = B.get_sub_hmatrix(*middle_cluster_child, *input_cluster_child); + add_hmatrix_hmatrix_product_symmetry(side, transa, transb, alpha, *A_child, *B_child, CoefficientPrecision(1), low_rank_matrices.back(), UPLO, symm); + } else if (output_cluster_child->get_offset() >= middle_cluster_child->get_offset() + middle_cluster_child->get_size()) { + A_child = (A.get_UPLO() == 'L') ? 
A.get_sub_hmatrix(*output_cluster_child, *middle_cluster_child) : A.get_sub_hmatrix(*middle_cluster_child, *output_cluster_child); + B_child = B.get_sub_hmatrix(*middle_cluster_child, *input_cluster_child); + actual_transa = (A.get_UPLO() == 'L') ? 'N' : 'T'; + add_hmatrix_hmatrix_product(actual_transa, transb, alpha, *A_child, *B_child, CoefficientPrecision(1), low_rank_matrices.back()); + } else { + A_child = (A.get_UPLO() == 'U') ? A.get_sub_hmatrix(*output_cluster_child, *middle_cluster_child) : A.get_sub_hmatrix(*middle_cluster_child, *output_cluster_child); + B_child = B.get_sub_hmatrix(*middle_cluster_child, *input_cluster_child); + actual_transa = (A.get_UPLO() == 'U') ? 'N' : 'T'; + add_hmatrix_hmatrix_product(actual_transa, transb, alpha, *A_child, *B_child, CoefficientPrecision(1), low_rank_matrices.back()); + } + } + } + } + int index = 0; + for (auto &output_cluster_child : output_clusters) { + for (auto &input_cluster_child : input_clusters) { + add_lrmat_lrmat(C, output_cluster, input_cluster, low_rank_matrices[index], *output_cluster_child, *input_cluster_child); + index++; + } + } + } else { + add_hmatrix_hmatrix_product(transa, transb, alpha, A, B, CoefficientPrecision(1), C); + } +} + +template +void add_hmatrix_hmatrix_product_symmetry(char side, char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const HMatrix &B, CoefficientPrecision beta, Matrix &C, char, char) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_hmatrix_hmatrix_product_symmetric (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + if (side != 'L') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_hmatrix_hmatrix_product_symmetric (side=" + std::string(1, side) + ")"); // LCOV_EXCL_LINE + } + if (beta != CoefficientPrecision(1)) { + scale(beta, C); + } + if (A.is_hierarchical() and B.is_hierarchical()) { + Matrix 
B_dense(B.get_target_cluster().get_size(), B.get_source_cluster().get_size()); + copy_to_dense(B, B_dense.data()); + add_hmatrix_matrix_product(transa, transb, alpha, A, B_dense, CoefficientPrecision(1), C); + } else { + add_hmatrix_hmatrix_product(transa, transb, alpha, A, B, CoefficientPrecision(1), C); + } +} + +template +void add_hmatrix_hmatrix_product_symmetry(char side, char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const HMatrix &B, CoefficientPrecision beta, HMatrix &C, char UPLO, char symm) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_hmatrix_hmatrix_product_symmetric (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + if (side != 'L') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_hmatrix_hmatrix_product_symmetric (side=" + std::string(1, side) + ")"); // LCOV_EXCL_LINE + } + if (beta != CoefficientPrecision(1)) { + scale(beta, C); + } + + if (A.is_hierarchical() and B.is_hierarchical() and C.is_hierarchical()) { + bool block_tree_not_consistent = (A.get_target_cluster().get_rank() < 0 || A.get_source_cluster().get_rank() < 0 || B.get_target_cluster().get_rank() < 0 || B.get_source_cluster().get_rank() < 0); + + std::vector> + low_rank_matrices; + std::vector *> output_clusters, middle_clusters, input_clusters; + auto get_output_cluster_A{&HMatrix::get_target_cluster}; + auto get_input_cluster_A{&HMatrix::get_source_cluster}; + + const Cluster &output_cluster = (A.*get_output_cluster_A)(); + const Cluster &middle_cluster = (A.*get_input_cluster_A)(); + const Cluster &input_cluster = B.get_source_cluster(); + + if (output_cluster.is_leaf() || (block_tree_not_consistent and output_cluster.get_rank() >= 0)) { + output_clusters.push_back(&output_cluster); + } else if (block_tree_not_consistent) { + for (auto &output_cluster_child : output_cluster.get_clusters_on_partition()) { + 
output_clusters.push_back(output_cluster_child); + } + } else { + for (auto &output_cluster_child : output_cluster.get_children()) { + output_clusters.push_back(output_cluster_child.get()); + } + } + + if (input_cluster.is_leaf() || (block_tree_not_consistent and input_cluster.get_rank() >= 0)) { + input_clusters.push_back(&input_cluster); + } else if (block_tree_not_consistent) { + for (auto &input_cluster_child : input_cluster.get_clusters_on_partition()) { + input_clusters.push_back(input_cluster_child); + } + } else { + for (auto &input_cluster_child : input_cluster.get_children()) { + input_clusters.push_back(input_cluster_child.get()); + } + } + + if (middle_cluster.is_leaf() || (block_tree_not_consistent and middle_cluster.get_rank() >= 0)) { + middle_clusters.push_back(&middle_cluster); + } else if (block_tree_not_consistent) { + for (auto &middle_cluster_child : middle_cluster.get_clusters_on_partition()) { + middle_clusters.push_back(middle_cluster_child); + } + } else { + for (auto &middle_cluster_child : middle_cluster.get_children()) { + middle_clusters.push_back(middle_cluster_child.get()); + } + } + + for (auto &output_cluster_child : output_clusters) { + for (auto &input_cluster_child : input_clusters) { + for (auto &middle_cluster_child : middle_clusters) { + const HMatrix *A_child, *B_child; + HMatrix *C_child; + char actual_transa; + if (output_cluster_child == middle_cluster_child) { + A_child = (transa == 'N') ? 
A.get_child_or_this(*output_cluster_child, *middle_cluster_child) : A.get_child_or_this(*middle_cluster_child, *output_cluster_child); + B_child = B.get_child_or_this(*middle_cluster_child, *input_cluster_child); + C_child = C.get_child_or_this(*output_cluster_child, *input_cluster_child); + add_hmatrix_hmatrix_product_symmetry(side, transa, transb, alpha, *A_child, *B_child, CoefficientPrecision(1), *C_child, UPLO, symm); + } else if (output_cluster_child->get_offset() >= middle_cluster_child->get_offset() + middle_cluster_child->get_size()) { + A_child = (A.get_UPLO() == 'L') ? A.get_child_or_this(*output_cluster_child, *middle_cluster_child) : A.get_child_or_this(*middle_cluster_child, *output_cluster_child); + B_child = B.get_child_or_this(*middle_cluster_child, *input_cluster_child); + C_child = C.get_child_or_this(*output_cluster_child, *input_cluster_child); + actual_transa = (A.get_UPLO() == 'L') ? 'N' : 'T'; + add_hmatrix_hmatrix_product(actual_transa, transb, alpha, *A_child, *B_child, CoefficientPrecision(1), *C_child); + } else { + A_child = (A.get_UPLO() == 'U') ? A.get_sub_hmatrix(*output_cluster_child, *middle_cluster_child) : A.get_sub_hmatrix(*middle_cluster_child, *output_cluster_child); + B_child = B.get_sub_hmatrix(*middle_cluster_child, *input_cluster_child); + C_child = C.get_child_or_this(*output_cluster_child, *input_cluster_child); + actual_transa = (A.get_UPLO() == 'U') ? 
'N' : 'T'; + add_hmatrix_hmatrix_product(actual_transa, transb, alpha, *A_child, *B_child, CoefficientPrecision(1), *C_child); + } + } + } + } + } else { + add_hmatrix_hmatrix_product(transa, transb, alpha, A, B, CoefficientPrecision(1), C); + } +} + +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/linalg/add_hmatrix_lrmat_product.hpp b/include/htool/hmatrix/linalg/add_hmatrix_lrmat_product.hpp new file mode 100644 index 00000000..b48dfb0d --- /dev/null +++ b/include/htool/hmatrix/linalg/add_hmatrix_lrmat_product.hpp @@ -0,0 +1,97 @@ +#ifndef HTOOL_HMATRIX_LINALG_ADD_HMATRIX_LOW_RANK_MATRIX_PRODUCT_HPP +#define HTOOL_HMATRIX_LINALG_ADD_HMATRIX_LOW_RANK_MATRIX_PRODUCT_HPP + +#include "../hmatrix.hpp" +#include "add_hmatrix_matrix_product.hpp" + +namespace htool { + +// template +// void add_hmatrix_lrmat_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const LowRankMatrix &B, CoefficientPrecision beta, LowRankMatrix &C) { +// if (transb != 'N') { +// htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE +// } +// auto &U = B.get_U(); +// auto &V = B.get_V(); +// auto rank = B.rank_of(); +// } + +template +void add_hmatrix_lrmat_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const LowRankMatrix &B, CoefficientPrecision beta, Matrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + auto rank = B.rank_of(); + + if (rank != 0) { + auto &U = B.get_U(); + auto &V = B.get_V(); + Matrix AU; + if (transa == 'N') { + AU.resize(A.nb_rows(), U.nb_cols()); + } else { + AU.resize(A.nb_cols(), U.nb_cols()); + } + add_hmatrix_matrix_product(transa, 'N', 1, A, U, 0, AU); + add_matrix_matrix_product('N', 'N', alpha, AU, V, beta, 
C); + } +} + +template +void add_hmatrix_lrmat_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const LowRankMatrix &B, CoefficientPrecision beta, LowRankMatrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + auto rank = B.rank_of(); + + if (rank != 0) { + auto &U_B = B.get_U(); + auto &V_B = B.get_V(); + auto &U_C = C.get_U(); + auto &V_C = C.get_V(); + if (beta == CoefficientPrecision(0) || C.rank_of() == 0) { + if (transa == 'N') { + U_C.resize(A.nb_rows(), U_B.nb_cols()); + } else { + U_C.resize(A.nb_cols(), U_B.nb_cols()); + } + V_C = V_B; + add_hmatrix_matrix_product(transa, 'N', alpha, A, U_B, 0, U_C); + } else { + + // Concatenate V_B and V_C + Matrix new_V; + scale(beta, V_C); + new_V.resize(V_B.nb_rows() + V_C.nb_rows(), V_C.nb_cols()); + for (int j = 0; j < new_V.nb_cols(); j++) { + std::copy_n(V_B.data() + j * V_B.nb_rows(), V_B.nb_rows(), new_V.data() + j * new_V.nb_rows()); + std::copy_n(V_C.data() + j * V_C.nb_rows(), V_C.nb_rows(), new_V.data() + j * new_V.nb_rows() + V_B.nb_rows()); + } + + // Compute AU= A*U_A + Matrix AU; + if (transa == 'N') { + AU.resize(A.nb_rows(), U_B.nb_cols()); + } else { + AU.resize(A.nb_cols(), U_B.nb_cols()); + } + add_hmatrix_matrix_product(transa, 'N', alpha, A, U_B, 0, AU); + + // Concatenate U_A and U_C + Matrix new_U; + new_U.resize(AU.nb_rows(), AU.nb_cols() + U_C.nb_cols()); + std::copy_n(AU.data(), AU.nb_rows() * AU.nb_cols(), new_U.data()); + std::copy_n(U_C.data(), U_C.nb_rows() * U_C.nb_cols(), new_U.data() + AU.nb_rows() * AU.nb_cols()); + + C.get_U() = new_U; + C.get_V() = new_V; + recompression(C); + } + } +} + +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/linalg/add_hmatrix_matrix_product.hpp b/include/htool/hmatrix/linalg/add_hmatrix_matrix_product.hpp new file mode 100644 index 
00000000..e4f56edd --- /dev/null +++ b/include/htool/hmatrix/linalg/add_hmatrix_matrix_product.hpp @@ -0,0 +1,102 @@ +#ifndef HTOOL_HMATRIX_LINALG_ADD_HMATRIX_MATRIX_PRODUCT_HPP +#define HTOOL_HMATRIX_LINALG_ADD_HMATRIX_MATRIX_PRODUCT_HPP + +#include "../hmatrix.hpp" +#include "add_hmatrix_matrix_product_row_major.hpp" + +namespace htool { + +template +void add_hmatrix_matrix_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const Matrix &B, CoefficientPrecision beta, Matrix &C) { + + Matrix transposed_B, transposed_C; + transpose(B, transposed_B); + transpose(C, transposed_C); + sequential_add_hmatrix_matrix_product_row_major(transa, transb, alpha, A, transposed_B.data(), beta, transposed_C.data(), transposed_C.nb_rows()); + transpose(transposed_C, C); +} + +template +void add_hmatrix_matrix_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const Matrix &B, CoefficientPrecision beta, LowRankMatrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + bool C_is_overwritten = (beta == CoefficientPrecision(0) || C.rank_of() == 0); + + int nb_rows = (transa == 'N') ? 
A.nb_rows() : A.nb_cols(); + int nb_cols = B.nb_cols(); + + // + Matrix AB(nb_rows, nb_cols); + add_hmatrix_matrix_product(transa, transb, alpha, A, B, CoefficientPrecision(0), AB); + + // SVD truncation + std::vector> singular_values(std::min(nb_rows, nb_cols)); + Matrix u(nb_rows, nb_rows); + Matrix vt(nb_cols, nb_cols); + int truncated_rank = SVD_truncation(AB, C.get_epsilon(), u, vt, singular_values); + + // new_U=u*sqrt(tildeS) and new_V=sqrt(tildeS)*vt in the right dimensions + Matrix *new_U_ptr, *new_V_ptr; + Matrix U_1, V_1; + if (C_is_overwritten) { + new_U_ptr = &C.get_U(); + new_V_ptr = &C.get_V(); + } else { + new_U_ptr = &U_1; + new_V_ptr = &V_1; + } + + { + Matrix &new_U = *new_U_ptr; + Matrix &new_V = *new_V_ptr; + int M = nb_rows; + int N = nb_cols; + int incx = 1; + new_U.resize(M, truncated_rank); + new_V.resize(truncated_rank, N); + CoefficientPrecision scaling_coef; + for (int r = 0; r < truncated_rank; r++) { + scaling_coef = std::sqrt(singular_values[r]); + std::copy_n(u.data() + r * u.nb_rows(), u.nb_cols(), new_U.data() + r * M); + Blas::scal(&M, &scaling_coef, new_U.data() + r * M, &incx); + } + for (int r = 0; r < vt.nb_cols(); r++) { + std::copy_n(vt.data() + r * vt.nb_rows(), truncated_rank, new_V.data() + r * truncated_rank); + } + + for (int r = 0; r < truncated_rank; r++) { + for (int j = 0; j < new_V.nb_cols(); j++) { + new_V(r, j) = std::sqrt(singular_values[r]) * new_V(r, j); + } + } + } + + if (C_is_overwritten) { + return; + } + + // Concatenate U_1 and U_2 + Matrix &U_2 = C.get_U(); + Matrix new_U(U_1.nb_rows(), U_1.nb_cols() + U_2.nb_cols()); + std::copy_n(U_1.data(), U_1.nb_rows() * U_1.nb_cols(), new_U.data()); + std::copy_n(U_2.data(), U_2.nb_rows() * U_2.nb_cols(), new_U.data() + U_1.nb_rows() * U_1.nb_cols()); + + // Concatenate V_1 and V_2 + Matrix &V_2 = C.get_V(); + scale(beta, V_2); + Matrix new_V(V_1.nb_rows() + V_2.nb_rows(), V_2.nb_cols()); + for (int j = 0; j < new_V.nb_cols(); j++) { + std::copy_n(V_1.data() + j * 
V_1.nb_rows(), V_1.nb_rows(), new_V.data() + j * new_V.nb_rows()); + std::copy_n(V_2.data() + j * V_2.nb_rows(), V_2.nb_rows(), new_V.data() + j * new_V.nb_rows() + V_1.nb_rows()); + } + + // Set C + C.get_U() = new_U; + C.get_V() = new_V; + recompression(C); +} +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/linalg/add_hmatrix_matrix_product_row_major.hpp b/include/htool/hmatrix/linalg/add_hmatrix_matrix_product_row_major.hpp new file mode 100644 index 00000000..950600f6 --- /dev/null +++ b/include/htool/hmatrix/linalg/add_hmatrix_matrix_product_row_major.hpp @@ -0,0 +1,153 @@ +#ifndef HTOOL_HMATRIX_LINALG_ADD_HMATRIX_MATRIX_PRODUCT_ROW_MAJOR_HPP +#define HTOOL_HMATRIX_LINALG_ADD_HMATRIX_MATRIX_PRODUCT_ROW_MAJOR_HPP + +#include "../hmatrix.hpp" + +namespace htool { + +template +void sequential_add_hmatrix_matrix_product_row_major(char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const CoefficientPrecision *B, CoefficientPrecision beta, CoefficientPrecision *C, int mu) { + // set_leaves_in_cache(); + auto &leaves = A.get_leaves(); + auto &leaves_for_symmetry = A.get_leaves_for_symmetry(); + + if ((transa == 'T' && A.get_symmetry_for_leaves() == 'H') + || (transa == 'C' && A.get_symmetry_for_leaves() == 'S')) { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not supported (transa=" + std::string(1, transa) + " with " + A.get_symmetry_for_leaves() + ")"); // LCOV_EXCL_LINE + } + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for sequential_add_hmatrix_matrix_product_row_major (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + int out_size(A.get_target_cluster().get_size() * mu); + auto get_output_cluster{&HMatrix::get_target_cluster}; + auto get_input_cluster{&HMatrix::get_source_cluster}; + int local_output_offset = A.get_target_cluster().get_offset(); + int local_input_offset = A.get_source_cluster().get_offset(); + char trans_sym = 
(A.get_symmetry_for_leaves() == 'S') ? 'T' : 'C'; + + if (transa != 'N') { + out_size = A.get_source_cluster().get_size() * mu; + get_input_cluster = &HMatrix::get_target_cluster; + get_output_cluster = &HMatrix::get_source_cluster; + local_input_offset = A.get_target_cluster().get_offset(); + local_output_offset = A.get_source_cluster().get_offset(); + trans_sym = 'N'; + } + + int incx(1), incy(1); + if (CoefficientPrecision(beta) != CoefficientPrecision(1)) { + // TODO: use blas + std::transform(C, C + out_size, C, [&beta](CoefficientPrecision &c) { return c * beta; }); + } + + // Contribution champ lointain + std::vector temp(out_size, 0); + for (int b = 0; b < leaves.size(); b++) { + int input_offset = (leaves[b]->*get_input_cluster)().get_offset(); + int output_offset = (leaves[b]->*get_output_cluster)().get_offset(); + leaves[b]->add_matrix_product_row_major(transa, 1, B + (input_offset - local_input_offset) * mu, 1, temp.data() + (output_offset - local_output_offset) * mu, mu); + } + + // Symmetry part of the diagonal part + if (A.get_symmetry_for_leaves() != 'N') { + for (int b = 0; b < leaves_for_symmetry.size(); b++) { + int input_offset = (leaves_for_symmetry[b]->*get_input_cluster)().get_offset(); + int output_offset = (leaves_for_symmetry[b]->*get_output_cluster)().get_offset(); + leaves_for_symmetry[b]->add_matrix_product_row_major(trans_sym, 1, B + (output_offset - local_input_offset) * mu, 1, temp.data() + (input_offset - local_output_offset) * mu, mu); + } + } + Blas::axpy(&out_size, &alpha, temp.data(), &incx, C, &incy); +} + +template +void openmp_add_hmatrix_matrix_product_row_major(char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const CoefficientPrecision *B, CoefficientPrecision beta, CoefficientPrecision *C, int mu) { + // set_leaves_in_cache(); + auto &leaves = A.get_leaves(); + auto &leaves_for_symmetry = A.get_leaves_for_symmetry(); + + if ((transa == 'T' && A.get_symmetry_for_leaves() == 'H') + || (transa == 'C' && 
A.get_symmetry_for_leaves() == 'S')) { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not supported (transa=" + std::string(1, transa) + " with " + A.get_symmetry_for_leaves() + ")"); // LCOV_EXCL_LINE + } + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for openmp_add_hmatrix_matrix_product_row_major (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + int out_size(A.get_target_cluster().get_size() * mu); + auto get_output_cluster{&HMatrix::get_target_cluster}; + auto get_input_cluster{&HMatrix::get_source_cluster}; + int local_output_offset = A.get_target_cluster().get_offset(); + int local_input_offset = A.get_source_cluster().get_offset(); + char trans_sym = (A.get_symmetry_for_leaves() == 'S') ? 'T' : 'C'; + + if (transa != 'N') { + out_size = A.get_source_cluster().get_size() * mu; + get_input_cluster = &HMatrix::get_target_cluster; + get_output_cluster = &HMatrix::get_source_cluster; + local_input_offset = A.get_target_cluster().get_offset(); + local_output_offset = A.get_source_cluster().get_offset(); + trans_sym = 'N'; + } + + int incx(1), incy(1); + if (CoefficientPrecision(beta) != CoefficientPrecision(1)) { + // TODO: use blas + std::transform(C, C + out_size, C, [&beta](CoefficientPrecision &c) { return c * beta; }); + } + + // Contribution champ lointain +#if defined(_OPENMP) +# pragma omp parallel +#endif + { + std::vector temp(out_size, 0); +#if defined(_OPENMP) +# pragma omp for schedule(guided) nowait +#endif + for (int b = 0; b < leaves.size(); b++) { + int input_offset = (leaves[b]->*get_input_cluster)().get_offset(); + int output_offset = (leaves[b]->*get_output_cluster)().get_offset(); + add_hmatrix_matrix_product_row_major(transa, transb, CoefficientPrecision(1), *leaves[b], B + (input_offset - local_input_offset) * mu, CoefficientPrecision(1), temp.data() + (output_offset - local_output_offset) * mu, mu); + } + + // Symmetry part of the diagonal part 
+ if (A.get_symmetry_for_leaves() != 'N') { +#if defined(_OPENMP) +# pragma omp for schedule(guided) nowait +#endif + for (int b = 0; b < leaves_for_symmetry.size(); b++) { + int input_offset = (leaves_for_symmetry[b]->*get_input_cluster)().get_offset(); + int output_offset = (leaves_for_symmetry[b]->*get_output_cluster)().get_offset(); + add_hmatrix_matrix_product_row_major(trans_sym, 'N', CoefficientPrecision(1), *leaves_for_symmetry[b], B + (output_offset - local_input_offset) * mu, CoefficientPrecision(1), temp.data() + (input_offset - local_output_offset) * mu, mu); + } + } + +#if defined(_OPENMP) +# pragma omp critical +#endif + Blas::axpy(&out_size, &alpha, temp.data(), &incx, C, &incy); + } +} + +template +void add_hmatrix_matrix_product_row_major(char transa, char transb, CoefficientPrecision alpha, const HMatrix &A, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out, int mu) { + switch (A.get_storage_type()) { + case HMatrix::StorageType::Dense: + if (A.get_symmetry() == 'N') { + A.get_dense_data()->add_matrix_product_row_major(transa, alpha, in, beta, out, mu); + } else { + A.get_dense_data()->add_matrix_product_symmetric_row_major(transa, alpha, in, beta, out, mu, A.get_UPLO(), A.get_symmetry()); + } + break; + case HMatrix::StorageType::LowRank: + A.get_low_rank_data()->add_matrix_product_row_major(transa, alpha, in, beta, out, mu); + break; + default: + openmp_add_hmatrix_matrix_product_row_major(transa, transb, alpha, A, in, beta, out, mu); + break; + } +} +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/linalg/add_hmatrix_vector_product.hpp b/include/htool/hmatrix/linalg/add_hmatrix_vector_product.hpp new file mode 100644 index 00000000..ccda0d8c --- /dev/null +++ b/include/htool/hmatrix/linalg/add_hmatrix_vector_product.hpp @@ -0,0 +1,134 @@ +#ifndef HTOOL_HMATRIX_LINALG_ADD_HMATRIX_VECTOR_PRODUCT_HPP +#define HTOOL_HMATRIX_LINALG_ADD_HMATRIX_VECTOR_PRODUCT_HPP + +#include "../hmatrix.hpp" + 
+namespace htool { + +// template +// void sequential_add_hmatrix_vector_product(char trans, CoefficientPrecision alpha, const HMatrix &A, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out) { + +// int out_size(A.get_target_cluster().get_size()); +// auto get_output_cluster{&HMatrix::get_target_cluster}; +// auto get_input_cluster{&HMatrix::get_source_cluster}; +// int local_input_offset = A.get_source_cluster().get_offset(); +// int local_output_offset = A.get_target_cluster().get_offset(); +// char trans_sym = (A.get_symmetry_for_leaves() == 'S') ? 'T' : 'C'; + +// if (trans != 'N') { +// out_size = A.get_source_cluster().get_size(); +// get_input_cluster = &HMatrix::get_target_cluster; +// get_output_cluster = &HMatrix::get_source_cluster; +// local_input_offset = A.get_target_cluster().get_offset(); +// local_output_offset = A.get_source_cluster().get_offset(); +// trans_sym = 'N'; +// } + +// if (CoefficientPrecision(beta) != CoefficientPrecision(1)) { +// std::transform(out, out + out_size, out, [&beta](CoefficientPrecision &c) { return c * beta; }); +// } + +// // Contribution champ lointain +// std::vector temp(out_size, 0); +// // for (int b = 0; b < A.get_leaves().size(); b++) { +// for (auto &leaf : A.get_leaves()) { +// int input_offset = (leaf->*get_input_cluster)().get_offset(); +// int output_offset = (leaf->*get_output_cluster)().get_offset(); +// leaf->add_vector_product(trans, 1, in + input_offset - local_input_offset, alpha, out + (output_offset - local_output_offset)); +// } + +// // Symmetry part of the diagonal part +// if (A.get_symmetry_for_leaves() != 'N') { +// // for (int b = 0; b < A.get_leaves_for_symmetry().size(); b++) { +// for (auto &leaf_for_symmetry : A.get_leaves_for_symmetry()) { +// int input_offset = (leaf_for_symmetry->*get_input_cluster)().get_offset(); +// int output_offset = (leaf_for_symmetry->*get_output_cluster)().get_offset(); +// leaf_for_symmetry->add_vector_product(trans_sym, 
alpha, in + output_offset - local_input_offset, 1, out + (input_offset - local_output_offset)); +// } +// } +// } + +template +void openmp_add_hmatrix_vector_product(char trans, CoefficientPrecision alpha, const HMatrix &A, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out) { + + // A.set_leaves_in_cache(); + auto &leaves = A.get_leaves(); + auto &leaves_for_symmetry = A.get_leaves_for_symmetry(); + + int out_size(A.get_target_cluster().get_size()); + auto get_output_cluster{&HMatrix::get_target_cluster}; + auto get_input_cluster{&HMatrix::get_source_cluster}; + int local_input_offset = A.get_source_cluster().get_offset(); + int local_output_offset = A.get_target_cluster().get_offset(); + char trans_sym = (A.get_symmetry_for_leaves() == 'S') ? 'T' : 'C'; + + if (trans != 'N') { + out_size = A.get_source_cluster().get_size(); + get_input_cluster = &HMatrix::get_target_cluster; + get_output_cluster = &HMatrix::get_source_cluster; + local_input_offset = A.get_target_cluster().get_offset(); + local_output_offset = A.get_source_cluster().get_offset(); + trans_sym = 'N'; + } + + int incx(1), incy(1); + if (CoefficientPrecision(beta) != CoefficientPrecision(1)) { + // TODO: use blas + std::transform(out, out + out_size, out, [&beta](CoefficientPrecision &c) { return c * beta; }); + } + +// Contribution champ lointain +#if defined(_OPENMP) +# pragma omp parallel +#endif + { + std::vector temp(out_size, 0); +#if defined(_OPENMP) +# pragma omp for schedule(guided) nowait +#endif + for (int b = 0; b < leaves.size(); b++) { + int input_offset = (leaves[b]->*get_input_cluster)().get_offset(); + int output_offset = (leaves[b]->*get_output_cluster)().get_offset(); + leaves[b]->add_vector_product(trans, 1, in + input_offset - local_input_offset, 1, temp.data() + (output_offset - local_output_offset)); + } + + // Symmetry part of the diagonal part + if (A.get_symmetry_for_leaves() != 'N') { +#if defined(_OPENMP) +# pragma omp for schedule(guided) 
nowait +#endif + for (int b = 0; b < leaves_for_symmetry.size(); b++) { + int input_offset = (leaves_for_symmetry[b]->*get_input_cluster)().get_offset(); + int output_offset = (leaves_for_symmetry[b]->*get_output_cluster)().get_offset(); + leaves_for_symmetry[b]->add_vector_product(trans_sym, 1, in + output_offset - local_input_offset, 1, temp.data() + (input_offset - local_output_offset)); + } + } + +#if defined(_OPENMP) +# pragma omp critical +#endif + Blas::axpy(&out_size, &alpha, temp.data(), &incx, out, &incy); + } +} + +template +void add_hmatrix_vector_product(char trans, CoefficientPrecision alpha, const HMatrix &A, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out) { + switch (A.get_storage_type()) { + case HMatrix::StorageType::Dense: + if (A.get_symmetry() == 'N') { + A.get_dense_data()->add_vector_product(trans, alpha, in, beta, out); + } else { + A.get_dense_data()->add_vector_product_symmetric(trans, alpha, in, beta, out, A.get_UPLO(), A.get_symmetry()); + } + break; + case HMatrix::StorageType::LowRank: + A.get_low_rank_data()->add_vector_product(trans, alpha, in, beta, out); + break; + default: + openmp_add_hmatrix_vector_product(trans, alpha, A, in, beta, out); + break; + } +} +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/linalg/add_lrmat_hmatrix_product.hpp b/include/htool/hmatrix/linalg/add_lrmat_hmatrix_product.hpp new file mode 100644 index 00000000..b693fb1d --- /dev/null +++ b/include/htool/hmatrix/linalg/add_lrmat_hmatrix_product.hpp @@ -0,0 +1,94 @@ +#ifndef HTOOL_HMATRIX_LINALG_ADD_LOW_RANK_MATRIX_HMATRIX_PRODUCT_HPP +#define HTOOL_HMATRIX_LINALG_ADD_LOW_RANK_MATRIX_HMATRIX_PRODUCT_HPP + +#include "../hmatrix.hpp" +#include "add_matrix_hmatrix_product.hpp" + +namespace htool { + +template +void add_lrmat_hmatrix_product(char transa, char transb, CoefficientPrecision alpha, const LowRankMatrix &A, const HMatrix &B, CoefficientPrecision beta, Matrix &C) { + if (transb != 'N') { + 
htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + auto rank = A.rank_of(); + if (rank != 0) { + auto &U = A.get_U(); + auto &V = A.get_V(); + if (transa == 'N') { + Matrix VB(V.nb_rows(), B.nb_cols()); + add_matrix_hmatrix_product(transa, 'N', 1, V, B, 0, VB); + add_matrix_matrix_product(transa, 'N', alpha, U, VB, beta, C); + } else { + Matrix UtB(V.nb_rows(), B.nb_cols()); + add_matrix_hmatrix_product(transa, 'N', 1, U, B, 0, UtB); + add_matrix_matrix_product(transa, 'N', alpha, V, UtB, beta, C); + } + } +} + +template +void add_lrmat_hmatrix_product(char transa, char transb, CoefficientPrecision alpha, const LowRankMatrix &A, const HMatrix &B, CoefficientPrecision beta, LowRankMatrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_lrmat_matrix_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + auto rank = A.rank_of(); + if (rank != 0) { + auto &U_A = A.get_U(); + auto &V_A = A.get_V(); + auto &U_C = C.get_U(); + auto &V_C = C.get_V(); + if (beta == CoefficientPrecision(0) || C.rank_of() == 0) { + if (transa == 'N') { + V_C.resize(V_A.nb_rows(), B.nb_cols()); + U_C = U_A; + add_matrix_hmatrix_product(transa, 'N', alpha, V_A, B, 0, V_C); + } else { + V_C.resize(U_A.nb_cols(), B.nb_cols()); + transpose(V_A, U_C); + add_matrix_hmatrix_product(transa, 'N', alpha, U_A, B, 0, V_C); + } + } else { + Matrix VB; + Matrix new_U; + if (transa == 'N') { + // Concatenate U_A and U_C + new_U.resize(U_A.nb_rows(), U_A.nb_cols() + U_C.nb_cols()); + std::copy_n(U_A.data(), U_A.nb_rows() * U_A.nb_cols(), new_U.data()); + std::copy_n(U_C.data(), U_C.nb_rows() * U_C.nb_cols(), new_U.data() + U_A.nb_rows() * U_A.nb_cols()); + + // Compute VB=V_B*B + VB.resize(V_A.nb_rows(), B.nb_cols()); + add_matrix_hmatrix_product(transa, 'N', alpha, V_A, B, 0, VB); + } else { + 
// Concatenate V_At and U_C + new_U.resize(V_A.nb_cols(), U_A.nb_cols() + U_C.nb_cols()); + for (int i = 0; i < V_A.nb_rows(); i++) { + for (int j = 0; j < V_A.nb_cols(); j++) { + new_U(j, i) = V_A(i, j); + } + } + std::copy_n(U_C.data(), U_C.nb_rows() * U_C.nb_cols(), new_U.data() + V_A.nb_rows() * V_A.nb_cols()); + + // Compute VB=V_B*B + VB.resize(V_A.nb_rows(), B.nb_cols()); + add_matrix_hmatrix_product(transa, 'N', alpha, U_A, B, 0, VB); + } + + // Concatenate VB and V_C + scale(beta, V_C); + Matrix new_V(VB.nb_rows() + V_C.nb_rows(), V_C.nb_cols()); + for (int j = 0; j < new_V.nb_cols(); j++) { + std::copy_n(VB.data() + j * VB.nb_rows(), VB.nb_rows(), new_V.data() + j * new_V.nb_rows()); + std::copy_n(V_C.data() + j * V_C.nb_rows(), V_C.nb_rows(), new_V.data() + j * new_V.nb_rows() + VB.nb_rows()); + } + C.get_U() = new_U; + C.get_V() = new_V; + recompression(C); + } + } +} +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/linalg/add_matrix_hmatrix_product.hpp b/include/htool/hmatrix/linalg/add_matrix_hmatrix_product.hpp new file mode 100644 index 00000000..ad3b450f --- /dev/null +++ b/include/htool/hmatrix/linalg/add_matrix_hmatrix_product.hpp @@ -0,0 +1,104 @@ +#ifndef HTOOL_HMATRIX_LINALG_ADD_MATRIX_HMATRIX_PRODUCT_HPP +#define HTOOL_HMATRIX_LINALG_ADD_MATRIX_HMATRIX_PRODUCT_HPP + +#include "../hmatrix.hpp" +#include "add_hmatrix_matrix_product_row_major.hpp" + +namespace htool { + +template +void add_matrix_hmatrix_product(char transa, char transb, CoefficientPrecision alpha, const Matrix &A, const HMatrix &B, CoefficientPrecision beta, Matrix &C) { + + if (transa == 'N') { + sequential_add_hmatrix_matrix_product_row_major('T', transb, alpha, B, A.data(), beta, C.data(), C.nb_rows()); + } else { + Matrix transposed_A; + transpose(A, transposed_A); + sequential_add_hmatrix_matrix_product_row_major('T', transb, alpha, B, transposed_A.data(), beta, C.data(), C.nb_rows()); + } +} + +template +void add_matrix_hmatrix_product(char transa, char 
transb, CoefficientPrecision alpha, const Matrix &A, const HMatrix &B, CoefficientPrecision beta, LowRankMatrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + bool C_is_overwritten = (beta == CoefficientPrecision(0) || C.rank_of() == 0); + + int nb_rows = (transa == 'N') ? A.nb_rows() : A.nb_cols(); + int nb_cols = B.nb_cols(); + + // + Matrix AB(nb_rows, nb_cols); + add_matrix_hmatrix_product(transa, transb, alpha, A, B, CoefficientPrecision(0), AB); + + // SVD truncation + std::vector> singular_values(std::min(nb_rows, nb_cols)); + Matrix u(nb_rows, nb_rows); + Matrix vt(nb_cols, nb_cols); + int truncated_rank = SVD_truncation(AB, C.get_epsilon(), u, vt, singular_values); + + // new_U=u*sqrt(tildeS) and new_V=sqrt(tildeS)*vt in the right dimensions + Matrix *new_U_ptr, *new_V_ptr; + Matrix U_1, V_1; + if (C_is_overwritten) { + new_U_ptr = &C.get_U(); + new_V_ptr = &C.get_V(); + } else { + new_U_ptr = &U_1; + new_V_ptr = &V_1; + } + + { + Matrix &new_U = *new_U_ptr; + Matrix &new_V = *new_V_ptr; + int M = nb_rows; + int N = nb_cols; + int incx = 1; + new_U.resize(M, truncated_rank); + new_V.resize(truncated_rank, N); + CoefficientPrecision scaling_coef; + for (int r = 0; r < truncated_rank; r++) { + scaling_coef = std::sqrt(singular_values[r]); + std::copy_n(u.data() + r * u.nb_rows(), u.nb_cols(), new_U.data() + r * M); + Blas::scal(&M, &scaling_coef, new_U.data() + r * M, &incx); + } + for (int r = 0; r < vt.nb_cols(); r++) { + std::copy_n(vt.data() + r * vt.nb_rows(), truncated_rank, new_V.data() + r * truncated_rank); + } + + for (int r = 0; r < truncated_rank; r++) { + for (int j = 0; j < new_V.nb_cols(); j++) { + new_V(r, j) = std::sqrt(singular_values[r]) * new_V(r, j); + } + } + } + + if (C_is_overwritten) { + return; + } + + // Concatenate U_1 and U_2 + Matrix &U_2 = C.get_U(); + Matrix 
new_U(U_1.nb_rows(), U_1.nb_cols() + U_2.nb_cols()); + std::copy_n(U_1.data(), U_1.nb_rows() * U_1.nb_cols(), new_U.data()); + std::copy_n(U_2.data(), U_2.nb_rows() * U_2.nb_cols(), new_U.data() + U_1.nb_rows() * U_1.nb_cols()); + + // Concatenate V_1 and V_2 + Matrix &V_2 = C.get_V(); + scale(beta, V_2); + Matrix new_V(V_1.nb_rows() + V_2.nb_rows(), V_2.nb_cols()); + for (int j = 0; j < new_V.nb_cols(); j++) { + std::copy_n(V_1.data() + j * V_1.nb_rows(), V_1.nb_rows(), new_V.data() + j * new_V.nb_rows()); + std::copy_n(V_2.data() + j * V_2.nb_rows(), V_2.nb_rows(), new_V.data() + j * new_V.nb_rows() + V_1.nb_rows()); + } + + // Set C + C.get_U() = new_U; + C.get_V() = new_V; + recompression(C); +} +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/linalg/interface.hpp b/include/htool/hmatrix/linalg/interface.hpp new file mode 100644 index 00000000..f412c26c --- /dev/null +++ b/include/htool/hmatrix/linalg/interface.hpp @@ -0,0 +1,13 @@ +#ifndef HTOOL_HMATRIX_LINALG_INTERFACE_HPP +#define HTOOL_HMATRIX_LINALG_INTERFACE_HPP + +#include "add_hmatrix_hmatrix_product.hpp" +#include "add_hmatrix_lrmat_product.hpp" +#include "add_hmatrix_matrix_product.hpp" +#include "add_hmatrix_matrix_product_row_major.hpp" +#include "add_hmatrix_vector_product.hpp" +#include "add_lrmat_hmatrix_product.hpp" +#include "add_matrix_hmatrix_product.hpp" +#include "scale.hpp" + +#endif diff --git a/include/htool/hmatrix/linalg/scale.hpp b/include/htool/hmatrix/linalg/scale.hpp new file mode 100644 index 00000000..4bca50e6 --- /dev/null +++ b/include/htool/hmatrix/linalg/scale.hpp @@ -0,0 +1,59 @@ +#ifndef HTOOL_HMATRIX_LINALG_SCALE_HPP +#define HTOOL_HMATRIX_LINALG_SCALE_HPP + +#include "../hmatrix.hpp" +#include "../lrmat/linalg/scale.hpp" + +namespace htool { + +template +void openmp_scale(CoefficientPrecision da, HMatrix &A) { + std::vector *> leaves; + preorder_tree_traversal(A, [&leaves](HMatrix ¤t_node) { + if (!current_node.is_hierarchical()) { + 
leaves.push_back(¤t_node); + } + }); + +#if defined(_OPENMP) +# pragma omp parallel +#endif + { +#if defined(_OPENMP) +# pragma omp for schedule(guided) nowait +#endif + for (int b = 0; b < leaves.size(); b++) { + if (leaves[b]->is_dense()) { + scale(da, *leaves[b]->get_dense_data()); + } else { + scale(da, *leaves[b]->get_low_rank_data()); + } + } + } +} + +template +void sequential_scale(CoefficientPrecision da, HMatrix &A) { + std::vector *> leaves; + preorder_tree_traversal(A, [&leaves](HMatrix ¤t_node) { + if (!current_node.is_hierarchical()) { + leaves.push_back(¤t_node); + } + }); + for (int b = 0; b < leaves.size(); b++) { + if (leaves[b]->is_dense()) { + scale(da, *leaves[b]->get_dense_data()); + } else { + scale(da, *leaves[b]->get_low_rank_data()); + } + } +} + +template +void scale(CoefficientPrecision da, HMatrix &A) { + openmp_scale(da, A); +} + +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/lrmat/SVD.hpp b/include/htool/hmatrix/lrmat/SVD.hpp index 6c9a0354..c3b295e3 100644 --- a/include/htool/hmatrix/lrmat/SVD.hpp +++ b/include/htool/hmatrix/lrmat/SVD.hpp @@ -1,6 +1,7 @@ #ifndef HTOOL_SVD_HPP #define HTOOL_SVD_HPP +#include "../../matrix/utils/SVD_truncation.hpp" #include "../../wrappers/wrapper_lapack.hpp" #include "lrmat.hpp" @@ -14,76 +15,44 @@ class SVD final : public VirtualLowRankGenerator &A, const Cluster &target_cluster, const Cluster &source_cluster, underlying_type epsilon, int &rank, Matrix &U, Matrix &V) const override { - int reqrank = 0; - int M = target_cluster.get_size(); - int N = source_cluster.get_size(); + int M = target_cluster.get_size(); + int N = source_cluster.get_size(); //// Matrix assembling - double Norm = 0; Matrix mat(M, N); A.copy_submatrix(target_cluster.get_size(), source_cluster.get_size(), target_cluster.get_offset(), source_cluster.get_offset(), mat.data()); - Norm = normFrob(mat); - //// SVD - int m = M; - int n = N; - int lda = m; - int ldu = m; - int ldvt = n; - int lwork = -1; - int info; 
- std::vector> singular_values(std::min(m, n)); - Matrix u(m, m); - // std::vector vt (n*n); - Matrix vt(n, n); - std::vector work(std::min(m, n)); - std::vector> rwork(5 * std::min(m, n)); + std::vector> singular_values(std::min(M, N)); + Matrix u(M, M); + Matrix vt(N, N); - Lapack::gesvd("A", "A", &m, &n, mat.data(), &lda, singular_values.data(), u.data(), &ldu, vt.data(), &ldvt, work.data(), &lwork, rwork.data(), &info); - lwork = (int)std::real(work[0]); - work.resize(lwork); - Lapack::gesvd("A", "A", &m, &n, mat.data(), &lda, singular_values.data(), u.data(), &ldu, vt.data(), &ldvt, work.data(), &lwork, rwork.data(), &info); + int truncated_rank = SVD_truncation(mat, epsilon, u, vt, singular_values); if (rank == -1) { - - // Compute Frobenius norm of the approximation error - int j = singular_values.size(); - double svd_norm = 0; - - do { - j = j - 1; - svd_norm += std::pow(std::abs(singular_values[j]), 2); - } while (j > 0 && std::sqrt(svd_norm) / Norm < epsilon); - - reqrank = std::min(j + 1, std::min(m, n)); - - if (reqrank * (M + N) > (M * N)) { - reqrank = -1; + if (truncated_rank * (M + N) > (M * N)) { + truncated_rank = -1; } - rank = reqrank; } else { - reqrank = std::min(rank, std::min(M, N)); + truncated_rank = std::min(rank, std::min(M, N)); } - if (rank > 0) { - U.resize(M, rank); - V.resize(rank, N); + if (truncated_rank > 0) { + U.resize(M, truncated_rank); + V.resize(truncated_rank, N); for (int i = 0; i < M; i++) { - for (int j = 0; j < reqrank; j++) { + for (int j = 0; j < truncated_rank; j++) { U(i, j) = u(i, j) * singular_values[j]; } } - for (int i = 0; i < reqrank; i++) { + for (int i = 0; i < truncated_rank; i++) { for (int j = 0; j < N; j++) { V(i, j) = vt(i, j); } } } } - - // T get_singular_value(int i) { return singular_values[i]; } }; } // namespace htool diff --git a/include/htool/hmatrix/lrmat/linalg/add_lrmat_lrmat.hpp b/include/htool/hmatrix/lrmat/linalg/add_lrmat_lrmat.hpp new file mode 100644 index 00000000..71a4a55b --- 
/dev/null +++ b/include/htool/hmatrix/lrmat/linalg/add_lrmat_lrmat.hpp @@ -0,0 +1,59 @@ +#ifndef HTOOL_LRMAT_LINALG_ADD_LRMAT_LRMAT_HPP +#define HTOOL_LRMAT_LINALG_ADD_LRMAT_LRMAT_HPP +#include "../../../matrix/linalg/add_matrix_matrix_product.hpp" +#include "../lrmat.hpp" +#include "add_lrmat_matrix_product.hpp" + +namespace htool { +template +void add_lrmat_lrmat(LowRankMatrix &C, const Cluster &target_cluster, const Cluster &source_cluster, const LowRankMatrix &R, const Cluster &target_cluster_child, const Cluster &source_cluster_child) { + int row_offset = target_cluster_child.get_offset() - target_cluster.get_offset(); + int col_offset = source_cluster_child.get_offset() - source_cluster.get_offset(); + + bool C_is_overwritten = (C.rank_of() == 0); + + auto &U_R = R.get_U(); + auto &V_R = R.get_V(); + auto &U_C = C.get_U(); + auto &V_C = C.get_V(); + + if (C_is_overwritten) { + U_C.resize(C.nb_rows(), R.rank_of()); + V_C.resize(R.rank_of(), C.nb_cols()); + + for (int i = 0; i < U_R.nb_cols(); i++) { + std::copy_n(U_R.data() + U_R.nb_rows() * i, U_R.nb_rows(), U_C.data() + i * U_C.nb_rows() + row_offset); + } + for (int j = 0; j < V_R.nb_cols(); j++) { + std::copy_n(V_R.data() + j * V_R.nb_rows(), V_R.nb_rows(), V_C.data() + (j + col_offset) * V_C.nb_rows()); + } + + } else { + + // Concatenate U_C and U_R + Matrix new_U(C.nb_rows(), U_R.nb_cols() + U_C.nb_cols()); + std::copy_n(U_C.data(), U_C.nb_cols() * U_C.nb_rows(), new_U.data()); + + for (int i = 0; i < U_R.nb_cols(); i++) { + std::copy_n(U_R.data() + U_R.nb_rows() * i, U_R.nb_rows(), new_U.data() + U_C.nb_cols() * C.nb_rows() + i * new_U.nb_rows() + row_offset); + } + // Concatenate V_C and V_R + Matrix new_V(V_C.nb_rows() + V_R.nb_rows(), C.nb_cols()); + + for (int j = 0; j < new_V.nb_cols(); j++) { + std::copy_n(V_C.data() + j * V_C.nb_rows(), V_C.nb_rows(), new_V.data() + j * new_V.nb_rows()); + } + for (int j = 0; j < V_R.nb_cols(); j++) { + std::copy_n(V_R.data() + j * V_R.nb_rows(), V_R.nb_rows(), 
new_V.data() + (j + col_offset) * new_V.nb_rows() + V_C.nb_rows()); + } + + // Set C + C.get_U() = new_U; + C.get_V() = new_V; + recompression(C); + } +} + +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/lrmat/linalg/add_lrmat_lrmat_product.hpp b/include/htool/hmatrix/lrmat/linalg/add_lrmat_lrmat_product.hpp new file mode 100644 index 00000000..45338193 --- /dev/null +++ b/include/htool/hmatrix/lrmat/linalg/add_lrmat_lrmat_product.hpp @@ -0,0 +1,70 @@ +#ifndef HTOOL_LRMAT_LINALG_ADD_LRMAT_LRMAT_PRODUCT_HPP +#define HTOOL_LRMAT_LINALG_ADD_LRMAT_LRMAT_PRODUCT_HPP +#include "../../../matrix/linalg/add_matrix_matrix_product.hpp" +#include "../lrmat.hpp" +#include "add_lrmat_matrix_product.hpp" + +namespace htool { + +template +void add_lrmat_lrmat_product(char transa, char transb, CoefficientPrecision alpha, const LowRankMatrix &A, const LowRankMatrix &B, CoefficientPrecision beta, Matrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_lrmat_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + auto rank_A = A.rank_of(); + auto rank_B = B.rank_of(); + if (rank_A != 0 and rank_B != 0) { // the order of operations could be optimized + auto &U_B = B.get_U(); + auto &V_B = B.get_V(); + Matrix AUB(C.nb_rows(), B.rank_of()); + add_lrmat_matrix_product(transa, 'N', CoefficientPrecision(1), A, U_B, CoefficientPrecision(0), AUB); + add_matrix_matrix_product('N', 'N', alpha, AUB, V_B, beta, C); + } +} + +template +void add_lrmat_lrmat_product(char transa, char transb, CoefficientPrecision alpha, const LowRankMatrix &A, const LowRankMatrix &B, CoefficientPrecision beta, LowRankMatrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_lrmat_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + auto rank_A = A.rank_of(); + auto rank_B = B.rank_of(); + if (rank_A != 0 
and rank_B != 0) { // the order of operations could be optimized + auto &U_B = B.get_U(); + auto &V_B = B.get_V(); + auto &U_C = C.get_U(); + auto &V_C = C.get_V(); + if (beta == CoefficientPrecision(0) || C.rank_of() == 0) { + if (transa == 'N') { + U_C.resize(A.nb_rows(), U_B.nb_cols()); + + } else { + U_C.resize(A.nb_cols(), U_B.nb_cols()); + } + V_C = V_B; + add_lrmat_matrix_product(transa, 'N', alpha, A, U_B, CoefficientPrecision(0), U_C); + } else { + Matrix new_U, sub_new_U; + new_U.resize(C.nb_rows(), U_B.nb_cols() + U_C.nb_cols()); + sub_new_U.assign(C.nb_rows(), U_B.nb_cols(), new_U.data(), false); + add_lrmat_matrix_product(transa, 'N', alpha, A, U_B, CoefficientPrecision(0), sub_new_U); + std::copy_n(U_C.data(), U_C.nb_cols() * U_C.nb_rows(), new_U.data() + (C.nb_rows() * U_B.nb_cols())); + + // Concatenate V_B and V_C + scale(beta, V_C); + Matrix new_V(V_B.nb_rows() + V_C.nb_rows(), V_C.nb_cols()); + for (int j = 0; j < new_V.nb_cols(); j++) { + std::copy_n(V_B.data() + j * V_B.nb_rows(), V_B.nb_rows(), new_V.data() + j * new_V.nb_rows()); + std::copy_n(V_C.data() + j * V_C.nb_rows(), V_C.nb_rows(), new_V.data() + j * new_V.nb_rows() + V_B.nb_rows()); + } + C.get_U() = new_U; + C.get_V() = new_V; + recompression(C); + } + } +} + +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/lrmat/linalg/add_lrmat_matrix_product.hpp b/include/htool/hmatrix/lrmat/linalg/add_lrmat_matrix_product.hpp new file mode 100644 index 00000000..a6cd1bc8 --- /dev/null +++ b/include/htool/hmatrix/lrmat/linalg/add_lrmat_matrix_product.hpp @@ -0,0 +1,101 @@ +#ifndef HTOOL_LRMAT_LINALG_ADD_LRMAT_MATRIX_PRODUCT_HPP +#define HTOOL_LRMAT_LINALG_ADD_LRMAT_MATRIX_PRODUCT_HPP +#include "../../../matrix/linalg/add_matrix_matrix_product.hpp" +#include "../../../matrix/linalg/scale.hpp" +#include "../../../matrix/linalg/transpose.hpp" +#include "../lrmat.hpp" +#include "../utils/recompression.hpp" + +namespace htool { + +template +void add_lrmat_matrix_product(char transa, 
char transb, CoefficientPrecision alpha, const LowRankMatrix &A, const Matrix &B, CoefficientPrecision beta, Matrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_lrmat_matrix_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + auto rank = A.rank_of(); + + if (rank != 0) { + auto &U = A.get_U(); + auto &V = A.get_V(); + if (transa == 'N') { + Matrix VB(V.nb_rows(), B.nb_cols()); + add_matrix_matrix_product(transa, 'N', 1, V, B, 0, VB); + add_matrix_matrix_product(transa, 'N', alpha, U, VB, beta, C); + } else { + Matrix UtB(V.nb_rows(), B.nb_cols()); + add_matrix_matrix_product(transa, 'N', 1, U, B, 0, UtB); + add_matrix_matrix_product(transa, 'N', alpha, V, UtB, beta, C); + } + } +} + +template +void add_lrmat_matrix_product(char transa, char transb, CoefficientPrecision alpha, const LowRankMatrix &A, const Matrix &B, CoefficientPrecision beta, LowRankMatrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_lrmat_matrix_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + auto rank = A.rank_of(); + if (rank != 0) { + auto &U_A = A.get_U(); + auto &V_A = A.get_V(); + auto &U_C = C.get_U(); + auto &V_C = C.get_V(); + if (beta == CoefficientPrecision(0) || C.rank_of() == 0) { + if (transa == 'N') { + V_C.resize(V_A.nb_rows(), B.nb_cols()); + U_C = U_A; + add_matrix_matrix_product(transa, 'N', alpha, V_A, B, 0, V_C); + } else { + V_C.resize(U_A.nb_cols(), B.nb_cols()); + transpose(V_A, U_C); + add_matrix_matrix_product(transa, 'N', alpha, U_A, B, 0, V_C); + } + if (C.rank_of() * (C.nb_rows() + C.nb_cols()) > (C.nb_rows() * C.nb_cols())) { + recompression(C); + } + } else { + Matrix VB; + Matrix new_U; + if (transa == 'N') { + // Concatenate U_A and U_C + new_U.resize(U_A.nb_rows(), U_A.nb_cols() + U_C.nb_cols()); + std::copy_n(U_A.data(), U_A.nb_rows() * U_A.nb_cols(), 
new_U.data()); + std::copy_n(U_C.data(), U_C.nb_rows() * U_C.nb_cols(), new_U.data() + U_A.nb_rows() * U_A.nb_cols()); + + // Compute VB=V_B*B + VB.resize(V_A.nb_rows(), B.nb_cols()); + add_matrix_matrix_product(transa, 'N', alpha, V_A, B, 0, VB); + } else { + // Concatenate V_At and U_C + new_U.resize(V_A.nb_cols(), U_A.nb_cols() + U_C.nb_cols()); + for (int i = 0; i < V_A.nb_rows(); i++) { + for (int j = 0; j < V_A.nb_cols(); j++) { + new_U(j, i) = V_A(i, j); + } + } + std::copy_n(U_C.data(), U_C.nb_rows() * U_C.nb_cols(), new_U.data() + V_A.nb_rows() * V_A.nb_cols()); + + // Compute VB=V_B*B + VB.resize(V_A.nb_rows(), B.nb_cols()); + add_matrix_matrix_product(transa, 'N', alpha, U_A, B, 0, VB); + } + + // Concatenate VB and V_C + scale(beta, V_C); + Matrix new_V(VB.nb_rows() + V_C.nb_rows(), V_C.nb_cols()); + for (int j = 0; j < new_V.nb_cols(); j++) { + std::copy_n(VB.data() + j * VB.nb_rows(), VB.nb_rows(), new_V.data() + j * new_V.nb_rows()); + std::copy_n(V_C.data() + j * V_C.nb_rows(), V_C.nb_rows(), new_V.data() + j * new_V.nb_rows() + VB.nb_rows()); + } + C.get_U() = new_U; + C.get_V() = new_V; + recompression(C); + } + } +} + +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/lrmat/linalg/add_lrmat_matrix_product_row_major.hpp b/include/htool/hmatrix/lrmat/linalg/add_lrmat_matrix_product_row_major.hpp new file mode 100644 index 00000000..ec151341 --- /dev/null +++ b/include/htool/hmatrix/lrmat/linalg/add_lrmat_matrix_product_row_major.hpp @@ -0,0 +1,33 @@ +#ifndef HTOOL_LRMAT_LINALG_ADD_LRMAT_MATRIX_PRODUCT_ROW_MAJOR_HPP +#define HTOOL_LRMAT_LINALG_ADD_LRMAT_MATRIX_PRODUCT_ROW_MAJOR_HPP +#include "../../../matrix/linalg/add_matrix_matrix_product_row_major.hpp" +#include "../lrmat.hpp" + +namespace htool { + +template +void add_lrmat_matrix_product_row_major(char transa, char transb, CoefficientPrecision alpha, const LowRankMatrix &A, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out, int mu) { + if 
(transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_lrmat_matrix_product_row_major (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + auto rank = A.rank_of(); + + if (rank != 0) { + auto &U = A.get_U(); + auto &V = A.get_V(); + if (transa == 'N') { + std::vector a(rank * mu); + V.add_matrix_product_row_major(transa, 1, in, 0, a.data(), mu); + U.add_matrix_product_row_major(transa, alpha, a.data(), beta, out, mu); + } else { + std::vector a(rank * mu); + U.add_matrix_product_row_major(transa, 1, in, 0, a.data(), mu); + V.add_matrix_product_row_major(transa, alpha, a.data(), beta, out, mu); + } + } +} + +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/lrmat/linalg/add_lrmat_vector_product.hpp b/include/htool/hmatrix/lrmat/linalg/add_lrmat_vector_product.hpp new file mode 100644 index 00000000..c257749d --- /dev/null +++ b/include/htool/hmatrix/lrmat/linalg/add_lrmat_vector_product.hpp @@ -0,0 +1,27 @@ +#ifndef HTOOL_LRMAT_LINALG_ADD_LRMAT_VECTOR_PRODUCT_HPP +#define HTOOL_LRMAT_LINALG_ADD_LRMAT_VECTOR_PRODUCT_HPP +#include "../../../matrix/linalg/add_matrix_vector_product.hpp" +#include "../lrmat.hpp" +namespace htool { + +template +void add_lrmat_vector_product(char trans, CoefficientPrecision alpha, const LowRankMatrix &A, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out) { + auto rank = A.rank_of(); + if (rank != 0) { + auto &U = A.get_U(); + auto &V = A.get_V(); + if (trans == 'N') { + std::vector a(rank); + add_matrix_vector_product(trans, CoefficientPrecision(1.), V, in, 0, a.data()); + add_matrix_vector_product(trans, alpha, U, a.data(), beta, out); + } else { + std::vector a(rank); + add_matrix_vector_product(trans, CoefficientPrecision(1.), U, in, 0, a.data()); + add_matrix_vector_product(trans, alpha, V, a.data(), beta, out); + } + } +} + +} // namespace htool + +#endif diff --git 
a/include/htool/hmatrix/lrmat/linalg/add_matrix_lrmat_product.hpp b/include/htool/hmatrix/lrmat/linalg/add_matrix_lrmat_product.hpp new file mode 100644 index 00000000..086e20fa --- /dev/null +++ b/include/htool/hmatrix/lrmat/linalg/add_matrix_lrmat_product.hpp @@ -0,0 +1,86 @@ +#ifndef HTOOL_LRMAT_LINALG_ADD_MATRIX_LRMAT_PRODUCT_HPP +#define HTOOL_LRMAT_LINALG_ADD_MATRIX_LRMAT_PRODUCT_HPP +#include "../../../matrix/linalg/add_matrix_matrix_product.hpp" +#include "../lrmat.hpp" +#include "../utils/recompression.hpp" + +namespace htool { + +template +void add_matrix_lrmat_product(char transa, char transb, CoefficientPrecision alpha, const Matrix &A, const LowRankMatrix &B, CoefficientPrecision beta, Matrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + auto rank = B.rank_of(); + + if (rank != 0) { + auto &U = B.get_U(); + auto &V = B.get_V(); + Matrix AU; + if (transa == 'N') { + AU.resize(A.nb_rows(), U.nb_cols()); + } else { + AU.resize(A.nb_cols(), U.nb_cols()); + } + add_matrix_matrix_product(transa, 'N', 1, A, U, 0, AU); + add_matrix_matrix_product('N', 'N', alpha, AU, V, beta, C); + } +} + +template +void add_matrix_lrmat_product(char transa, char transb, CoefficientPrecision alpha, const Matrix &A, const LowRankMatrix &B, CoefficientPrecision beta, LowRankMatrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + auto rank = B.rank_of(); + if (rank != 0) { + auto &U_B = B.get_U(); + auto &V_B = B.get_V(); + auto &U_C = C.get_U(); + auto &V_C = C.get_V(); + if (beta == CoefficientPrecision(0) || C.rank_of() == 0) { + if (transa == 'N') { + U_C.resize(A.nb_rows(), U_B.nb_cols()); + } else { + U_C.resize(A.nb_cols(), U_B.nb_cols()); + } + V_C = 
V_B; + add_matrix_matrix_product(transa, 'N', alpha, A, U_B, 0, U_C); + } else { + + // Concatenate V_B and V_C + Matrix new_V; + scale(beta, V_C); + new_V.resize(V_B.nb_rows() + V_C.nb_rows(), V_C.nb_cols()); + for (int j = 0; j < new_V.nb_cols(); j++) { + std::copy_n(V_B.data() + j * V_B.nb_rows(), V_B.nb_rows(), new_V.data() + j * new_V.nb_rows()); + std::copy_n(V_C.data() + j * V_C.nb_rows(), V_C.nb_rows(), new_V.data() + j * new_V.nb_rows() + V_B.nb_rows()); + } + + // Compute AU= A*U_A + Matrix AU; + if (transa == 'N') { + AU.resize(A.nb_rows(), U_B.nb_cols()); + } else { + AU.resize(A.nb_cols(), U_B.nb_cols()); + } + add_matrix_matrix_product(transa, 'N', alpha, A, U_B, 0, AU); + + // Concatenate U_A and U_C + Matrix new_U; + new_U.resize(AU.nb_rows(), AU.nb_cols() + U_C.nb_cols()); + std::copy_n(AU.data(), AU.nb_rows() * AU.nb_cols(), new_U.data()); + std::copy_n(U_C.data(), U_C.nb_rows() * U_C.nb_cols(), new_U.data() + AU.nb_rows() * AU.nb_cols()); + + C.get_U() = new_U; + C.get_V() = new_V; + recompression(C); + } + } +} + +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/lrmat/linalg/add_matrix_matrix_product.hpp b/include/htool/hmatrix/lrmat/linalg/add_matrix_matrix_product.hpp new file mode 100644 index 00000000..4d570a5c --- /dev/null +++ b/include/htool/hmatrix/lrmat/linalg/add_matrix_matrix_product.hpp @@ -0,0 +1,92 @@ +#ifndef HTOOL_LRMAT_LINALG_ADD_MATRIX_MATRIX_PRODUCT_HPP +#define HTOOL_LRMAT_LINALG_ADD_MATRIX_MATRIX_PRODUCT_HPP +#include "../../../matrix/linalg/add_matrix_matrix_product.hpp" +#include "../../../matrix/utils/SVD_truncation.hpp" +#include "../lrmat.hpp" +#include "../utils/recompression.hpp" + +namespace htool { + +template +void add_matrix_matrix_product(char transa, char transb, CoefficientPrecision alpha, const Matrix &A, const Matrix &B, CoefficientPrecision beta, LowRankMatrix &C) { + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for 
add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + bool C_is_overwritten = (beta == CoefficientPrecision(0) || C.rank_of() == 0); + + int nb_rows = (transa == 'N') ? A.nb_rows() : A.nb_cols(); + int nb_cols = B.nb_cols(); + Matrix AB(nb_rows, nb_cols); + add_matrix_matrix_product(transa, transb, alpha, A, B, beta, AB); + + auto epsilon = C.get_epsilon(); + + // RL= u*S*vt + std::vector> singular_values(std::min(nb_rows, nb_cols)); + Matrix u(nb_rows, nb_rows); + Matrix vt(nb_cols, nb_cols); + int truncated_rank = SVD_truncation(AB, epsilon, u, vt, singular_values); + + // new_U=u*sqrt(tildeS) and new_V=sqrt(tildeS)*vt in the right dimensions + Matrix *new_U_ptr, *new_V_ptr; + Matrix U_1, V_1; + if (C_is_overwritten) { + new_U_ptr = &C.get_U(); + new_V_ptr = &C.get_V(); + } else { + new_U_ptr = &U_1; + new_V_ptr = &V_1; + } + + { + Matrix &new_U = *new_U_ptr; + Matrix &new_V = *new_V_ptr; + int M = nb_rows; + int N = nb_cols; + int incx = 1; + new_U.resize(M, truncated_rank); + new_V.resize(truncated_rank, N); + CoefficientPrecision scaling_coef; + for (int r = 0; r < truncated_rank; r++) { + scaling_coef = std::sqrt(singular_values[r]); + std::copy_n(u.data() + r * u.nb_rows(), u.nb_cols(), new_U.data() + r * M); + Blas::scal(&M, &scaling_coef, new_U.data() + r * M, &incx); + } + for (int r = 0; r < vt.nb_cols(); r++) { + std::copy_n(vt.data() + r * vt.nb_rows(), truncated_rank, new_V.data() + r * truncated_rank); + } + + for (int r = 0; r < truncated_rank; r++) { + for (int j = 0; j < new_V.nb_cols(); j++) { + new_V(r, j) = std::sqrt(singular_values[r]) * new_V(r, j); + } + } + } + + if (C_is_overwritten) { + return; + } + + // Concatenate U_1 and U_2 + Matrix &U_2 = C.get_U(); + Matrix new_U(U_1.nb_rows(), U_1.nb_cols() + U_2.nb_cols()); + std::copy_n(U_1.data(), U_1.nb_rows() * U_1.nb_cols(), new_U.data()); + std::copy_n(U_2.data(), U_2.nb_rows() * U_2.nb_cols(), new_U.data() + U_1.nb_rows() * U_1.nb_cols()); + + // 
Concatenate V_1 and V_2 + Matrix &V_2 = C.get_V(); + scale(beta, V_2); + Matrix new_V(V_1.nb_rows() + V_2.nb_rows(), V_2.nb_cols()); + for (int j = 0; j < new_V.nb_cols(); j++) { + std::copy_n(V_1.data() + j * V_1.nb_rows(), V_1.nb_rows(), new_V.data() + j * new_V.nb_rows()); + std::copy_n(V_2.data() + j * V_2.nb_rows(), V_2.nb_rows(), new_V.data() + j * new_V.nb_rows() + V_1.nb_rows()); + } + C.get_U() = new_U; + C.get_V() = new_V; + recompression(C); +} + +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/lrmat/linalg/interface.hpp b/include/htool/hmatrix/lrmat/linalg/interface.hpp new file mode 100644 index 00000000..91858f90 --- /dev/null +++ b/include/htool/hmatrix/lrmat/linalg/interface.hpp @@ -0,0 +1,12 @@ +#ifndef HTOOL_HMATRIX_LRMAT_LINALG_INTERFACE_HPP +#define HTOOL_HMATRIX_LRMAT_LINALG_INTERFACE_HPP + +#include "add_lrmat_lrmat_product.hpp" +#include "add_lrmat_matrix_product.hpp" +#include "add_lrmat_matrix_product_row_major.hpp" +#include "add_lrmat_vector_product.hpp" +#include "add_matrix_lrmat_product.hpp" +#include "add_matrix_matrix_product.hpp" +#include "scale.hpp" + +#endif diff --git a/include/htool/hmatrix/lrmat/linalg/scale.hpp b/include/htool/hmatrix/lrmat/linalg/scale.hpp new file mode 100644 index 00000000..c9d3844c --- /dev/null +++ b/include/htool/hmatrix/lrmat/linalg/scale.hpp @@ -0,0 +1,19 @@ +#ifndef HTOOL_LRMAT_LINALG_SCALE_HPP +#define HTOOL_LRMAT_LINALG_SCALE_HPP + +#include "../../../matrix/linalg/scale.hpp" +#include "../lrmat.hpp" +namespace htool { + +template +void scale(CoefficientPrecision da, LowRankMatrix &lrmat) { + if (lrmat.get_U().nb_rows() > lrmat.get_U().nb_cols()) { + scale(da, lrmat.get_U()); + } else { + scale(da, lrmat.get_V()); + } +} + +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/lrmat/lrmat.hpp b/include/htool/hmatrix/lrmat/lrmat.hpp index e4f7b09b..4853596e 100644 --- a/include/htool/hmatrix/lrmat/lrmat.hpp +++ b/include/htool/hmatrix/lrmat/lrmat.hpp @@ -1,8 +1,8 @@ 
#ifndef HTOOL_LRMAT_HPP #define HTOOL_LRMAT_HPP -#include "../../basic_types/matrix.hpp" #include "../../clustering/cluster_node.hpp" +#include "../../matrix/matrix.hpp" #include "../interfaces/virtual_generator.hpp" #include "../interfaces/virtual_lrmat_generator.hpp" #include @@ -14,32 +14,35 @@ template m_U, m_V; underlying_type m_epsilon; public: // Constructors - LowRankMatrix(const VirtualGenerator &A, const VirtualLowRankGenerator &LRGenerator, const Cluster &target_cluster, const Cluster &source_cluster, int rank = -1, underlying_type epsilon = 1e-3) : m_rank(rank), m_number_of_rows(target_cluster.get_size()), m_number_of_columns(source_cluster.get_size()), m_U(), m_V(), m_epsilon(epsilon) { + LowRankMatrix(const VirtualGenerator &A, const VirtualLowRankGenerator &LRGenerator, const Cluster &target_cluster, const Cluster &source_cluster, int rank = -1, underlying_type epsilon = 1e-3) : m_U(), m_V(), m_epsilon(epsilon) { - if (m_rank == 0) { - - m_U.resize(m_number_of_rows, 1); - m_V.resize(1, m_number_of_columns); - std::fill_n(m_U.data(), m_number_of_rows, 0); - std::fill_n(m_V.data(), m_number_of_columns, 0); + if (rank == 0) { + m_U.resize(target_cluster.get_size(), 0); + m_V.resize(0, source_cluster.get_size()); } else { - LRGenerator.copy_low_rank_approximation(A, target_cluster, source_cluster, epsilon, m_rank, m_U, m_V); + LRGenerator.copy_low_rank_approximation(A, target_cluster, source_cluster, epsilon, rank, m_U, m_V); } } - // Getters - int nb_rows() const { return m_number_of_rows; } - int nb_cols() const { return m_number_of_columns; } - int rank_of() const { return m_rank; } + LowRankMatrix(underlying_type epsilon) : m_U(), m_V(), m_epsilon(epsilon) {} + LowRankMatrix &operator=(const LowRankMatrix &rhs) = default; + LowRankMatrix(const LowRankMatrix &rhs) = default; + + // Getters + int nb_rows() const { return m_U.nb_rows(); } + int nb_cols() const { return m_V.nb_cols(); } + int rank_of() const { return m_U.nb_cols(); } + + Matrix &get_U() 
{ return m_U; } + Matrix &get_V() { return m_V; } + const Matrix &get_U() const { return m_U; } + const Matrix &get_V() const { return m_V; } CoefficientPrecision get_U(int i, int j) const { return m_U(i, j); } CoefficientPrecision get_V(int i, int j) const { return m_V(i, j); } void assign_U(int i, int j, CoefficientPrecision *ptr) { return m_U.assign(i, j, ptr); } @@ -51,42 +54,42 @@ class LowRankMatrix { } void add_vector_product(char trans, CoefficientPrecision alpha, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out) const { - if (m_rank == 0) { + if (m_U.nb_cols() == 0) { std::fill(out, out + m_U.nb_cols(), 0); } else if (trans == 'N') { - std::vector a(m_rank); + std::vector a(m_U.nb_cols()); m_V.add_vector_product(trans, 1, in, 0, a.data()); m_U.add_vector_product(trans, alpha, a.data(), beta, out); } else { - std::vector a(m_rank); + std::vector a(m_U.nb_cols()); m_U.add_vector_product(trans, 1, in, 0, a.data()); m_V.add_vector_product(trans, alpha, a.data(), beta, out); } } void add_matrix_product(char transa, CoefficientPrecision alpha, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out, int mu) const { - if (m_rank == 0) { + if (m_U.nb_cols() == 0) { std::fill(out, out + m_V.nb_cols() * mu, 0); } else if (transa == 'N') { - std::vector a(m_rank * mu); + std::vector a(m_U.nb_cols() * mu); m_V.add_matrix_product(transa, 1, in, 0, a.data(), mu); m_U.add_matrix_product(transa, alpha, a.data(), beta, out, mu); } else { - std::vector a(m_rank * mu); + std::vector a(m_U.nb_cols() * mu); m_U.add_matrix_product(transa, 1, in, 0, a.data(), mu); m_V.add_matrix_product(transa, alpha, a.data(), beta, out, mu); } } void add_matrix_product_row_major(char transa, CoefficientPrecision alpha, const CoefficientPrecision *in, CoefficientPrecision beta, CoefficientPrecision *out, int mu) const { - if (m_rank == 0) { + if (m_U.nb_cols() == 0) { std::fill(out, out + m_V.nb_cols() * mu, 0); } else if (transa 
== 'N') { - std::vector a(m_rank * mu); + std::vector a(m_U.nb_cols() * mu); m_V.add_matrix_product_row_major(transa, 1, in, 0, a.data(), mu); m_U.add_matrix_product_row_major(transa, alpha, a.data(), beta, out, mu); } else { - std::vector a(m_rank * mu); + std::vector a(m_U.nb_cols() * mu); m_U.add_matrix_product_row_major(transa, 1, in, 0, a.data(), mu); m_V.add_matrix_product_row_major(transa, alpha, a.data(), beta, out, mu); } @@ -94,18 +97,18 @@ class LowRankMatrix { void mvprod(const CoefficientPrecision *const in, CoefficientPrecision *const out) const { - if (m_rank == 0) { + if (m_U.nb_cols() == 0) { std::fill(out, out + m_U.nb_cols(), 0); } else { - std::vector a(m_rank); + std::vector a(m_U.nb_cols()); m_V.mvprod(in, a.data()); m_U.mvprod(a.data(), out); } } void add_mvprod_row_major(const CoefficientPrecision *const in, CoefficientPrecision *const out, const int &mu, char transb = 'T', char op = 'N') const { - if (m_rank != 0) { - std::vector a(m_rank * mu); + if (m_U.nb_cols() != 0) { + std::vector a(m_U.nb_cols() * mu); if (op == 'N') { m_V.mvprod_row_major(in, a.data(), mu, transb, op); m_U.add_mvprod_row_major(a.data(), out, mu, transb, op); @@ -132,17 +135,17 @@ class LowRankMatrix { } double compression_ratio() const { - return (m_number_of_rows * m_number_of_columns) / (double)(m_rank * (m_number_of_rows + m_number_of_columns)); + return (m_U.nb_rows() * m_V.nb_cols()) / (double)(m_U.nb_cols() * (m_U.nb_rows() + m_V.nb_cols())); } double space_saving() const { - return (1 - (m_rank * (1. / double(m_number_of_rows) + 1. / double(m_number_of_columns)))); + return (1 - (m_U.nb_cols() * (1. / double(m_U.nb_rows()) + 1. 
/ double(m_V.nb_cols())))); } friend std::ostream &operator<<(std::ostream &os, const LowRankMatrix &m) { os << "rank:\t" << m.rank << std::endl; - os << "number_of_rows:\t" << m.m_number_of_rows << std::endl; - os << "m_number_of_columns:\t" << m.m_number_of_columns << std::endl; + os << "number_of_rows:\t" << m.nb_rows() << std::endl; + os << "m_number_of_columns:\t" << m.nb_cols() << std::endl; os << "U:\n"; os << m.m_U << std::endl; os << m.m_V << std::endl; diff --git a/include/htool/hmatrix/lrmat/utils/recompression.hpp b/include/htool/hmatrix/lrmat/utils/recompression.hpp new file mode 100644 index 00000000..b6e8ea2c --- /dev/null +++ b/include/htool/hmatrix/lrmat/utils/recompression.hpp @@ -0,0 +1,182 @@ +#ifndef HTOOL_LRMAT_UTILS_RECOMPRESSION_HPP +#define HTOOL_LRMAT_UTILS_RECOMPRESSION_HPP +#include "../../../matrix/linalg/add_matrix_matrix_product.hpp" +#include "../../../matrix/utils/SVD_truncation.hpp" +#include "../../../wrappers/wrapper_lapack.hpp" +#include "../lrmat.hpp" + +namespace htool { + +template +void recompression(LowRankMatrix &lrmat) { + Matrix U = lrmat.get_U(); + Matrix V = lrmat.get_V(); + auto rank = lrmat.rank_of(); + auto epsilon = lrmat.get_epsilon(); + + if (rank > std::min(U.nb_rows(), V.nb_cols())) { + Matrix UV(U.nb_rows(), V.nb_cols()); + add_matrix_matrix_product('N', 'N', CoefficientPrecision(1), U, V, CoefficientPrecision(1), UV); + + // UV= u*S*vt + std::vector> singular_values(std::min(U.nb_rows(), V.nb_cols())); + Matrix u(U.nb_rows(), U.nb_rows()); + Matrix vt(V.nb_cols(), V.nb_cols()); + int truncated_rank = SVD_truncation(UV, epsilon, u, vt, singular_values); + + // new_U=u*sqrt(tildeS) and new_V=sqrt(tildeS)*vt in the right dimensions + Matrix &new_U = lrmat.get_U(); + Matrix &new_V = lrmat.get_V(); + { + int M = U.nb_rows(); + int N = V.nb_cols(); + int incx = 1; + new_U.resize(M, truncated_rank); + new_V.resize(truncated_rank, N); + CoefficientPrecision scaling_coef; + for (int r = 0; r < truncated_rank; r++) { + 
scaling_coef = std::sqrt(singular_values[r]); + std::copy_n(u.data() + r * u.nb_rows(), u.nb_rows(), new_U.data() + r * M); + Blas::scal(&M, &scaling_coef, new_U.data() + r * M, &incx); + } + for (int r = 0; r < vt.nb_cols(); r++) { + std::copy_n(vt.data() + r * vt.nb_rows(), truncated_rank, new_V.data() + r * truncated_rank); + } + + for (int r = 0; r < truncated_rank; r++) { + for (int j = 0; j < vt.nb_cols(); j++) { + new_V(r, j) = std::sqrt(singular_values[r]) * new_V(r, j); + } + } + } + + } else { + // U=Q1R + std::vector tau_QR; + { + int M = U.nb_rows(); + int N = U.nb_cols(); + int lda = U.nb_rows(); + int lwork = -1; + int info; + std::vector work(1); + tau_QR.resize(std::min(M, N)); + Lapack::geqrf(&M, &N, U.data(), &lda, tau_QR.data(), work.data(), &lwork, &info); + lwork = (int)std::real(work[0]); + work.resize(lwork); + Lapack::geqrf(&M, &N, U.data(), &lda, tau_QR.data(), work.data(), &lwork, &info); + } + + // V=LQ2 + std::vector tau_LQ; + { + int M = V.nb_rows(); + int N = V.nb_cols(); + int lda = V.nb_rows(); + int lwork = -1; + int info; + std::vector work(1); + tau_LQ.resize(std::min(M, N)); + Lapack::gelqf(&M, &N, V.data(), &lda, tau_LQ.data(), work.data(), &lwork, &info); + lwork = (int)std::real(work[0]); + work.resize(lwork); + Lapack::gelqf(&M, &N, V.data(), &lda, tau_LQ.data(), work.data(), &lwork, &info); + } + + // RL = R*L + Matrix RL(rank, rank); + { + Matrix R(rank, rank), L(rank, rank); + for (int j = 0; j < rank; j++) { + for (int i = 0; i <= j; i++) { + R(i, j) = U(i, j); + } + } + for (int j = 0; j < rank; j++) { + for (int i = j; i < rank; i++) { + L(i, j) = V(i, j); + } + } + + add_matrix_matrix_product('N', 'N', CoefficientPrecision(1), R, L, CoefficientPrecision(0), RL); + } + + // RL= u*S*vt + std::vector> singular_values(rank); + Matrix u(rank, rank); + Matrix vt(rank, rank); + int truncated_rank = SVD_truncation(RL, epsilon, u, vt, singular_values); + + // new_U=u*sqrt(tildeS) and new_V=sqrt(tildeS)*vt in the right 
dimensions + Matrix &new_U = lrmat.get_U(); + Matrix &new_V = lrmat.get_V(); + { + int M = U.nb_rows(); + int N = V.nb_cols(); + int incx = 1; + new_U.resize(M, truncated_rank); + new_V.resize(truncated_rank, N); + CoefficientPrecision scaling_coef; + for (int r = 0; r < truncated_rank; r++) { + scaling_coef = std::sqrt(singular_values[r]); + std::copy_n(u.data() + r * rank, rank, new_U.data() + r * M); + Blas::scal(&M, &scaling_coef, new_U.data() + r * M, &incx); + } + for (int r = 0; r < rank; r++) { + std::copy_n(vt.data() + r * rank, truncated_rank, new_V.data() + r * truncated_rank); + } + + for (int r = 0; r < truncated_rank; r++) { + for (int j = 0; j < rank; j++) { + new_V(r, j) = std::sqrt(singular_values[r]) * new_V(r, j); + } + } + } + + // new_U=Q1*new_U + { + char size = 'L'; + char trans = 'N'; + int M = new_U.nb_rows(); + int N = new_U.nb_cols(); + int K = tau_QR.size(); + int ldc = M; + int lwork = -1; + int info; + std::vector work(1); + Lapack::mqr(&size, &trans, &M, &N, &K, U.data(), &M, tau_QR.data(), new_U.data(), &ldc, work.data(), &lwork, &info); + lwork = (int)std::real(work[0]); + work.resize(lwork); + Lapack::mqr(&size, &trans, &M, &N, &K, U.data(), &M, tau_QR.data(), new_U.data(), &ldc, work.data(), &lwork, &info); + } + + // new_V=new_V*Q2 + { + char size = 'R'; + char trans = 'N'; + int M = new_V.nb_rows(); + int N = new_V.nb_cols(); + int K = tau_LQ.size(); + int lda = V.nb_rows(); + int ldc = M; + int lwork = -1; + int info; + std::vector work(1); + Lapack::mlq(&size, &trans, &M, &N, &K, V.data(), &lda, tau_LQ.data(), new_V.data(), &ldc, work.data(), &lwork, &info); + lwork = (int)std::real(work[0]); + work.resize(lwork); + Lapack::mlq(&size, &trans, &M, &N, &K, V.data(), &lda, tau_LQ.data(), new_V.data(), &ldc, work.data(), &lwork, &info); + } + } +} + +template +std::unique_ptr> recompression(const LowRankMatrix &lrmat) { + std::unique_ptr> new_lrmat_ptr = std::make_unique>(lrmat); + recompression(*new_lrmat_ptr); + return 
new_lrmat_ptr; +} + +} // namespace htool + +#endif diff --git a/include/htool/hmatrix/tree_builder/tree_builder.hpp b/include/htool/hmatrix/tree_builder/tree_builder.hpp index 325a3d21..4d50a87b 100644 --- a/include/htool/hmatrix/tree_builder/tree_builder.hpp +++ b/include/htool/hmatrix/tree_builder/tree_builder.hpp @@ -33,6 +33,7 @@ class HMatrixTreeBuilder { char m_symmetry_type{'N'}; char m_UPLO_type{'N'}; + int m_partition_number_for_symmetry{-1}; // Views mutable std::vector m_admissible_tasks{}; @@ -59,15 +60,15 @@ class HMatrixTreeBuilder { return (m_symmetry_type != 'N') && ((m_UPLO_type == 'U' && target_cluster.get_offset() >= (source_cluster.get_offset() + source_cluster.get_size()) - && ((m_target_partition_number == -1) - || source_cluster.get_offset() >= m_source_root_cluster.get_clusters_on_partition()[m_target_partition_number]->get_offset()) + && ((m_partition_number_for_symmetry == -1) + || (source_cluster.get_offset() >= m_source_root_cluster.get_clusters_on_partition()[m_partition_number_for_symmetry]->get_offset() and m_target_root_cluster.get_clusters_on_partition()[m_partition_number_for_symmetry]->get_offset() <= target_cluster.get_offset() && target_cluster.get_offset() + target_cluster.get_size() <= m_target_root_cluster.get_clusters_on_partition()[m_partition_number_for_symmetry]->get_offset() + m_target_root_cluster.get_clusters_on_partition()[m_partition_number_for_symmetry]->get_size())) // && ((m_target_partition_number != -1) // || source_cluster.get_offset() >= target_cluster.get_offset()) ) || (m_UPLO_type == 'L' && source_cluster.get_offset() >= (target_cluster.get_offset() + target_cluster.get_size()) - && ((m_target_partition_number == -1) - || source_cluster.get_offset() < m_source_root_cluster.get_clusters_on_partition()[m_target_partition_number]->get_offset() + m_source_root_cluster.get_clusters_on_partition()[m_target_partition_number]->get_size()) + && ((m_partition_number_for_symmetry == -1) + || 
(source_cluster.get_offset() < m_source_root_cluster.get_clusters_on_partition()[m_partition_number_for_symmetry]->get_offset() + m_source_root_cluster.get_clusters_on_partition()[m_partition_number_for_symmetry]->get_size() && m_target_root_cluster.get_clusters_on_partition()[m_partition_number_for_symmetry]->get_offset() <= target_cluster.get_offset() && target_cluster.get_offset() + target_cluster.get_size() <= m_target_root_cluster.get_clusters_on_partition()[m_partition_number_for_symmetry]->get_offset() + m_target_root_cluster.get_clusters_on_partition()[m_partition_number_for_symmetry]->get_size())) // && ((m_target_partition_number != -1) // || source_cluster.get_offset() < target_cluster.get_offset() + target_cluster.get_size()) )); @@ -95,7 +96,7 @@ class HMatrixTreeBuilder { } public: - explicit HMatrixTreeBuilder(const ClusterType &root_cluster_tree_target, const ClusterType &root_source_cluster_tree, underlying_type epsilon, CoordinatePrecision eta, char symmetry, char UPLO, int reqrank, int target_partition_number) : m_target_root_cluster(root_cluster_tree_target), m_source_root_cluster(root_source_cluster_tree), m_epsilon(epsilon), m_eta(eta), m_reqrank(reqrank), m_target_partition_number(target_partition_number), m_symmetry_type(symmetry), m_UPLO_type(UPLO), m_low_rank_generator(std::make_shared>()), m_admissibility_condition(std::make_shared>()) { + explicit HMatrixTreeBuilder(const ClusterType &root_cluster_tree_target, const ClusterType &root_source_cluster_tree, underlying_type epsilon, CoordinatePrecision eta, char symmetry, char UPLO, int reqrank, int target_partition_number, int partition_number_for_symmetry) : m_target_root_cluster(root_cluster_tree_target), m_source_root_cluster(root_source_cluster_tree), m_epsilon(epsilon), m_eta(eta), m_reqrank(reqrank), m_target_partition_number(target_partition_number), m_symmetry_type(symmetry), m_UPLO_type(UPLO), m_partition_number_for_symmetry(partition_number_for_symmetry), 
m_low_rank_generator(std::make_shared>()), m_admissibility_condition(std::make_shared>()) { if (!((m_symmetry_type == 'N' || m_symmetry_type == 'H' || m_symmetry_type == 'S') && (m_UPLO_type == 'N' || m_UPLO_type == 'L' || m_UPLO_type == 'U') && ((m_symmetry_type == 'N' && m_UPLO_type == 'N') || (m_symmetry_type != 'N' && m_UPLO_type != 'N')) @@ -105,11 +106,12 @@ class HMatrixTreeBuilder { error_message += " and m_UPLO_type="; error_message.push_back(m_UPLO_type); htool::Logger::get_instance().log(LogLevel::ERROR, error_message); // LCOV_EXCL_LINE - // throw std::invalid_argument(error_message); // LCOV_EXCL_LINE } if (target_partition_number != -1 && target_partition_number >= m_target_root_cluster.get_clusters_on_partition().size()) { htool::Logger::get_instance().log(LogLevel::ERROR, "Target partition number cannot exceed number of partitions"); // LCOV_EXCL_LINE - // throw std::logic_error("[Htool error] Target partition number cannot exceed number of partitions."); + } + if (partition_number_for_symmetry != -1 && partition_number_for_symmetry >= m_target_root_cluster.get_clusters_on_partition().size()) { + htool::Logger::get_instance().log(LogLevel::ERROR, "Partition number for symmetry cannot exceed number of partitions"); // LCOV_EXCL_LINE } } @@ -190,7 +192,10 @@ bool HMatrixTreeBuilder::build_block_ bool is_admissible = m_admissibility_condition->ComputeAdmissibility(target_cluster, source_cluster, m_eta); ///////////////////// Diagonal blocks - // std::cout << target_cluster.get_offset() << " " << target_cluster.get_size() << " " << source_cluster.get_offset() << " " << source_cluster.get_size() << " " << is_block_diagonal(*current_hmatrix) << " " << is_target_cluster_in_target_partition(target_cluster) << " " << target_cluster.get_rank() << "\n"; + // int rankWorld; + // MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + // if (rankWorld == 0) + // std::cout << target_cluster.get_offset() << " " << target_cluster.get_size() << " " << 
source_cluster.get_offset() << " " << source_cluster.get_size() << " " << is_block_diagonal(*current_hmatrix) << " " << is_target_cluster_in_target_partition(target_cluster) << " " << target_cluster.get_rank() << " " << is_removed_by_symmetry(target_cluster, source_cluster) << "\n"; // if (is_block_diagonal(*current_hmatrix)) { // current_hmatrix->set_diagonal_hmatrix(current_hmatrix); // } @@ -203,7 +208,9 @@ bool HMatrixTreeBuilder::build_block_ && is_target_cluster_in_target_partition(target_cluster) && !is_removed_by_symmetry(target_cluster, source_cluster) && target_cluster.get_depth() >= m_mintargetdepth - && source_cluster.get_depth() >= m_minsourcedepth) { + && source_cluster.get_depth() >= m_minsourcedepth + && target_cluster.get_rank() >= 0 + && source_cluster.get_rank() >= 0) { m_admissible_tasks.push_back(current_hmatrix); return false; } else if (source_cluster.is_leaf()) { @@ -279,12 +286,12 @@ bool HMatrixTreeBuilder::build_block_ return false; } } else { - if ((target_cluster.get_size() > source_cluster.get_size()) || (target_cluster.get_rank() < 0 && source_cluster.get_rank() >= 0 && is_admissible)) { + if (target_cluster.get_rank() < 0 && source_cluster.get_rank() >= 0) { std::vector Blocks_not_pushed{}; std::vector child_blocks{}; - for (const auto &target_child : target_children) { + for (const auto &target_child : target_cluster.get_clusters_on_partition()) { if ((is_target_cluster_in_target_partition(*target_child) || target_cluster.get_rank() < 0) && !is_removed_by_symmetry(*target_child, source_cluster)) { - child_blocks.emplace_back(current_hmatrix->add_child(target_child.get(), &source_cluster)); + child_blocks.emplace_back(current_hmatrix->add_child(target_child, &source_cluster)); set_hmatrix_symmetry(*child_blocks.back()); Blocks_not_pushed.push_back(build_block_tree(child_blocks.back())); } @@ -310,12 +317,12 @@ bool HMatrixTreeBuilder::build_block_ return false; } - } else if (target_cluster.get_size() < source_cluster.get_size() || 
(source_cluster.get_rank() < 0 && target_cluster.get_rank() >= 0 && is_admissible)) { + } else if (source_cluster.get_rank() < 0 && target_cluster.get_rank() >= 0) { std::vector Blocks_not_pushed; std::vector child_blocks{}; - for (const auto &source_child : source_children) { + for (const auto &source_child : source_cluster.get_clusters_on_partition()) { if (!is_removed_by_symmetry(target_cluster, *source_child)) { - child_blocks.emplace_back(current_hmatrix->add_child(&target_cluster, source_child.get())); + child_blocks.emplace_back(current_hmatrix->add_child(&target_cluster, source_child)); set_hmatrix_symmetry(*child_blocks.back()); Blocks_not_pushed.push_back(build_block_tree(child_blocks.back())); } @@ -430,7 +437,7 @@ void HMatrixTreeBuilder::compute_bloc for (int p = 0; p < m_admissible_tasks.size(); p++) { m_admissible_tasks[p]->compute_low_rank_data(generator, *m_low_rank_generator, m_reqrank, m_epsilon); // local_low_rank_leaves.emplace_back(m_admissible_tasks[p]); - if (m_admissible_tasks[p]->get_low_rank_data()->rank_of() == -1) { + if (m_admissible_tasks[p]->get_low_rank_data()->get_U().nb_rows() != m_admissible_tasks[p]->get_target_cluster().get_size() || m_admissible_tasks[p]->get_low_rank_data()->get_V().nb_cols() != m_admissible_tasks[p]->get_source_cluster().get_size()) { // local_low_rank_leaves.pop_back(); m_admissible_tasks[p]->clear_low_rank_data(); // if (m_dense_blocks_generator.get() == nullptr) { diff --git a/include/htool/local_operators/local_dense_matrix.hpp b/include/htool/local_operators/local_dense_matrix.hpp index 1981dfbb..9e8a35bc 100644 --- a/include/htool/local_operators/local_dense_matrix.hpp +++ b/include/htool/local_operators/local_dense_matrix.hpp @@ -2,7 +2,7 @@ #ifndef HTOOL_TESTING_LOCAL_DENSE_MATRIX_HPP #define HTOOL_TESTING_LOCAL_DENSE_MATRIX_HPP -#include "../basic_types/matrix.hpp" +#include "../matrix/matrix.hpp" #include "../clustering/cluster_node.hpp" #include "../hmatrix/interfaces/virtual_generator.hpp" 
#include "local_operator.hpp" diff --git a/include/htool/local_operators/local_hmatrix.hpp b/include/htool/local_operators/local_hmatrix.hpp index 2f8b1ea0..e86c80ae 100644 --- a/include/htool/local_operators/local_hmatrix.hpp +++ b/include/htool/local_operators/local_hmatrix.hpp @@ -2,7 +2,7 @@ #ifndef HTOOL_TESTING_LOCAL_HMATRIX_HPP #define HTOOL_TESTING_LOCAL_HMATRIX_HPP -#include "../basic_types/matrix.hpp" +#include "../matrix/matrix.hpp" #include "../clustering/cluster_node.hpp" #include "../hmatrix/hmatrix.hpp" #include "../hmatrix/interfaces/virtual_generator.hpp" diff --git a/include/htool/local_operators/local_operator.hpp b/include/htool/local_operators/local_operator.hpp index fa833927..2138137f 100644 --- a/include/htool/local_operators/local_operator.hpp +++ b/include/htool/local_operators/local_operator.hpp @@ -2,7 +2,7 @@ #ifndef HTOOL_TESTING_LOCAL_OPERATOR_HPP #define HTOOL_TESTING_LOCAL_OPERATOR_HPP -#include "../basic_types/matrix.hpp" +#include "../matrix/matrix.hpp" #include "../clustering/cluster_node.hpp" #include "../hmatrix/interfaces/virtual_generator.hpp" #include "../local_operators/virtual_local_operator.hpp" diff --git a/include/htool/matrix/linalg/add_matrix_matrix_product.hpp b/include/htool/matrix/linalg/add_matrix_matrix_product.hpp new file mode 100644 index 00000000..1b818088 --- /dev/null +++ b/include/htool/matrix/linalg/add_matrix_matrix_product.hpp @@ -0,0 +1,101 @@ +#ifndef HTOOL_MATRIX_LINALG_ADD_MATRIX_MATRIX_PRODUCT_HPP +#define HTOOL_MATRIX_LINALG_ADD_MATRIX_MATRIX_PRODUCT_HPP + +#include "../matrix.hpp" +namespace htool { + +template +void add_matrix_matrix_product(char transa, char transb, T alpha, const Matrix &A, const T *in, T beta, T *out, int mu) { + int nr = A.nb_rows(); + int nc = A.nb_cols(); + char trans = transa; + int lda = nr; + int M = nr; + int N = mu; + int K = nc; + int ldb = nc; + int ldc = nr; + if (trans != 'N') { + M = nc; + N = mu; + K = nr; + ldb = nr; + ldc = nc; + } + + Blas::gemm(&transa, 
&transb, &M, &N, &K, &alpha, A.data(), &lda, in, &ldb, &beta, out, &ldc); +} + +template +void add_matrix_matrix_product(char transa, char transb, T alpha, const Matrix &A, const Matrix &B, T beta, Matrix &C) { + add_matrix_matrix_product(transa, transb, alpha, A, B.data(), beta, C.data(), C.nb_cols()); +} + +template +void add_matrix_matrix_product_symmetric(char side, char, char transb, T alpha, const Matrix &A, const T *in, T beta, T *out, const int &mu, char UPLO, char) { + + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_matrix_product_symmetric (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + int M = (side == 'L') ? A.nb_rows() : mu; + int N = (side == 'L') ? mu : A.nb_cols(); + int lda = A.nb_rows(); + int ldb = (side == 'L') ? A.nb_cols() : mu; + int ldc = ldb; + Blas::symm(&side, &UPLO, &M, &N, &alpha, A.data(), &lda, in, &ldb, &beta, out, &ldc); +} + +template +void add_matrix_matrix_product_symmetric(char side, char transa, char transb, std::complex alpha, const Matrix> &A, const std::complex *in, std::complex beta, std::complex *out, const int &mu, char UPLO, char symmetry) { + int nr = A.nb_rows(); + + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_matrix_product_symmetric (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + if (side != 'L') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_matrix_product_symmetric (side=" + std::string(1, side) + ")"); // LCOV_EXCL_LINE + } + + if (nr) { + int lda = nr; + int M = nr; + int N = mu; + int ldb = A.nb_cols(); + int ldc = nr; + + if (symmetry == 'S' && (transa == 'N' || transa == 'T')) { + Blas>::symm(&side, &UPLO, &M, &N, &alpha, A.data(), &lda, in, &ldb, &beta, out, &ldc); + } else if (symmetry == 'H' && (transa == 'N' || transa == 'C')) { + Blas>::hemm(&side, &UPLO, &M, &N, 
&alpha, A.data(), &lda, in, &ldb, &beta, out, &ldc); + } else if (symmetry == 'S' && transa == 'C') { + std::vector> conjugate_in(nr * mu); + std::complex conjugate_alpha = std::conj(alpha); + std::complex conjugate_beta = std::conj(beta); + std::transform(in, in + nr * mu, conjugate_in.data(), [](const std::complex &c) { return std::conj(c); }); + conj_if_complex>(out, A.nb_cols() * mu); + Blas>::symm(&side, &UPLO, &M, &N, &conjugate_alpha, A.data(), &lda, conjugate_in.data(), &ldb, &conjugate_beta, out, &ldc); + conj_if_complex>(out, A.nb_cols() * mu); + } else if (symmetry == 'H' && transa == 'T') { + std::vector> conjugate_in(nr * mu); + std::complex conjugate_alpha = std::conj(alpha); + std::complex conjugate_beta = std::conj(beta); + std::transform(in, in + nr * mu, conjugate_in.data(), [](const std::complex &c) { return std::conj(c); }); + std::transform(out, out + nr * mu, out, [](const std::complex &c) { return std::conj(c); }); + Blas>::hemm(&side, &UPLO, &M, &N, &conjugate_alpha, A.data(), &lda, conjugate_in.data(), &ldb, &conjugate_beta, out, &ldc); + std::transform(out, out + nr * mu, out, [](const std::complex &c) { return std::conj(c); }); + } else { + htool::Logger::get_instance().log(LogLevel::ERROR, "Invalid arguments for add_matrix_product_symmetric: " + std::string(1, transa) + " with " + symmetry + ")\n"); // LCOV_EXCL_LINE + } + } +} + +template +void add_matrix_matrix_product_symmetric(char side, char transa, char transb, T alpha, const Matrix &A, const Matrix &B, T beta, Matrix &C, char UPLO, char symm) { + int mu = (side == 'L') ? 
B.nb_cols() : B.nb_rows(); + add_matrix_matrix_product_symmetric(side, transa, transb, alpha, A, B.data(), beta, C.data(), mu, UPLO, symm); +} + +} // namespace htool + +#endif diff --git a/include/htool/matrix/linalg/add_matrix_matrix_product_row_major.hpp b/include/htool/matrix/linalg/add_matrix_matrix_product_row_major.hpp new file mode 100644 index 00000000..da488071 --- /dev/null +++ b/include/htool/matrix/linalg/add_matrix_matrix_product_row_major.hpp @@ -0,0 +1,116 @@ +#ifndef HTOOL_MATRIX_LINALG_ADD_MATRIX_MATRIX_PRODUCT_ROW_MAJOR_HPP +#define HTOOL_MATRIX_LINALG_ADD_MATRIX_MATRIX_PRODUCT_ROW_MAJOR_HPP + +#include "../matrix.hpp" +namespace htool { + +template +void add_matrix_matrix_product_row_major(char transa, char transb, T alpha, const Matrix &A, const T *in, T beta, T *out, int mu) { + + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_matrix_product_row_major (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + int nr = A.nb_rows(); + int nc = A.nb_cols(); + char inverted_transa = 'N'; + char inverted_transb = 'T'; + int M = mu; + int N = nr; + int K = nc; + int lda = mu; + int ldb = nr; + int ldc = mu; + if (transa != 'N') { + inverted_transb = 'N'; + N = nc; + K = nr; + } + if (transa == 'C' && is_complex()) { + std::vector conjugate_in(nr * mu); + T conjugate_alpha = conj_if_complex(alpha); + T conjugate_beta = conj_if_complex(beta); + std::transform(in, in + nr * mu, conjugate_in.data(), [](const T &c) { return conj_if_complex(c); }); + conj_if_complex(out, nc * mu); + Blas::gemm(&inverted_transa, &inverted_transb, &M, &N, &K, &conjugate_alpha, conjugate_in.data(), &lda, A.data(), &ldb, &conjugate_beta, out, &ldc); + conj_if_complex(out, nc * mu); + return; + } + Blas::gemm(&inverted_transa, &inverted_transb, &M, &N, &K, &alpha, in, &lda, A.data(), &ldb, &beta, out, &ldc); +} + +template +void add_matrix_matrix_product_symmetric_row_major(char side, char, char 
transb, T alpha, const Matrix &A, const T *in, T beta, T *out, const int &mu, char UPLO, char) { + + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_matrix_product_symmetric (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + if (side != 'L') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_matrix_product_symmetric (side=" + std::string(1, side) + ")"); // LCOV_EXCL_LINE + } + + int nr = A.nb_rows(); + + if (nr) { + int lda = nr; + char inverted_side = 'R'; + int M = mu; + int N = nr; + int ldb = mu; + int ldc = mu; + + Blas::symm(&inverted_side, &UPLO, &M, &N, &alpha, A.data(), &lda, in, &ldb, &beta, out, &ldc); + } +} + +template +void add_matrix_matrix_product_symmetric_row_major(char side, char transa, char transb, std::complex alpha, const Matrix> &A, const std::complex *in, std::complex beta, std::complex *out, const int &mu, char UPLO, char symmetry) { + int nr = A.nb_rows(); + + if (transb != 'N') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_matrix_product_symmetric (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE + } + + if (side != 'L') { + htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_matrix_product_symmetric (side=" + std::string(1, side) + ")"); // LCOV_EXCL_LINE + } + + if (nr) { + int lda = nr; + char inverted_side = 'R'; + int M = mu; + int N = nr; + int ldb = mu; + int ldc = mu; + + if (symmetry == 'S' && (transa == 'N' || transa == 'T')) { + Blas>::symm(&inverted_side, &UPLO, &M, &N, &alpha, A.data(), &lda, in, &ldb, &beta, out, &ldc); + } else if (symmetry == 'H' && transa == 'T') { + Blas>::hemm(&inverted_side, &UPLO, &M, &N, &alpha, A.data(), &lda, in, &ldb, &beta, out, &ldc); + } else if (symmetry == 'S' && transa == 'C') { + std::vector> conjugate_in(nr * mu); + std::complex conjugate_alpha 
= std::conj(alpha); + std::complex conjugate_beta = std::conj(beta); + std::transform(in, in + nr * mu, conjugate_in.data(), [](const std::complex &c) { return std::conj(c); }); + conj_if_complex>(out, A.nb_cols() * mu); + Blas>::symm(&inverted_side, &UPLO, &M, &N, &conjugate_alpha, A.data(), &lda, conjugate_in.data(), &ldb, &conjugate_beta, out, &ldc); + conj_if_complex>(out, A.nb_cols() * mu); + } else if (symmetry == 'H' && (transa == 'N' || transa == 'C')) { + std::vector> conjugate_in(nr * mu); + std::complex conjugate_alpha = std::conj(alpha); + std::complex conjugate_beta = std::conj(beta); + std::transform(in, in + nr * mu, conjugate_in.data(), [](const std::complex &c) { return std::conj(c); }); + conj_if_complex>(out, A.nb_cols() * mu); + Blas>::hemm(&inverted_side, &UPLO, &M, &N, &conjugate_alpha, A.data(), &lda, conjugate_in.data(), &ldb, &conjugate_beta, out, &ldc); + conj_if_complex>(out, A.nb_cols() * mu); + } else { + htool::Logger::get_instance().log(LogLevel::ERROR, "Invalid arguments for add_matrix_product_symmetric_row_major: " + std::string(1, transa) + " with " + symmetry + ")\n"); // LCOV_EXCL_LINE + // throw std::invalid_argument("[Htool error] Operation is not supported (" + std::string(1, trans) + " with " + symmetry + ")"); // LCOV_EXCL_LINE + } + } +} + +} // namespace htool + +#endif diff --git a/include/htool/matrix/linalg/add_matrix_vector_product.hpp b/include/htool/matrix/linalg/add_matrix_vector_product.hpp new file mode 100644 index 00000000..28300ee9 --- /dev/null +++ b/include/htool/matrix/linalg/add_matrix_vector_product.hpp @@ -0,0 +1,65 @@ +#ifndef HTOOL_MATRIX_LINALG_ADD_MATRIX_VECTOR_PRODUCT_HPP +#define HTOOL_MATRIX_LINALG_ADD_MATRIX_VECTOR_PRODUCT_HPP + +#include "../matrix.hpp" +namespace htool { + +template +void add_matrix_vector_product(char trans, T alpha, const Matrix &A, const T *in, T beta, T *out) { + int nr = A.nb_rows(); + int nc = A.nb_cols(); + int lda = nr; + int incx = 1; + int incy = 1; + 
Blas::gemv(&trans, &nr, &nc, &alpha, A.data(), &lda, in, &incx, &beta, out, &incy); +} + +template +void add_matrix_vector_product_symmetric(char, T alpha, const Matrix &A, const T *in, T beta, T *out, char UPLO, char) { + int nr = A.nb_rows(); + int lda = nr; + + if (nr) { + int incx = 1; + int incy = 1; + Blas::symv(&UPLO, &nr, &alpha, A.data(), &lda, in, &incx, &beta, out, &incy); + } +} + +template +void add_matrix_vector_product_symmetric(char trans, std::complex alpha, const Matrix> &A, const std::complex *in, std::complex beta, std::complex *out, char UPLO, char symmetry) { + int nr = A.nb_rows(); + if (nr) { + int lda = nr; + int incx = 1; + int incy = 1; + if (symmetry == 'S' && (trans == 'N' || trans == 'T')) { + Blas>::symv(&UPLO, &nr, &alpha, A.data(), &lda, in, &incx, &beta, out, &incy); + } else if (symmetry == 'H' && (trans == 'N' || trans == 'C')) { + Blas>::hemv(&UPLO, &nr, &alpha, A.data(), &lda, in, &incx, &beta, out, &incy); + } else if (symmetry == 'S' && trans == 'C') { + std::vector> conjugate_in(nr); + std::complex conjugate_alpha = std::conj(alpha); + std::complex conjugate_beta = std::conj(beta); + std::transform(in, in + nr, conjugate_in.data(), [](const std::complex &c) { return std::conj(c); }); + std::transform(out, out + nr, out, [](const std::complex &c) { return std::conj(c); }); + Blas>::symv(&UPLO, &nr, &conjugate_alpha, A.data(), &lda, conjugate_in.data(), &incx, &conjugate_beta, out, &incy); + std::transform(out, out + nr, out, [](const std::complex &c) { return std::conj(c); }); + } else if (symmetry == 'H' && trans == 'T') { + std::vector> conjugate_in(nr); + std::complex conjugate_alpha = std::conj(alpha); + std::complex conjugate_beta = std::conj(beta); + std::transform(in, in + nr, conjugate_in.data(), [](const std::complex &c) { return std::conj(c); }); + std::transform(out, out + nr, out, [](const std::complex &c) { return std::conj(c); }); + Blas>::hemv(&UPLO, &nr, &conjugate_alpha, A.data(), &lda, conjugate_in.data(), 
&incx, &conjugate_beta, out, &incy); + std::transform(out, out + nr, out, [](const std::complex &c) { return std::conj(c); }); + + } else { + htool::Logger::get_instance().log(LogLevel::ERROR, "Invalid arguments for add_vector_product_symmetric: " + std::string(1, trans) + " with " + symmetry + ")\n"); // LCOV_EXCL_LINE + // throw std::invalid_argument("[Htool error] Invalid arguments for add_vector_product_symmetric"); // LCOV_EXCL_LINE + } + } +} + +} // namespace htool +#endif diff --git a/include/htool/matrix/linalg/interface.hpp b/include/htool/matrix/linalg/interface.hpp new file mode 100644 index 00000000..54500193 --- /dev/null +++ b/include/htool/matrix/linalg/interface.hpp @@ -0,0 +1,10 @@ +#ifndef HTOOL_MATRIX_LINALG_INTERFACE_HPP +#define HTOOL_MATRIX_LINALG_INTERFACE_HPP + +#include "add_matrix_matrix_product.hpp" +#include "add_matrix_matrix_product_row_major.hpp" +#include "add_matrix_vector_product.hpp" +#include "scale.hpp" +#include "transpose.hpp" + +#endif diff --git a/include/htool/matrix/linalg/scale.hpp b/include/htool/matrix/linalg/scale.hpp new file mode 100644 index 00000000..c69c7070 --- /dev/null +++ b/include/htool/matrix/linalg/scale.hpp @@ -0,0 +1,15 @@ +#ifndef HTOOL_MATRIX_LINALG_SCALE_HPP +#define HTOOL_MATRIX_LINALG_SCALE_HPP + +#include "../matrix.hpp" +namespace htool { + +template +void scale(T da, Matrix &A) { + int size = A.nb_cols() * A.nb_rows(); + int incx = 1; + Blas::scal(&size, &da, A.data(), &incx); +} + +} // namespace htool +#endif diff --git a/include/htool/matrix/linalg/transpose.hpp b/include/htool/matrix/linalg/transpose.hpp new file mode 100644 index 00000000..de720f44 --- /dev/null +++ b/include/htool/matrix/linalg/transpose.hpp @@ -0,0 +1,18 @@ +#ifndef HTOOL_MATRIX_LINALG_TRANSPOSE_HPP +#define HTOOL_MATRIX_LINALG_TRANSPOSE_HPP + +#include "../matrix.hpp" +namespace htool { + +template +void transpose(const Matrix &A, Matrix &B) { + B.resize(A.nb_cols(), A.nb_rows()); + for (int i = 0; i < A.nb_rows(); i++) { 
+ for (int j = 0; j < A.nb_cols(); j++) { + B(j, i) = A(i, j); + } + } +} + +} // namespace htool +#endif diff --git a/include/htool/basic_types/matrix.hpp b/include/htool/matrix/matrix.hpp similarity index 97% rename from include/htool/basic_types/matrix.hpp rename to include/htool/matrix/matrix.hpp index 4ba2d817..9624441c 100644 --- a/include/htool/basic_types/matrix.hpp +++ b/include/htool/matrix/matrix.hpp @@ -1,10 +1,10 @@ #ifndef HTOOL_BASIC_TYPES_MATRIX_HPP #define HTOOL_BASIC_TYPES_MATRIX_HPP +#include "../basic_types/vector.hpp" #include "../misc/logger.hpp" #include "../misc/misc.hpp" #include "../wrappers/wrapper_blas.hpp" -#include "vector.hpp" #include #include #include @@ -21,9 +21,9 @@ class Matrix { public: Matrix() : m_number_of_rows(0), m_number_of_cols(0), m_data(nullptr), m_is_owning_data(true) {} - Matrix(int nbr, int nbc) : m_number_of_rows(nbr), m_number_of_cols(nbc), m_is_owning_data(true) { + Matrix(int nbr, int nbc, T value = 0) : m_number_of_rows(nbr), m_number_of_cols(nbc), m_is_owning_data(true) { m_data = new T[nbr * nbc]; - std::fill_n(m_data, nbr * nbc, 0); + std::fill_n(m_data, nbr * nbc, value); } Matrix(const Matrix &rhs) : m_number_of_rows(rhs.m_number_of_rows), m_number_of_cols(rhs.m_number_of_cols), m_is_owning_data(true) { m_data = new T[rhs.m_number_of_rows * rhs.m_number_of_cols](); @@ -38,7 +38,7 @@ class Matrix { std::copy_n(rhs.m_data, m_number_of_rows * m_number_of_cols, m_data); m_number_of_rows = rhs.m_number_of_rows; m_number_of_cols = rhs.m_number_of_cols; - m_is_owning_data = true; + // m_is_owning_data = true; } else { m_number_of_rows = rhs.m_number_of_rows; m_number_of_cols = rhs.m_number_of_cols; @@ -105,7 +105,7 @@ class Matrix { T *data() { return m_data; } - T *data() const { + const T *data() const { return m_data; } @@ -208,12 +208,15 @@ class Matrix { the number of rows is set to _nbr_ and the number of columns is set to _nbc_. 
*/ - void resize(const int nbr, const int nbc, T value = 0) { - if (m_data != nullptr and m_is_owning_data) { + void resize(int nbr, int nbc, T value = 0) { + if (m_data != nullptr and m_is_owning_data and m_number_of_rows * m_number_of_cols != nbr * nbc) { delete[] m_data; m_data = nullptr; + m_data = new T[nbr * nbc]; + } else if (m_number_of_rows * m_number_of_cols != nbr * nbc) { + m_data = new T[nbr * nbc]; } - m_data = new T[nbr * nbc]; + m_number_of_rows = nbr; m_number_of_cols = nbc; std::fill_n(m_data, nbr * nbc, value); @@ -596,10 +599,6 @@ class Matrix { } } - void scale(T alpha) { - std::transform(m_data, m_data + m_number_of_rows * m_number_of_cols, m_data, std::bind(std::multiplies(), std::placeholders::_1, alpha)); - } - //! ### Looking for the entry of maximal modulus /*! Returns the number of row and column of the entry diff --git a/include/htool/matrix/utils/SVD_truncation.hpp b/include/htool/matrix/utils/SVD_truncation.hpp new file mode 100644 index 00000000..3bd13e0b --- /dev/null +++ b/include/htool/matrix/utils/SVD_truncation.hpp @@ -0,0 +1,52 @@ +#ifndef HTOOL_MATRIX_UTILS_SVD_TRUNCATION_HPP +#define HTOOL_MATRIX_UTILS_SVD_TRUNCATION_HPP +#include "../../wrappers/wrapper_lapack.hpp" +#include "../matrix.hpp" + +namespace htool { + +template +int SVD_truncation(Matrix &A, htool::underlying_type epsilon, Matrix &u, Matrix &vt, std::vector> &singular_values) { + // A= u*S*vt + singular_values.resize(std::min(A.nb_rows(), A.nb_cols())); + u.resize(A.nb_rows(), A.nb_rows()); + vt.resize(A.nb_cols(), A.nb_cols()); + { + int M = A.nb_rows(); + int N = A.nb_cols(); + int lda = M; + int ldu = M; + int ldvt = N; + int lwork = -1; + int info; + std::vector work(1); + std::vector> rwork(5 * std::min(M, N)); + + Lapack::gesvd("A", "A", &M, &N, A.data(), &lda, singular_values.data(), u.data(), &ldu, vt.data(), &ldvt, work.data(), &lwork, rwork.data(), &info); + lwork = (int)std::real(work[0]); + work.resize(lwork); + Lapack::gesvd("A", "A", &M, &N, 
A.data(), &lda, singular_values.data(), u.data(), &ldu, vt.data(), &ldvt, work.data(), &lwork, rwork.data(), &info); + } + + // Compute truncated rank to define tildeS + int truncated_rank; + { + int j = singular_values.size(); + underlying_type svd_norm = 0; + underlying_type error = 0; + for (auto &elt : singular_values) + svd_norm += elt * elt; + svd_norm = std::sqrt(svd_norm); + + do { + j = j - 1; + error += std::pow(std::abs(singular_values[j]), 2); + } while (j > 0 && std::sqrt(error) / svd_norm < epsilon); + + truncated_rank = j + 1; + } + + return truncated_rank; +} +} // namespace htool +#endif diff --git a/include/htool/misc/evp.hpp b/include/htool/misc/evp.hpp index e1d82b57..fc98e46d 100644 --- a/include/htool/misc/evp.hpp +++ b/include/htool/misc/evp.hpp @@ -1,7 +1,7 @@ #ifndef HTOOL_EVP_HPP #define HTOOL_EVP_HPP -#include "../basic_types/matrix.hpp" +#include "../matrix/matrix.hpp" #include #include #include diff --git a/include/htool/solvers/ddm.hpp b/include/htool/solvers/ddm.hpp index 7bf80f76..6bc95a0e 100644 --- a/include/htool/solvers/ddm.hpp +++ b/include/htool/solvers/ddm.hpp @@ -1,7 +1,7 @@ #ifndef HTOOL_DDM_HPP #define HTOOL_DDM_HPP -#include "../basic_types/matrix.hpp" +#include "../matrix/matrix.hpp" #include "../distributed_operator/distributed_operator.hpp" #include "../misc/logger.hpp" #include "../misc/misc.hpp" diff --git a/include/htool/solvers/geneo/coarse_space_builder.hpp b/include/htool/solvers/geneo/coarse_space_builder.hpp index 74229920..7635f388 100644 --- a/include/htool/solvers/geneo/coarse_space_builder.hpp +++ b/include/htool/solvers/geneo/coarse_space_builder.hpp @@ -1,7 +1,7 @@ #ifndef HTOOL_GENEO_COARSE_SPACE_BUILDER_HPP #define HTOOL_GENEO_COARSE_SPACE_BUILDER_HPP -#include "../../basic_types/matrix.hpp" +#include "../../matrix/matrix.hpp" // #include "../../wrappers/wrapper_hpddm.hpp" #include "../../wrappers/wrapper_lapack.hpp" // #include "../hmatrix/hmatrix.hpp" diff --git 
a/include/htool/solvers/interfaces/virtual_coarse_operator_builder.hpp b/include/htool/solvers/interfaces/virtual_coarse_operator_builder.hpp index 33a1c0fc..1fd981ec 100644 --- a/include/htool/solvers/interfaces/virtual_coarse_operator_builder.hpp +++ b/include/htool/solvers/interfaces/virtual_coarse_operator_builder.hpp @@ -1,7 +1,7 @@ #ifndef HTOOL_VIRTUAL_OPERATOR_SPACE_BUILDER_HPP #define HTOOL_VIRTUAL_OPERATOR_SPACE_BUILDER_HPP -#include "../../basic_types/matrix.hpp" +#include "../../matrix/matrix.hpp" namespace htool { diff --git a/include/htool/testing/generate_test_case.hpp b/include/htool/testing/generate_test_case.hpp new file mode 100644 index 00000000..2cbda45b --- /dev/null +++ b/include/htool/testing/generate_test_case.hpp @@ -0,0 +1,118 @@ + +#ifndef HTOOL_TESTING_GENERATE_TEST_CASE_HPP +#define HTOOL_TESTING_GENERATE_TEST_CASE_HPP +#include "../clustering/cluster_node.hpp" +#include "../clustering/tree_builder/tree_builder.hpp" +#include "geometry.hpp" +#include "partition.hpp" +namespace htool { + +template +class TestCase { + public: + char transa; + char transb; + char side; + char symmetry; + char UPLO; + std::vector> x1; + std::vector> x2; + std::vector> x3; + std::shared_ptr>> root_cluster_1 = nullptr; + std::shared_ptr>> root_cluster_2 = nullptr; + std::shared_ptr>> root_cluster_3 = nullptr; + const Cluster> *root_cluster_A_input = nullptr; + const Cluster> *root_cluster_A_output = nullptr; + const Cluster> *root_cluster_B_input = nullptr; + const Cluster> *root_cluster_B_output = nullptr; + const Cluster> *root_cluster_C_input = nullptr; + const Cluster> *root_cluster_C_output = nullptr; + std::unique_ptr operator_A = nullptr; + std::unique_ptr operator_B = nullptr; + std::unique_ptr operator_C = nullptr; + int ni_A; + int no_A; + int ni_B; + int no_B; + int ni_C; + int no_C; + + TestCase(char transa0, char transb0, int n1, int n2, int n3, underlying_type z_distance_A, underlying_type z_distance_B, char side0, char symmetry0, char UPLO0, 
int number_of_partition = -1) : transa(transa0), transb(transb0), side(side0), symmetry(symmetry0), UPLO(UPLO0), x1(3 * n1), x2(3 * n2), x3(3 * n3) { + + // Input sizes + ni_A = (transa == 'T' || transa == 'C') ? n1 : n2; + no_A = (transa == 'T' || transa == 'C') ? n2 : n1; + ni_B = (transb == 'T' || transb == 'C') ? n2 : n3; + no_B = (transb == 'T' || transb == 'C') ? n3 : n2; + ni_C = n3; + no_C = n1; + + ClusterTreeBuilder> recursive_build_strategy; + + // First geometry + create_disk(3, 0., n1, x1.data()); + if (number_of_partition > 0) { + std::vector partition; + test_partition(3, n1, x1, number_of_partition, partition); + root_cluster_1 = std::make_shared>>(recursive_build_strategy.create_cluster_tree(n1, 3, x1.data(), 2, number_of_partition, partition.data())); + } else { + root_cluster_1 = std::make_shared>>(recursive_build_strategy.create_cluster_tree(n1, 3, x1.data(), 2, 2)); + } + + // Second geometry + if (symmetry == 'N' || n1 != n2 || side != 'L') { + create_disk(3, z_distance_A, n2, x2.data()); + if (number_of_partition > 0) { + std::vector partition; + test_partition(3, n2, x2, number_of_partition, partition); + root_cluster_2 = std::make_shared>>(recursive_build_strategy.create_cluster_tree(n2, 3, x2.data(), 2, number_of_partition, partition.data())); + } else { + + root_cluster_2 = std::make_shared>>(recursive_build_strategy.create_cluster_tree(n2, 3, x2.data(), 2, 2)); + } + } else { + x2 = x1; + root_cluster_2 = root_cluster_1; + } + + // Third geometry + if (symmetry == 'N' || n2 != n3 || side != 'R') { + create_disk(3, z_distance_B, n3, x3.data()); + if (number_of_partition > 0) { + std::vector partition; + test_partition(3, n3, x3, number_of_partition, partition); + root_cluster_3 = std::make_shared>>(recursive_build_strategy.create_cluster_tree(n3, 3, x3.data(), 2, number_of_partition, partition.data())); + } else { + root_cluster_3 = std::make_shared>>(recursive_build_strategy.create_cluster_tree(n3, 3, x3.data(), 2, 2)); + } + } else { + 
x3 = x2; + root_cluster_3 = root_cluster_2; + } + + // Operators + if (transa == 'N') { + operator_A = std::make_unique(3, no_A, ni_A, x1, x2, *root_cluster_1, *root_cluster_2, true, true); + root_cluster_A_output = root_cluster_1.get(); + root_cluster_A_input = root_cluster_2.get(); + } else { + operator_A = std::make_unique(3, no_A, ni_A, x2, x1, *root_cluster_2, *root_cluster_1, true, true); + root_cluster_A_output = root_cluster_2.get(); + root_cluster_A_input = root_cluster_1.get(); + } + if (transb == 'N') { + operator_B = std::make_unique(3, no_B, ni_B, x2, x3, *root_cluster_2, *root_cluster_3, true, true); + root_cluster_B_output = root_cluster_2.get(); + root_cluster_B_input = root_cluster_3.get(); + } else { + operator_B = std::make_unique(3, no_B, ni_B, x3, x2, *root_cluster_3, *root_cluster_2, true, true); + root_cluster_B_output = root_cluster_3.get(); + root_cluster_B_input = root_cluster_2.get(); + } + operator_C = std::make_unique(3, no_C, ni_C, x1, x3, *root_cluster_1, *root_cluster_3, true, true); + root_cluster_C_input = root_cluster_3.get(); + root_cluster_C_output = root_cluster_1.get(); + } +}; +} // namespace htool +#endif diff --git a/include/htool/testing/generator_input.hpp b/include/htool/testing/generator_input.hpp index 9677435b..e95e906f 100644 --- a/include/htool/testing/generator_input.hpp +++ b/include/htool/testing/generator_input.hpp @@ -8,7 +8,7 @@ namespace htool { template -void generate_random_vector(std::vector &random_vector) { +void generate_random_array(T *ptr, size_t size) { T lower_bound = 0; T upper_bound = 1000; std::random_device rd; @@ -18,10 +18,10 @@ void generate_random_vector(std::vector &random_vector) { return dist(mersenne_engine); }; - std::generate(begin(random_vector), end(random_vector), gen); + std::generate(ptr, ptr + size, gen); } template <> -void generate_random_vector(std::vector &random_vector) { +void generate_random_array(int *ptr, size_t size) { int lower_bound = 0; int upper_bound = 10000; 
std::random_device rd; @@ -31,12 +31,12 @@ void generate_random_vector(std::vector &random_vector) { return dist(mersenne_engine); }; - std::generate(begin(random_vector), end(random_vector), gen); + std::generate(ptr, ptr + size, gen); } template -void generate_random_vector(std::vector> &random_vector) { - std::vector random_vector_real(random_vector.size()), random_vector_imag(random_vector.size()); +void generate_random_array(std::complex *ptr, size_t size) { + std::vector random_vector_real(size), random_vector_imag(size); T lower_bound = 0; T upper_bound = 10000; std::random_device rd; @@ -48,11 +48,16 @@ void generate_random_vector(std::vector> &random_vector) { std::generate(begin(random_vector_real), end(random_vector_real), gen); std::generate(begin(random_vector_imag), end(random_vector_imag), gen); - std::transform(random_vector_real.begin(), random_vector_real.end(), random_vector_imag.begin(), random_vector.begin(), [](double da, double db) { + std::transform(random_vector_real.begin(), random_vector_real.end(), random_vector_imag.begin(), ptr, [](double da, double db) { return std::complex(da, db); }); } +template +void generate_random_vector(std::vector &random_vector) { + generate_random_array(random_vector.data(), random_vector.size()); +} + template void generate_random_scalar(T &coefficient) { T lower_bound = 0; @@ -74,6 +79,11 @@ void generate_random_scalar(std::complex &coefficient) { coefficient.imag(dist(mersenne_engine)); } +template +void generate_random_matrix(Matrix &matrix) { + generate_random_array(matrix.data(), matrix.nb_cols() * matrix.nb_rows()); +} + // template <> // void generate_random_scalar(int &coefficient) { // int lower_bound = 0; diff --git a/include/htool/testing/generator_test.hpp b/include/htool/testing/generator_test.hpp index da584624..b52f10b5 100644 --- a/include/htool/testing/generator_test.hpp +++ b/include/htool/testing/generator_test.hpp @@ -2,7 +2,7 @@ #ifndef HTOOL_TESTING_GENERATOR_TEST_HPP #define 
HTOOL_TESTING_GENERATOR_TEST_HPP -#include "../basic_types/matrix.hpp" +#include "../matrix/matrix.hpp" #include "../clustering/cluster_node.hpp" #include "../hmatrix/interfaces/virtual_generator.hpp" #include diff --git a/include/htool/testing/geometry.hpp b/include/htool/testing/geometry.hpp index 5fce6a97..946f05c3 100644 --- a/include/htool/testing/geometry.hpp +++ b/include/htool/testing/geometry.hpp @@ -2,7 +2,7 @@ #ifndef HTOOL_TESTING_GEOMETRY_HPP #define HTOOL_TESTING_GEOMETRY_HPP -#include "../basic_types/matrix.hpp" +#include "../matrix/matrix.hpp" #include #include #include @@ -45,6 +45,7 @@ void create_sphere(int nr, T *const xt, std::array offset = {0, 0, 0}) { xt[3 * j + 2] = offset[2] + std::cbrt(rho) * std::cos(phi); } } + } // namespace htool #endif diff --git a/include/htool/testing/partition.hpp b/include/htool/testing/partition.hpp index 602abba9..19fad9f2 100644 --- a/include/htool/testing/partition.hpp +++ b/include/htool/testing/partition.hpp @@ -2,7 +2,7 @@ #ifndef HTOOL_TESTING_PARTITION_HPP #define HTOOL_TESTING_PARTITION_HPP -#include "../basic_types/matrix.hpp" +#include "../matrix/matrix.hpp" #include "../misc/evp.hpp" namespace htool { diff --git a/include/htool/wrappers/wrapper_blas.hpp b/include/htool/wrappers/wrapper_blas.hpp index a48a518c..ae58f34d 100644 --- a/include/htool/wrappers/wrapper_blas.hpp +++ b/include/htool/wrappers/wrapper_blas.hpp @@ -11,6 +11,7 @@ #define HTOOL_GENERATE_EXTERN_BLAS(C, T) \ void HTOOL_BLAS_F77(C##axpy)(const int *, const T *, const T *, const int *, T *, const int *); \ + void HTOOL_BLAS_F77(C##scal)(const int *, const T *, T *, const int *); \ void HTOOL_BLAS_F77(C##gemv)(const char *, const int *, const int *, const T *, const T *, const int *, const T *, const int *, const T *, T *, const int *); \ void HTOOL_BLAS_F77(C##gemm)(const char *, const char *, const int *, const int *, const int *, const T *, const T *, const int *, const T *, const int *, const T *, T *, const int *); \ void 
HTOOL_BLAS_F77(C##symv)(const char *, const int *, const T *, const T *, const int *, const T *, const int *, const T *, T *, const int *); \ @@ -58,6 +59,9 @@ struct Blas { /* Function: axpy * Computes a scalar-vector product and adds the result to a vector. */ static void axpy(const int *const, const K *const, const K *const, const int *const, K *const, const int *const); + /* Function: scal + * Computes the product of a vector by a scalar. */ + static void scal(const int *const, const K *const, K *const, const int *const); /* Function: nrm2 * Computes the Euclidean norm of a vector. */ static underlying_type nrm2(const int *const, const K *const, const int *const); @@ -109,6 +113,11 @@ struct Blas { (n, a, x, incx, y, incy); \ } \ template <> \ + inline void Blas::scal(const int *const n, const T *const a, T *const x, const int *const incx) { \ + HTOOL_BLAS_F77(C##scal) \ + (n, a, x, incx); \ + } \ + template <> \ inline T Blas::dot(const int *const n, const T *const x, const int *const incx, const T *const y, const int *const incy) { \ T sum = T(); \ for (int i = 0, j = 0, k = 0; i < *n; ++i, j += *incx, k += *incy) \ diff --git a/include/htool/wrappers/wrapper_lapack.hpp b/include/htool/wrappers/wrapper_lapack.hpp index dc0ecbf1..275cf6e0 100644 --- a/include/htool/wrappers/wrapper_lapack.hpp +++ b/include/htool/wrappers/wrapper_lapack.hpp @@ -9,17 +9,24 @@ # define HTOOL_LAPACK_F77(func) func##_ #endif -#define HTOOL_GENERATE_EXTERN_LAPACK(C, T, U, SYM, ORT) \ - void HTOOL_LAPACK_F77(C##SYM##gv)(const int *, const char *, const char *, const int *, T *, const int *, T *, const int *, T *, T *, const int *, int *); +#define HTOOL_GENERATE_EXTERN_LAPACK(C, T) \ + void HTOOL_LAPACK_F77(C##geqrf)(const int *, const int *, T *, const int *, T *, T *, const int *, int *); \ + void HTOOL_LAPACK_F77(C##gelqf)(const int *, const int *, T *, const int *, T *, T *, const int *, int *); // const int *itype, const char *jobz, const char *UPLO, const int *n, T *A, const 
int *lda, T *B, const int *ldb, T *W, T *work, const int *lwork, U *, int *info) #define HTOOL_GENERATE_EXTERN_LAPACK_COMPLEX(C, T, B, U) \ + HTOOL_GENERATE_EXTERN_LAPACK(B, U) \ + HTOOL_GENERATE_EXTERN_LAPACK(C, T) \ void HTOOL_LAPACK_F77(B##gesvd)(const char *, const char *, const int *, const int *, U *, const int *, U *, U *, const int *, U *, const int *, U *, const int *, int *); \ void HTOOL_LAPACK_F77(C##gesvd)(const char *, const char *, const int *, const int *, T *, const int *, U *, T *, const int *, T *, const int *, T *, const int *, U *, int *); \ void HTOOL_LAPACK_F77(B##ggev)(const char *, const char *, const int *, U *, const int *, U *, const int *, U *, U *, U *, U *, const int *, U *, const int *, U *, const int *, int *); \ void HTOOL_LAPACK_F77(C##ggev)(const char *, const char *, const int *, T *, const int *, T *, const int *, T *, T *, T *, const int *, T *, const int *, T *, const int *, U *, int *); \ void HTOOL_LAPACK_F77(B##sygv)(const int *, const char *, const char *, const int *, U *, const int *, U *, const int *, U *, U *, const int *, int *); \ - void HTOOL_LAPACK_F77(C##hegv)(const int *, const char *, const char *, const int *, T *, const int *, T *, const int *, U *, T *, const int *, U *, int *); + void HTOOL_LAPACK_F77(C##hegv)(const int *, const char *, const char *, const int *, T *, const int *, T *, const int *, U *, T *, const int *, U *, int *); \ + void HTOOL_LAPACK_F77(B##ormlq)(const char *, const char *, const int *, const int *, const int *, const U *, const int *, const U *, U *, const int *, U *, const int *, int *); \ + void HTOOL_LAPACK_F77(C##unmlq)(const char *, const char *, const int *, const int *, const int *, const T *, const int *, const T *, T *, const int *, T *, const int *, int *); \ + void HTOOL_LAPACK_F77(B##ormqr)(const char *, const char *, const int *, const int *, const int *, const U *, const int *, const U *, U *, const int *, U *, const int *, int *); \ + void 
HTOOL_LAPACK_F77(C##unmqr)(const char *, const char *, const int *, const int *, const int *, const T *, const int *, const T *, T *, const int *, T *, const int *, int *); #if !defined(PETSC_HAVE_BLASLAPACK) # ifndef _MKL_H_ @@ -45,6 +52,18 @@ namespace htool { * K - Scalar type. */ template struct Lapack { + /* Function: geqrf + * Computes a QR decomposition of a rectangular matrix. */ + static void geqrf(const int *, const int *, K *, const int *, K *, K *, const int *, int *); + /* Function: gelqf + * Computes a LQ decomposition of a rectangular matrix. */ + static void gelqf(const int *, const int *, K *, const int *, K *, K *, const int *, int *); + /* Function: mqr + * Multiplies a matrix by an orthogonal or unitary matrix obtained with geqrf. */ + static void mqr(const char *, const char *, const int *, const int *, const int *, const K *, const int *, const K *, K *, const int *, K *, const int *, int *); + /* Function: mlq + * Multiplies a matrix by an orthogonal or unitary matrix obtained with gelqf. */ + static void mlq(const char *, const char *, const int *, const int *, const int *, const K *, const int *, const K *, K *, const int *, K *, const int *, int *); /* Function: gesvd * computes the singular value decomposition (SVD). 
*/ static void gesvd(const char *, const char *, const int *, const int *, K *, const int *, underlying_type *, K *, const int *, K *, const int *, K *, const int *, underlying_type *, int *); @@ -56,7 +75,41 @@ struct Lapack { static void gv(const int *, const char *, const char *, const int *, K *, const int *, K *, const int *, underlying_type *, K *, const int *, underlying_type *, int *); }; +# define HTOOL_GENERATE_LAPACK(C, T) \ + template <> \ + inline void Lapack::geqrf(const int *m, const int *n, T *a, const int *lda, T *tau, T *work, const int *lwork, int *info) { \ + HTOOL_LAPACK_F77(C##geqrf) \ + (m, n, a, lda, tau, work, lwork, info); \ + } \ + template <> \ + inline void Lapack::gelqf(const int *m, const int *n, T *a, const int *lda, T *tau, T *work, const int *lwork, int *info) { \ + HTOOL_LAPACK_F77(C##gelqf) \ + (m, n, a, lda, tau, work, lwork, info); \ + } + # define HTOOL_GENERATE_LAPACK_COMPLEX(C, T, B, U) \ + HTOOL_GENERATE_LAPACK(B, U) \ + HTOOL_GENERATE_LAPACK(C, T) \ + template <> \ + inline void Lapack::mqr(const char *side, const char *trans, const int *m, const int *n, const int *k, const U *a, const int *lda, const U *tau, U *c, const int *ldc, U *work, const int *lwork, int *info) { \ + HTOOL_LAPACK_F77(B##ormqr) \ + (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info); \ + } \ + template <> \ + inline void Lapack::mqr(const char *side, const char *trans, const int *m, const int *n, const int *k, const T *a, const int *lda, const T *tau, T *c, const int *ldc, T *work, const int *lwork, int *info) { \ + HTOOL_LAPACK_F77(C##unmqr) \ + (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info); \ + } \ + template <> \ + inline void Lapack::mlq(const char *side, const char *trans, const int *m, const int *n, const int *k, const U *a, const int *lda, const U *tau, U *c, const int *ldc, U *work, const int *lwork, int *info) { \ + HTOOL_LAPACK_F77(B##ormlq) \ + (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info); \ + 
} \ + template <> \ + inline void Lapack::mlq(const char *side, const char *trans, const int *m, const int *n, const int *k, const T *a, const int *lda, const T *tau, T *c, const int *ldc, T *work, const int *lwork, int *info) { \ + HTOOL_LAPACK_F77(C##unmlq) \ + (side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info); \ + } \ template <> \ inline void Lapack::gesvd(const char *jobu, const char *jobvt, const int *m, const int *n, U *a, const int *lda, U *s, U *u, const int *ldu, U *vt, const int *ldvt, U *work, const int *lwork, U *, int *info) { \ HTOOL_LAPACK_F77(B##gesvd) \ diff --git a/tests/functional_tests/CMakeLists.txt b/tests/functional_tests/CMakeLists.txt index 5c73f642..8e762adf 100644 --- a/tests/functional_tests/CMakeLists.txt +++ b/tests/functional_tests/CMakeLists.txt @@ -10,6 +10,10 @@ add_custom_target(build-tests-clustering) add_dependencies(build-tests build-tests-clustering) add_subdirectory(clustering) +add_custom_target(build-tests-matrix) +add_dependencies(build-tests build-tests-matrix) +add_subdirectory(matrix) + add_custom_target(build-tests-hmatrix) add_dependencies(build-tests build-tests-hmatrix) add_subdirectory(hmatrix) diff --git a/tests/functional_tests/basic_types/CMakeLists.txt b/tests/functional_tests/basic_types/CMakeLists.txt index 22408edf..cb0d5602 100644 --- a/tests/functional_tests/basic_types/CMakeLists.txt +++ b/tests/functional_tests/basic_types/CMakeLists.txt @@ -2,20 +2,6 @@ #=========================== Executables =====================================# #=============================================================================# -add_custom_target(build-tests-basic-types-matrix-product) -add_dependencies(build-tests-basic-types build-tests-basic-types-matrix-product) -add_subdirectory(matrix_product) - -add_executable(Test_basic_types_matrix test_basic_types_matrix.cpp) -target_link_libraries(Test_basic_types_matrix htool) -add_dependencies(build-tests-basic-types Test_basic_types_matrix) 
-add_test(Test_basic_types_matrix Test_basic_types_matrix) - -add_executable(Test_basic_types_matrix_file test_basic_types_matrix_file.cpp) -target_link_libraries(Test_basic_types_matrix_file htool) -add_dependencies(build-tests-basic-types Test_basic_types_matrix_file) -add_test(Test_basic_types_matrix_file Test_basic_types_matrix_file) - add_executable(Test_basic_types_vector test_basic_types_vector.cpp) target_link_libraries(Test_basic_types_vector htool) add_dependencies(build-tests-basic-types Test_basic_types_vector) diff --git a/tests/functional_tests/basic_types/test_basic_type_matrix_product.hpp b/tests/functional_tests/basic_types/test_basic_type_matrix_product.hpp deleted file mode 100644 index 850acec8..00000000 --- a/tests/functional_tests/basic_types/test_basic_type_matrix_product.hpp +++ /dev/null @@ -1,132 +0,0 @@ -#include -#include - -using namespace std; -using namespace htool; - -template -bool test_matrix_product(int nr, int nc, int mu, char op, char symmetry, char UPLO) { - - bool is_error = false; - - // Generate random matrix - vector data(nr * nc); - generate_random_vector(data); - Matrix matrix; - matrix.assign(nr, nc, data.data(), false); - - if (symmetry == 'S') { - for (int i = 0; i < nr; i++) { - for (int j = 0; j < i; j++) { - matrix(i, j) = matrix(j, i); - } - } - } else if (symmetry == 'H') { - for (int i = 0; i < nr; i++) { - for (int j = 0; j < i; j++) { - matrix(i, j) = conj_if_complex(matrix(j, i)); - } - matrix(i, i) = std::real(matrix(i, i)); - } - } - - // Input - int ni = (op == 'T' || op == 'C') ? nr : nc; - int no = (op == 'T' || op == 'C') ? 
nc : nr; - vector x(ni * mu, 1), y(no * mu, 1), ref(no * mu, 0), out(ref); - T alpha, beta; - htool::underlying_type error; - generate_random_vector(x); - generate_random_vector(y); - generate_random_scalar(alpha); - generate_random_scalar(beta); - - // reference - if (op == 'N') { - for (int p = 0; p < mu; p++) { - for (int i = 0; i < no; i++) { - for (int j = 0; j < ni; j++) { - ref.at(i + p * no) += alpha * matrix(i, j) * x.at(j + p * ni); - } - ref.at(i + p * no) += beta * y.at(i + p * no); - } - } - } else if (op == 'T') { - for (int p = 0; p < mu; p++) { - for (int i = 0; i < no; i++) { - for (int j = 0; j < ni; j++) { - ref.at(i + p * no) += alpha * matrix(j, i) * x.at(j + p * ni); - } - ref.at(i + p * no) += beta * y.at(i + p * no); - } - } - } else if (op == 'C') { - for (int p = 0; p < mu; p++) { - for (int i = 0; i < no; i++) { - for (int j = 0; j < ni; j++) { - ref.at(i + p * no) += alpha * conj_if_complex(matrix(j, i)) * x.at(j + p * ni); - } - ref.at(i + p * no) += beta * y.at(i + p * no); - } - } - } - - // Row major inputs - vector x_row_major(ni * mu, 1), y_row_major(no * mu, 1), ref_row_major(no * mu, 0); - for (int i = 0; i < ni; i++) { - for (int j = 0; j < mu; j++) { - x_row_major[i * mu + j] = x[i + j * ni]; - } - } - - for (int i = 0; i < no; i++) { - for (int j = 0; j < mu; j++) { - y_row_major[i * mu + j] = y[i + j * no]; - ref_row_major[i * mu + j] = ref[i + j * no]; - } - } - - // Product - if (mu == 1) { - out = y; - matrix.add_vector_product(op, alpha, x.data(), beta, out.data()); - error = norm2(ref - out) / norm2(ref); - is_error = is_error || !(error < 1e-14); - cout << "> Errors on a matrix vector product: " << error << endl; - } - - if (mu == 1 && symmetry != 'N') { - out = y; - matrix.add_vector_product_symmetric(op, alpha, x.data(), beta, out.data(), UPLO, symmetry); - error = norm2(ref - out) / norm2(ref); - is_error = is_error || !(error < 1e-14); - cout << "> Errors on a symmetric matrix vector product: " << error << endl; - } 
- - out = y; - matrix.add_matrix_product(op, alpha, x.data(), beta, out.data(), mu); - error = norm2(ref - out) / norm2(ref); - is_error = is_error || !(error < 1e-14); - cout << "> Errors on a matrix matrix product: " << error << endl; - - out = y_row_major; - matrix.add_matrix_product_row_major(op, alpha, x_row_major.data(), beta, out.data(), mu); - error = norm2(ref_row_major - out) / norm2(ref_row_major); - is_error = is_error || !(error < 1e-14); - cout << "> Errors on a matrix matrix product with row major input: " << error << endl; - - if (symmetry != 'N') { - out = y; - matrix.add_matrix_product_symmetric(op, alpha, x.data(), beta, out.data(), mu, UPLO, symmetry); - error = norm2(ref - out) / norm2(ref); - is_error = is_error || !(error < 1e-14); - cout << "> Errors on a symmetric matrix matrix product: " << error << endl; - - out = y_row_major; - matrix.add_matrix_product_symmetric_row_major(op, alpha, x_row_major.data(), beta, out.data(), mu, UPLO, symmetry); - error = norm2(ref_row_major - out) / norm2(ref_row_major); - is_error = is_error || !(error < 1e-14); - cout << "> Errors on a symmetric matrix matrix product with row major input: " << error << endl; - } - return is_error; -} diff --git a/tests/functional_tests/clustering/CMakeLists.txt b/tests/functional_tests/clustering/CMakeLists.txt index f9c8cb1d..dff9bcf5 100755 --- a/tests/functional_tests/clustering/CMakeLists.txt +++ b/tests/functional_tests/clustering/CMakeLists.txt @@ -7,9 +7,13 @@ target_link_libraries(Test_cluster htool) add_dependencies(build-tests-clustering Test_cluster) add_test(NAME Test_cluster_1 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_cluster) +set_tests_properties(Test_cluster_1 PROPERTIES LABELS "mpi") add_test(NAME Test_cluster_2 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 2 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_cluster) +set_tests_properties(Test_cluster_2 PROPERTIES LABELS "mpi") add_test(NAME 
Test_cluster_3 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_cluster) +set_tests_properties(Test_cluster_3 PROPERTIES LABELS "mpi") add_test(NAME Test_cluster_4 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_cluster) +set_tests_properties(Test_cluster_4 PROPERTIES LABELS "mpi") # set(cluster_types "pca") # list(APPEND cluster_types "bounding_box") diff --git a/tests/functional_tests/clustering/test_cluster.hpp b/tests/functional_tests/clustering/test_cluster.hpp index 82577f22..b74c6263 100644 --- a/tests/functional_tests/clustering/test_cluster.hpp +++ b/tests/functional_tests/clustering/test_cluster.hpp @@ -97,9 +97,9 @@ bool test_cluster(int size, bool use_given_partition) { } // Testing save and read root cluster - save_cluster_tree(root_cluster, "test_save_" + NbrToStr(rankWorld)); - Cluster copied_cluster = read_cluster_tree("test_save_" + NbrToStr(rankWorld) + "_cluster_tree_properties.csv", "test_save_" + NbrToStr(rankWorld) + "_cluster_tree.csv"); - save_cluster_tree(copied_cluster, "test_save_2_" + NbrToStr(rankWorld)); + save_cluster_tree(root_cluster, "test_save_" + NbrToStr(rankWorld) + "_" + NbrToStr(sizeWorld)); + Cluster copied_cluster = read_cluster_tree("test_save_" + NbrToStr(rankWorld) + "_" + NbrToStr(sizeWorld) + "_cluster_tree_properties.csv", "test_save_" + NbrToStr(rankWorld) + "_" + NbrToStr(sizeWorld) + "_cluster_tree.csv"); + save_cluster_tree(copied_cluster, "test_save_2_" + NbrToStr(rankWorld) + "_" + NbrToStr(sizeWorld)); is_error = is_error || !(root_cluster.get_minclustersize() == copied_cluster.get_minclustersize()); is_error = is_error || !(root_cluster.get_maximal_depth() == copied_cluster.get_maximal_depth()); diff --git a/tests/functional_tests/distributed_operator/CMakeLists.txt b/tests/functional_tests/distributed_operator/CMakeLists.txt index 5899af7b..25c0faaa 100644 --- 
a/tests/functional_tests/distributed_operator/CMakeLists.txt +++ b/tests/functional_tests/distributed_operator/CMakeLists.txt @@ -15,10 +15,13 @@ foreach(type ${types}) add_dependencies(build-tests-distributed-operator Test_distributed_operator_product_${type}) add_test(NAME Test_distributed_operator_product_${type}_1 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_distributed_operator_product_${type}) set_tests_properties(Test_distributed_operator_product_${type}_1 PROPERTIES ENVIRONMENT OMP_NUM_THREADS=4) + set_tests_properties(Test_distributed_operator_product_${type}_1 PROPERTIES LABELS "mpi") add_test(NAME Test_distributed_operator_product_${type}_2 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 2 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_distributed_operator_product_${type}) set_tests_properties(Test_distributed_operator_product_${type}_2 PROPERTIES ENVIRONMENT OMP_NUM_THREADS=2) + set_tests_properties(Test_distributed_operator_product_${type}_2 PROPERTIES LABELS "mpi") add_test(NAME Test_distributed_operator_product_${type}_4 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_distributed_operator_product_${type}) set_tests_properties(Test_distributed_operator_product_${type}_4 PROPERTIES ENVIRONMENT OMP_NUM_THREADS=1) + set_tests_properties(Test_distributed_operator_product_${type}_4 PROPERTIES LABELS "mpi") endforeach() # endforeach() diff --git a/tests/functional_tests/distributed_operator/test_distributed_operator.hpp b/tests/functional_tests/distributed_operator/test_distributed_operator.hpp index fba7e313..44d568b9 100644 --- a/tests/functional_tests/distributed_operator/test_distributed_operator.hpp +++ b/tests/functional_tests/distributed_operator/test_distributed_operator.hpp @@ -244,8 +244,8 @@ auto add_off_diagonal_operator(ClusterTreeBuilder> &re local_off_diagonal_operator_2 = make_unique>>(*off_diagonal_matrix_2, local_target_root_cluster, 
*local_off_diagonal_cluster_tree_2, 'N', 'N', false, true); } else if (data_type == DataType::HMatrix || data_type == DataType::DefaultHMatrix) { - HMatrixTreeBuilder> hmatrix_builder_1(local_target_root_cluster, *local_off_diagonal_cluster_tree_1, epsilon, eta, 'N', 'N', -1, -1); - HMatrixTreeBuilder> hmatrix_builder_2(local_target_root_cluster, *local_off_diagonal_cluster_tree_2, epsilon, eta, 'N', 'N', -1, -1); + HMatrixTreeBuilder> hmatrix_builder_1(local_target_root_cluster, *local_off_diagonal_cluster_tree_1, epsilon, eta, 'N', 'N', -1, -1, -1); + HMatrixTreeBuilder> hmatrix_builder_2(local_target_root_cluster, *local_off_diagonal_cluster_tree_2, epsilon, eta, 'N', 'N', -1, -1, -1); off_diagonal_hmatrix_1 = make_unique>>(hmatrix_builder_1.build(*generator_off_diagonal)); off_diagonal_hmatrix_2 = make_unique>>(hmatrix_builder_2.build(*generator_off_diagonal)); @@ -381,7 +381,7 @@ bool test_custom_distributed_operator(int nr, int nc, int mu, bool use_permutati } local_operator = std::make_unique>>(*matrix, *actual_target_cluster, *actual_source_cluster, symmetry, uplo); } else if (data_type == DataType::HMatrix) { - HMatrixTreeBuilder> hmatrix_builder(*actual_target_cluster, *actual_source_cluster, epsilon, eta, symmetry, uplo, -1, -1); + HMatrixTreeBuilder> hmatrix_builder(*actual_target_cluster, *actual_source_cluster, epsilon, eta, symmetry, uplo, -1, -1, rankWorld); hmatrix = std::make_unique>>(hmatrix_builder.build(generator_permuted)); local_operator = std::make_unique>>(*hmatrix, *actual_target_cluster, *actual_source_cluster, symmetry, uplo); } diff --git a/tests/functional_tests/hmatrix/hmatrix_build/CMakeLists.txt b/tests/functional_tests/hmatrix/hmatrix_build/CMakeLists.txt index 687f04b8..214d8504 100755 --- a/tests/functional_tests/hmatrix/hmatrix_build/CMakeLists.txt +++ b/tests/functional_tests/hmatrix/hmatrix_build/CMakeLists.txt @@ -11,8 +11,14 @@ foreach(type ${types}) add_dependencies(build-tests-hmatrix-build Test_hmatrix_build_${type}) 
add_test(NAME Test_hmatrix_build_${type}_1 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_hmatrix_build_${type}) set_tests_properties(Test_hmatrix_build_${type}_1 PROPERTIES ENVIRONMENT OMP_NUM_THREADS=4) + set_tests_properties(Test_hmatrix_build_${type}_1 PROPERTIES LABELS "mpi") + add_test(NAME Test_hmatrix_build_${type}_2 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 2 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_hmatrix_build_${type}) set_tests_properties(Test_hmatrix_build_${type}_2 PROPERTIES ENVIRONMENT OMP_NUM_THREADS=2) + set_tests_properties(Test_hmatrix_build_${type}_2 PROPERTIES LABELS "mpi") + add_test(NAME Test_hmatrix_build_${type}_4 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_hmatrix_build_${type}) set_tests_properties(Test_hmatrix_build_${type}_4 PROPERTIES ENVIRONMENT OMP_NUM_THREADS=1) + set_tests_properties(Test_hmatrix_build_${type}_4 PROPERTIES LABELS "mpi") + endforeach() diff --git a/tests/functional_tests/hmatrix/hmatrix_product/CMakeLists.txt b/tests/functional_tests/hmatrix/hmatrix_product/CMakeLists.txt index 4b414f14..0cc5e2f4 100755 --- a/tests/functional_tests/hmatrix/hmatrix_product/CMakeLists.txt +++ b/tests/functional_tests/hmatrix/hmatrix_product/CMakeLists.txt @@ -11,8 +11,11 @@ foreach(type ${types}) add_dependencies(build-tests-hmatrix-product Test_hmatrix_product_${type}) add_test(NAME Test_hmatrix_product_${type}_1 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_hmatrix_product_${type}) set_tests_properties(Test_hmatrix_product_${type}_1 PROPERTIES ENVIRONMENT OMP_NUM_THREADS=4) + set_tests_properties(Test_hmatrix_product_${type}_1 PROPERTIES LABELS "mpi") add_test(NAME Test_hmatrix_product_${type}_2 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 2 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_hmatrix_product_${type}) 
set_tests_properties(Test_hmatrix_product_${type}_2 PROPERTIES ENVIRONMENT OMP_NUM_THREADS=2) + set_tests_properties(Test_hmatrix_product_${type}_2 PROPERTIES LABELS "mpi") add_test(NAME Test_hmatrix_product_${type}_4 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_hmatrix_product_${type}) set_tests_properties(Test_hmatrix_product_${type}_4 PROPERTIES ENVIRONMENT OMP_NUM_THREADS=1) + set_tests_properties(Test_hmatrix_product_${type}_4 PROPERTIES LABELS "mpi") endforeach() diff --git a/tests/functional_tests/hmatrix/hmatrix_product/test_hmatrix_product_complex_double.cpp b/tests/functional_tests/hmatrix/hmatrix_product/test_hmatrix_product_complex_double.cpp index 58f43eff..e8046d94 100644 --- a/tests/functional_tests/hmatrix/hmatrix_product/test_hmatrix_product_complex_double.cpp +++ b/tests/functional_tests/hmatrix/hmatrix_product/test_hmatrix_product_complex_double.cpp @@ -7,37 +7,39 @@ using namespace htool; int main(int argc, char *argv[]) { MPI_Init(&argc, &argv); - bool is_error = false; - const int number_of_rows = 200; - const int number_of_rows_increased = 400; - const int number_of_columns = 200; - const int number_of_columns_increased = 400; + bool is_error = false; + const int n1 = 400; + const int n1_increased = 600; + const int n2 = 400; + const int n2_increased = 600; + const double margin = 10; for (auto use_local_cluster : {true, false}) { for (auto epsilon : {1e-14, 1e-6}) { - for (auto number_of_rhs : {1, 5}) { + for (auto n3 : {100}) { for (auto operation : {'N', 'T'}) { // Square matrix - is_error = is_error || test_hmatrix_product, GeneratorTestComplexSymmetric>(number_of_rows, number_of_columns, number_of_rhs, use_local_cluster, operation, 'N', 'N', epsilon); - is_error = is_error || test_hmatrix_product, GeneratorTestComplexSymmetric>(number_of_rows, number_of_columns, number_of_rhs, use_local_cluster, operation, 'S', 'L', epsilon); - is_error = is_error || test_hmatrix_product, 
GeneratorTestComplexSymmetric>(number_of_rows, number_of_columns, number_of_rhs, use_local_cluster, operation, 'S', 'U', epsilon); + is_error = is_error || test_hmatrix_product, GeneratorTestComplexSymmetric>(operation, 'N', n1, n2, n3, 'N', 'N', 'N', use_local_cluster, epsilon, margin); + is_error = is_error || test_hmatrix_product, GeneratorTestComplexSymmetric>(operation, 'N', n1, n2, n3, 'L', 'S', 'L', use_local_cluster, epsilon, margin); + is_error = is_error || test_hmatrix_product, GeneratorTestComplexSymmetric>(operation, 'N', n1, n2, n3, 'L', 'S', 'U', use_local_cluster, epsilon, margin); // Rectangle matrix - is_error = is_error || test_hmatrix_product, GeneratorTestComplex>(number_of_rows_increased, number_of_columns, number_of_rhs, use_local_cluster, operation, 'N', 'N', epsilon); - is_error = is_error || test_hmatrix_product, GeneratorTestComplex>(number_of_rows, number_of_columns_increased, number_of_rhs, use_local_cluster, operation, 'N', 'N', epsilon); + is_error = is_error || test_hmatrix_product, GeneratorTestComplex>(operation, 'N', n1_increased, n2, n3, 'N', 'N', 'N', use_local_cluster, epsilon, margin); + is_error = is_error || test_hmatrix_product, GeneratorTestComplex>(operation, 'N', n1, n2_increased, n3, 'N', 'N', 'N', use_local_cluster, epsilon, margin); } - for (auto operation : {'N', 'C'}) { + // TODO: fix 'C' operation, missing some conj operations somewhere + // for (auto operation : {'N', 'C'}) { - // Square matrix - is_error = is_error || test_hmatrix_product, GeneratorTestComplexSymmetric>(number_of_rows, number_of_columns, number_of_rhs, use_local_cluster, operation, 'N', 'N', epsilon); - is_error = is_error || test_hmatrix_product, GeneratorTestComplexHermitian>(number_of_rows, number_of_columns, number_of_rhs, use_local_cluster, operation, 'H', 'L', epsilon); - is_error = is_error || test_hmatrix_product, GeneratorTestComplexHermitian>(number_of_rows, number_of_columns, number_of_rhs, use_local_cluster, operation, 'H', 'U', 
epsilon); + // // Square matrix + // is_error = is_error || test_hmatrix_product, GeneratorTestComplexSymmetric>(operation, 'N', n1, n2, n3, 'N', 'N', 'N', use_local_cluster, epsilon, margin); + // is_error = is_error || test_hmatrix_product, GeneratorTestComplexHermitian>(operation, 'N', n1, n2, n3, 'L', 'H', 'L', use_local_cluster, epsilon, margin); + // is_error = is_error || test_hmatrix_product, GeneratorTestComplexHermitian>(operation, 'N', n1, n2, n3, 'L', 'H', 'U', use_local_cluster, epsilon, margin); - // Rectangle matrix - is_error = is_error || test_hmatrix_product, GeneratorTestComplex>(number_of_rows_increased, number_of_columns, number_of_rhs, use_local_cluster, operation, 'N', 'N', epsilon); - is_error = is_error || test_hmatrix_product, GeneratorTestComplex>(number_of_rows, number_of_columns_increased, number_of_rhs, use_local_cluster, operation, 'N', 'N', epsilon); - } + // // Rectangle matrix + // is_error = is_error || test_hmatrix_product, GeneratorTestComplex>(operation, 'N', n1_increased, n2, n3, 'N', 'N', 'N', use_local_cluster, epsilon, margin); + // is_error = is_error || test_hmatrix_product, GeneratorTestComplex>(operation, 'N', n1, n2_increased, n3, 'N', 'N', 'N', use_local_cluster, epsilon, margin); + // } } } } diff --git a/tests/functional_tests/hmatrix/hmatrix_product/test_hmatrix_product_double.cpp b/tests/functional_tests/hmatrix/hmatrix_product/test_hmatrix_product_double.cpp index 8b50dee2..156c69b7 100644 --- a/tests/functional_tests/hmatrix/hmatrix_product/test_hmatrix_product_double.cpp +++ b/tests/functional_tests/hmatrix/hmatrix_product/test_hmatrix_product_double.cpp @@ -7,26 +7,27 @@ using namespace htool; int main(int argc, char *argv[]) { MPI_Init(&argc, &argv); - bool is_error = false; - const int number_of_rows = 200; - const int number_of_rows_increased = 400; - const int number_of_columns = 200; - const int number_of_columns_increased = 400; + bool is_error = false; + const int n1 = 400; + const int n1_increased = 
600; + const int n2 = 400; + const int n2_increased = 600; + const double margin = 10; for (auto use_local_cluster : {true, false}) { - for (auto epsilon : {1e-14, 1e-6}) { - for (auto number_of_rhs : {1, 5}) { + for (auto epsilon : {1e-6, 1e-10}) { + for (auto n3 : {300}) { for (auto operation : {'N', 'T'}) { - std::cout << use_local_cluster << " " << epsilon << " " << number_of_rhs << " " << operation << "\n"; + std::cout << use_local_cluster << " " << epsilon << " " << n3 << " " << operation << "\n"; // Square matrix - is_error = is_error || test_hmatrix_product(number_of_rows, number_of_columns, number_of_rhs, use_local_cluster, operation, 'N', 'N', epsilon); - is_error = is_error || test_hmatrix_product(number_of_rows, number_of_columns, number_of_rhs, use_local_cluster, operation, 'S', 'L', epsilon); - is_error = is_error || test_hmatrix_product(number_of_rows, number_of_columns, number_of_rhs, use_local_cluster, operation, 'S', 'U', epsilon); + is_error = is_error || test_hmatrix_product(operation, 'N', n1, n2, n3, 'N', 'N', 'N', use_local_cluster, epsilon, margin); + is_error = is_error || test_hmatrix_product(operation, 'N', n1, n2, n3, 'L', 'S', 'L', use_local_cluster, epsilon, margin); + is_error = is_error || test_hmatrix_product(operation, 'N', n1, n2, n3, 'L', 'S', 'U', use_local_cluster, epsilon, margin); // Rectangle matrix - is_error = is_error || test_hmatrix_product(number_of_rows_increased, number_of_columns, number_of_rhs, use_local_cluster, operation, 'N', 'N', epsilon); - is_error = is_error || test_hmatrix_product(number_of_rows, number_of_columns_increased, number_of_rhs, use_local_cluster, operation, 'N', 'N', epsilon); + is_error = is_error || test_hmatrix_product(operation, 'N', n1_increased, n2, n3, 'N', 'N', 'N', use_local_cluster, epsilon, margin); + is_error = is_error || test_hmatrix_product(operation, 'N', n1, n2_increased, n3, 'N', 'N', 'N', use_local_cluster, epsilon, margin); } } } diff --git 
a/tests/functional_tests/hmatrix/lrmat/CMakeLists.txt b/tests/functional_tests/hmatrix/lrmat/CMakeLists.txt index be7792ea..ab91efe9 100755 --- a/tests/functional_tests/hmatrix/lrmat/CMakeLists.txt +++ b/tests/functional_tests/hmatrix/lrmat/CMakeLists.txt @@ -9,3 +9,7 @@ add_subdirectory(lrmat_build) add_custom_target(build-tests-lrmat-product) add_dependencies(build-tests-lrmat build-tests-lrmat-product) add_subdirectory(lrmat_product) + +add_custom_target(build-tests-lrmat-addition) +add_dependencies(build-tests-lrmat build-tests-lrmat-addition) +add_subdirectory(lrmat_addition) diff --git a/tests/functional_tests/hmatrix/lrmat/lrmat_addition/CMakeLists.txt b/tests/functional_tests/hmatrix/lrmat/lrmat_addition/CMakeLists.txt new file mode 100755 index 00000000..5dc85d99 --- /dev/null +++ b/tests/functional_tests/hmatrix/lrmat/lrmat_addition/CMakeLists.txt @@ -0,0 +1,8 @@ +#=============================================================================# +#=========================== Executables =====================================# +#=============================================================================# + +add_executable(Test_lrmat_addition test_lrmat_addition.cpp) +target_link_libraries(Test_lrmat_addition htool) +add_dependencies(build-tests-lrmat-build Test_lrmat_addition) +add_test(Test_lrmat_addition Test_lrmat_addition) diff --git a/tests/functional_tests/hmatrix/lrmat/lrmat_addition/test_lrmat_addition.cpp b/tests/functional_tests/hmatrix/lrmat/lrmat_addition/test_lrmat_addition.cpp new file mode 100644 index 00000000..415bdaf9 --- /dev/null +++ b/tests/functional_tests/hmatrix/lrmat/lrmat_addition/test_lrmat_addition.cpp @@ -0,0 +1,20 @@ +#include "../test_lrmat_lrmat_addition.hpp" +#include +#include +#include +int main(int, char const *[]) { + bool is_error = false; + const int greater_size = 400; + const int lower_size = 200; + const int dummy = 100; + + is_error = is_error || test_lrmat_lrmat_addition>('N', 'N', greater_size, lower_size, dummy, 
1e-10); + is_error = is_error || test_lrmat_lrmat_addition>('N', 'N', lower_size, greater_size, dummy, 1e-10); + is_error = is_error || test_lrmat_lrmat_addition>('N', 'N', greater_size, lower_size, dummy, 1e-6); + is_error = is_error || test_lrmat_lrmat_addition>('N', 'N', lower_size, greater_size, dummy, 1e-6); + if (is_error) { + return 1; + } + + return 0; +} diff --git a/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_SVD.cpp b/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_SVD.cpp index 8c7f8139..4ee6033d 100644 --- a/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_SVD.cpp +++ b/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_SVD.cpp @@ -6,36 +6,30 @@ using namespace htool; int main(int, char *[]) { - bool is_error = false; - const int number_of_rows = 200; - const int number_of_rows_increased = 400; - const int number_of_columns = 200; - const int number_of_columns_increased = 400; - const int number_of_rhs = 5; - const double margin = 0; - - // Square matrix - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, 1, 'T', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, number_of_rhs, 'T', margin); - - is_error = is_error || test_lrmat_product, GeneratorTestComplex, SVD>>(number_of_rows, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, SVD>>(number_of_rows, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, SVD>>(number_of_rows, number_of_columns, 1, 'T', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, SVD>>(number_of_rows, number_of_columns, number_of_rhs, 'T', 
margin); - - // Rectangle matrix - is_error = is_error || test_lrmat_product>(number_of_rows_increased, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows_increased, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns_increased, 1, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns_increased, number_of_rhs, 'N', margin); - - is_error = is_error || test_lrmat_product, GeneratorTestComplex, SVD>>(number_of_rows_increased, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, SVD>>(number_of_rows_increased, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, SVD>>(number_of_rows, number_of_columns_increased, 1, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, SVD>>(number_of_rows, number_of_columns_increased, number_of_rhs, 'N', margin); - + bool is_error = false; + const int n1 = 200; + const int n1_increased = 400; + const int n2 = 200; + const int n2_increased = 400; + const int n3 = 100; + const double additional_compression_tolerance = 0; + const std::array additional_lrmat_sum_tolerances{1., 1., 1., 1.}; + + for (auto epsilon : {1e-6, 1e-10}) { + for (auto operation : {'N', 'T'}) { + std::cout << epsilon << " " << operation << "\n"; + // Square matrix + is_error = is_error || test_lrmat_product>(operation, 'N', n1, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + is_error = is_error || test_lrmat_product, GeneratorTestComplex, SVD>>(operation, 'N', n1, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + + // Rectangle matrix + is_error = is_error || test_lrmat_product>(operation, 'N', n1_increased, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + 
is_error = is_error || test_lrmat_product>(operation, 'N', n1, n2_increased, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + + is_error = is_error || test_lrmat_product, GeneratorTestComplex, SVD>>(operation, 'N', n1_increased, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + is_error = is_error || test_lrmat_product, GeneratorTestComplex, SVD>>(operation, 'N', n1, n2_increased, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + } + } if (is_error) { return 1; } diff --git a/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_fullACA.cpp b/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_fullACA.cpp index ac16d2ad..4a9f5792 100644 --- a/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_fullACA.cpp +++ b/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_fullACA.cpp @@ -6,35 +6,30 @@ using namespace htool; int main(int, char *[]) { - bool is_error = false; - const int number_of_rows = 200; - const int number_of_rows_increased = 400; - const int number_of_columns = 200; - const int number_of_columns_increased = 400; - const int number_of_rhs = 5; - const double margin = 0; + bool is_error = false; + const int n1 = 200; + const int n1_increased = 400; + const int n2 = 200; + const int n2_increased = 400; + const int n3 = 100; + const double additional_compression_tolerance = 0; + const std::array additional_lrmat_sum_tolerances{1., 1., 1., 1.}; - // Square matrix - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, 1, 'T', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, number_of_rhs, 'T', margin); + for (auto epsilon : 
{1e-6, 1e-10}) { + for (auto operation : {'N', 'T'}) { + std::cout << epsilon << " " << operation << "\n"; + // Square matrix + is_error = is_error || test_lrmat_product>(operation, 'N', n1, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + is_error = is_error || test_lrmat_product, GeneratorTestComplex, fullACA>>(operation, 'N', n1, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, fullACA>>(number_of_rows, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, fullACA>>(number_of_rows, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, fullACA>>(number_of_rows, number_of_columns, 1, 'T', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, fullACA>>(number_of_rows, number_of_columns, number_of_rhs, 'T', margin); + // Rectangle matrix + is_error = is_error || test_lrmat_product>(operation, 'N', n1_increased, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + is_error = is_error || test_lrmat_product>(operation, 'N', n1, n2_increased, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); - // Rectangle matrix - is_error = is_error || test_lrmat_product>(number_of_rows_increased, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows_increased, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns_increased, 1, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns_increased, number_of_rhs, 'N', margin); - - is_error = is_error || test_lrmat_product, GeneratorTestComplex, fullACA>>(number_of_rows_increased, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product, 
GeneratorTestComplex, fullACA>>(number_of_rows_increased, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, fullACA>>(number_of_rows, number_of_columns_increased, 1, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, fullACA>>(number_of_rows, number_of_columns_increased, number_of_rhs, 'N', margin); + is_error = is_error || test_lrmat_product, GeneratorTestComplex, fullACA>>(operation, 'N', n1_increased, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + is_error = is_error || test_lrmat_product, GeneratorTestComplex, fullACA>>(operation, 'N', n1, n2_increased, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + } + } if (is_error) { return 1; } diff --git a/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_partialACA.cpp b/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_partialACA.cpp index a68fb329..4e2af615 100644 --- a/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_partialACA.cpp +++ b/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_partialACA.cpp @@ -6,36 +6,30 @@ using namespace htool; int main(int, char *[]) { - bool is_error = false; - const int number_of_rows = 200; - const int number_of_rows_increased = 400; - const int number_of_columns = 200; - const int number_of_columns_increased = 400; - const int number_of_rhs = 5; - const double margin = 0; - - // Square matrix - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, 1, 'T', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, number_of_rhs, 'T', margin); - - is_error = is_error || 
test_lrmat_product, GeneratorTestComplex, partialACA>>(number_of_rows, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, partialACA>>(number_of_rows, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, partialACA>>(number_of_rows, number_of_columns, 1, 'T', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, partialACA>>(number_of_rows, number_of_columns, number_of_rhs, 'T', margin); - - // Rectangle matrix - is_error = is_error || test_lrmat_product>(number_of_rows_increased, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows_increased, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns_increased, 1, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns_increased, number_of_rhs, 'N', margin); - - is_error = is_error || test_lrmat_product, GeneratorTestComplex, partialACA>>(number_of_rows_increased, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, partialACA>>(number_of_rows_increased, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, partialACA>>(number_of_rows, number_of_columns_increased, 1, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, partialACA>>(number_of_rows, number_of_columns_increased, number_of_rhs, 'N', margin); - + bool is_error = false; + const int n1 = 200; + const int n1_increased = 400; + const int n2 = 200; + const int n2_increased = 400; + const int n3 = 100; + const double additional_compression_tolerance = 0; + const std::array additional_lrmat_sum_tolerances{1., 1., 1., 1.}; + + for (auto epsilon : {1e-6, 1e-10}) { + for (auto operation : {'N', 'T'}) { + std::cout << epsilon << " " << operation << 
"\n"; + // Square matrix + is_error = is_error || test_lrmat_product>(operation, 'N', n1, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + is_error = is_error || test_lrmat_product, GeneratorTestComplex, partialACA>>(operation, 'N', n1, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + + // Rectangle matrix + is_error = is_error || test_lrmat_product>(operation, 'N', n1_increased, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + is_error = is_error || test_lrmat_product>(operation, 'N', n1, n2_increased, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + + is_error = is_error || test_lrmat_product, GeneratorTestComplex, partialACA>>(operation, 'N', n1_increased, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + is_error = is_error || test_lrmat_product, GeneratorTestComplex, partialACA>>(operation, 'N', n1, n2_increased, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + } + } if (is_error) { return 1; } diff --git a/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_sympartialACA.cpp b/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_sympartialACA.cpp index c05c1de1..9a0654a4 100644 --- a/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_sympartialACA.cpp +++ b/tests/functional_tests/hmatrix/lrmat/lrmat_product/test_lrmat_product_sympartialACA.cpp @@ -6,36 +6,30 @@ using namespace htool; int main(int, char *[]) { - bool is_error = false; - const int number_of_rows = 200; - const int number_of_rows_increased = 400; - const int number_of_columns = 200; - const int number_of_columns_increased = 400; - const int number_of_rhs = 5; - const double margin = 0; - - // Square matrix - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, 1, 'N', margin); - is_error = is_error || 
test_lrmat_product>(number_of_rows, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, 1, 'T', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns, number_of_rhs, 'T', margin); - - is_error = is_error || test_lrmat_product, GeneratorTestComplex, sympartialACA>>(number_of_rows, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, sympartialACA>>(number_of_rows, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, sympartialACA>>(number_of_rows, number_of_columns, 1, 'T', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, sympartialACA>>(number_of_rows, number_of_columns, number_of_rhs, 'T', margin); - - // Rectangle matrix - is_error = is_error || test_lrmat_product>(number_of_rows_increased, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows_increased, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns_increased, 1, 'N', margin); - is_error = is_error || test_lrmat_product>(number_of_rows, number_of_columns_increased, number_of_rhs, 'N', margin); - - is_error = is_error || test_lrmat_product, GeneratorTestComplex, sympartialACA>>(number_of_rows_increased, number_of_columns, 1, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, sympartialACA>>(number_of_rows_increased, number_of_columns, number_of_rhs, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, sympartialACA>>(number_of_rows, number_of_columns_increased, 1, 'N', margin); - is_error = is_error || test_lrmat_product, GeneratorTestComplex, sympartialACA>>(number_of_rows, number_of_columns_increased, number_of_rhs, 'N', margin); - + bool is_error = false; + const int n1 = 200; + const int 
n1_increased = 400; + const int n2 = 200; + const int n2_increased = 400; + const int n3 = 100; + const double additional_compression_tolerance = 0; + const std::array additional_lrmat_sum_tolerances{1., 1., 1., 1.}; + + for (auto epsilon : {1e-6, 1e-10}) { + for (auto operation : {'N', 'T'}) { + std::cout << epsilon << " " << operation << "\n"; + // Square matrix + is_error = is_error || test_lrmat_product>(operation, 'N', n1, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + is_error = is_error || test_lrmat_product, GeneratorTestComplex, sympartialACA>>(operation, 'N', n1, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + + // Rectangle matrix + is_error = is_error || test_lrmat_product>(operation, 'N', n1_increased, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + is_error = is_error || test_lrmat_product>(operation, 'N', n1, n2_increased, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + + is_error = is_error || test_lrmat_product, GeneratorTestComplex, sympartialACA>>(operation, 'N', n1_increased, n2, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + is_error = is_error || test_lrmat_product, GeneratorTestComplex, sympartialACA>>(operation, 'N', n1, n2_increased, n3, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances); + } + } if (is_error) { return 1; } diff --git a/tests/functional_tests/hmatrix/lrmat/test_lrmat_build.hpp b/tests/functional_tests/hmatrix/lrmat/test_lrmat_build.hpp index 455f34d2..a2a9997a 100644 --- a/tests/functional_tests/hmatrix/lrmat/test_lrmat_build.hpp +++ b/tests/functional_tests/hmatrix/lrmat/test_lrmat_build.hpp @@ -4,6 +4,8 @@ #include #include +#include +#include #include #include #include @@ -11,8 +13,8 @@ using namespace std; using namespace htool; -template -bool test_lrmat(const Cluster &target_cluster, const Cluster 
&source_cluster, const GeneratorTestDouble &A, const LowRankMatrix &Fixed_approximation, const LowRankMatrix &Auto_approximation, std::pair fixed_compression_interval, std::pair auto_compression_interval) { +template +bool test_lrmat(const Cluster &target_cluster, const Cluster &source_cluster, const GeneratorTestDouble &A, const LowRankMatrix &Fixed_approximation, const LowRankMatrix &Auto_approximation, std::pair fixed_compression_interval, std::pair auto_compression_interval) { bool test = 0; @@ -36,6 +38,27 @@ bool test_lrmat(const Cluster &target_cluster, const Cluster &so test = test || !(fixed_compression_interval.first < Fixed_approximation.space_saving() && Fixed_approximation.space_saving() < fixed_compression_interval.second); cout << "> Compression rate : " << Fixed_approximation.space_saving() << endl; + // Recompression with fixed rank + auto recompressed_fixed_approximation = recompression(Fixed_approximation); + if (recompressed_fixed_approximation) { + std::vector fixed_errors_after_recompression; + for (int k = 0; k < recompressed_fixed_approximation->rank_of() + 1; k++) { + fixed_errors_after_recompression.push_back(Frobenius_absolute_error(target_cluster, source_cluster, *recompressed_fixed_approximation, A, k)); + } + + // Test rank + cout << "Recompression with fixed rank" << endl; + cout << "> rank : " << recompressed_fixed_approximation->rank_of() << endl; + + // Test Frobenius errors + test = test || !(fixed_errors_after_recompression.back() < recompressed_fixed_approximation->get_epsilon()); + cout << "> Errors with Frobenius norm : " << fixed_errors_after_recompression << endl; + + // Test compression + test = test || !(Fixed_approximation.space_saving() <= recompressed_fixed_approximation->space_saving()); + cout << "> Compression rate : " << recompressed_fixed_approximation->space_saving() << endl; + } + // ACA automatic building std::vector auto_errors; for (int k = 0; k < Auto_approximation.rank_of() + 1; k++) { @@ -51,5 +74,26 @@ 
bool test_lrmat(const Cluster &target_cluster, const Cluster &so test = test || !(auto_compression_interval.first < Auto_approximation.space_saving() && Auto_approximation.space_saving() < auto_compression_interval.second); cout << "> Compression rate : " << Auto_approximation.space_saving() << endl; + // Recompression with automatic rank + auto recompressed_auto_approximation = recompression(Auto_approximation); + if (recompressed_auto_approximation) { + std::vector fixed_errors_after_recompression; + for (int k = 0; k < recompressed_auto_approximation->rank_of() + 1; k++) { + fixed_errors_after_recompression.push_back(Frobenius_absolute_error(target_cluster, source_cluster, *recompressed_auto_approximation, A, k)); + } + + // Test rank + cout << "Recompression with auto rank" << endl; + cout << "> rank : " << recompressed_auto_approximation->rank_of() << endl; + + // Test Frobenius errors + test = test || !(fixed_errors_after_recompression.back() < recompressed_auto_approximation->get_epsilon()); + cout << "> Errors with Frobenius norm : " << fixed_errors_after_recompression << endl; + + // Test compression + test = test || !(Auto_approximation.space_saving() <= recompressed_auto_approximation->space_saving()); + cout << "> Compression rate : " << recompressed_auto_approximation->space_saving() << endl; + } + return test; } diff --git a/tests/functional_tests/hmatrix/lrmat/test_lrmat_lrmat_addition.hpp b/tests/functional_tests/hmatrix/lrmat/test_lrmat_lrmat_addition.hpp new file mode 100644 index 00000000..a74b3ace --- /dev/null +++ b/tests/functional_tests/hmatrix/lrmat/test_lrmat_lrmat_addition.hpp @@ -0,0 +1,62 @@ +#include +#include +#include +using namespace std; +using namespace htool; + +template +bool test_lrmat_lrmat_addition(char transa, char transb, int n1, int n2, int n3, htool::underlying_type epsilon) { + bool is_error = false; + TestCase test_case(transa, transb, n1, n2, n3, 2, 4, 'N', 'N', 'N'); + + // lrmat + Compressor compressor; + 
LowRankMatrix A_approximation(*test_case.operator_A, compressor, *test_case.root_cluster_A_output, *test_case.root_cluster_A_input, -1, epsilon); + LowRankMatrix zero_A_approximation(*test_case.operator_A, compressor, *test_case.root_cluster_A_output, *test_case.root_cluster_A_input, 0, epsilon); + + // Sub lrmat two level deep + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dist(0, 1); + int first_target_index = dist(gen); + int second_target_index = dist(gen); + int first_source_index = dist(gen); + int second_source_index = dist(gen); + + const Cluster> &sub_target_cluster = *test_case.root_cluster_A_output->get_children()[first_target_index]->get_children()[second_target_index]; + const Cluster> &sub_source_cluster = *test_case.root_cluster_A_input->get_children()[first_source_index]->get_children()[second_source_index]; + + LowRankMatrix sub_A_approximation(*test_case.operator_A, compressor, sub_target_cluster, sub_source_cluster, -1, epsilon); + + // Reference + Matrix A_dense(A_approximation.nb_rows(), A_approximation.nb_cols(), 0); + A_approximation.copy_to_dense(A_dense.data()); + Matrix sub_A_dense(sub_A_approximation.nb_rows(), sub_A_approximation.nb_cols(), 0); + sub_A_approximation.copy_to_dense(sub_A_dense.data()); + Matrix sub_A_dense_extended(A_approximation.nb_rows(), A_approximation.nb_cols(), 0); + + for (int i = 0; i < sub_A_dense.nb_rows(); i++) { + for (int j = 0; j < sub_A_dense.nb_cols(); j++) { + sub_A_dense_extended(i + sub_target_cluster.get_offset(), j + sub_source_cluster.get_offset()) = sub_A_dense(i, j); + } + } + Matrix matrix_result_w_sum, matrix_result_wo_sum, matrix_test(A_dense); + matrix_result_w_sum = A_dense + sub_A_dense_extended; + matrix_result_wo_sum = sub_A_dense_extended; + + // Addition + htool::underlying_type error; + add_lrmat_lrmat(A_approximation, *test_case.root_cluster_A_output, *test_case.root_cluster_A_input, sub_A_approximation, sub_target_cluster, sub_source_cluster); + 
A_approximation.copy_to_dense(matrix_test.data()); + error = normFrob(matrix_result_w_sum - matrix_test) / normFrob(matrix_result_w_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a lrmat lrmat addition with sum: " << error << endl; + + add_lrmat_lrmat(zero_A_approximation, *test_case.root_cluster_A_output, *test_case.root_cluster_A_input, sub_A_approximation, sub_target_cluster, sub_source_cluster); + zero_A_approximation.copy_to_dense(matrix_test.data()); + error = normFrob(matrix_result_wo_sum - matrix_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a lrmat lrmat addition without sum: " << error << endl; + + return is_error; +} diff --git a/tests/functional_tests/hmatrix/lrmat/test_lrmat_lrmat_product.hpp b/tests/functional_tests/hmatrix/lrmat/test_lrmat_lrmat_product.hpp new file mode 100644 index 00000000..9c369cb7 --- /dev/null +++ b/tests/functional_tests/hmatrix/lrmat/test_lrmat_lrmat_product.hpp @@ -0,0 +1,105 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace htool; + +template +bool test_lrmat_lrmat_product(const TestCase &test_case, htool::underlying_type epsilon, htool::underlying_type additional_compression_tolerance, htool::underlying_type additional_lrmat_sum_tolerance) { + + bool is_error = false; + char transa = test_case.transa; + // char transb = test_case.transb; + + // ACA automatic building + Compressor compressor; + LowRankMatrix A_auto_approximation(*test_case.operator_A, compressor, *test_case.root_cluster_A_output, *test_case.root_cluster_A_input, -1, epsilon); + LowRankMatrix B_auto_approximation(*test_case.operator_B, compressor, *test_case.root_cluster_B_output, *test_case.root_cluster_B_input, -1, epsilon); + LowRankMatrix C_auto_approximation(*test_case.operator_C, compressor, *test_case.root_cluster_C_output, *test_case.root_cluster_C_input, -1, epsilon); + + // partialACA 
fixed rank + int reqrank_max = 10; + LowRankMatrix A_fixed_approximation(*test_case.operator_A, compressor, *test_case.root_cluster_A_output, *test_case.root_cluster_A_input, std::max(A_auto_approximation.rank_of(), reqrank_max), epsilon); + LowRankMatrix B_fixed_approximation(*test_case.operator_B, compressor, *test_case.root_cluster_B_output, *test_case.root_cluster_B_input, std::max(B_auto_approximation.rank_of(), reqrank_max), epsilon); + LowRankMatrix C_fixed_approximation(*test_case.operator_C, compressor, *test_case.root_cluster_C_output, *test_case.root_cluster_C_input, std::max(C_auto_approximation.rank_of(), reqrank_max), epsilon); + LowRankMatrix lrmat_test(epsilon); + + // Random Input + htool::underlying_type error; + T alpha(1), beta(1); + generate_random_scalar(alpha); + generate_random_scalar(beta); + + // Reference matrix + Matrix A_dense(test_case.no_A, test_case.ni_A), B_dense(test_case.no_B, test_case.ni_B), C_dense(test_case.no_C, test_case.ni_C); + test_case.operator_A->copy_submatrix(test_case.no_A, test_case.ni_A, 0, 0, A_dense.data()); + test_case.operator_B->copy_submatrix(test_case.no_B, test_case.ni_B, 0, 0, B_dense.data()); + test_case.operator_C->copy_submatrix(test_case.no_C, test_case.ni_C, 0, 0, C_dense.data()); + Matrix matrix_result_w_matrix_sum(C_dense), matrix_result_wo_sum(C_dense), dense_lrmat_test, matrix_test, matrix_result_w_lrmat_sum(C_dense); + C_auto_approximation.copy_to_dense(matrix_result_w_lrmat_sum.data()); + + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, matrix_result_w_matrix_sum); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, matrix_result_w_lrmat_sum); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, T(0), matrix_result_wo_sum); + + std::cout << "test A " << A_fixed_approximation.rank_of() << " " << A_auto_approximation.rank_of() << "\n"; + std::cout << "test B " << B_fixed_approximation.rank_of() << " " << B_auto_approximation.rank_of() << 
"\n"; + std::cout << "test C " << C_fixed_approximation.rank_of() << " " << C_auto_approximation.rank_of() << "\n"; + // Product with fixed rank + matrix_test = C_dense; + add_lrmat_lrmat_product(transa, 'N', alpha, A_fixed_approximation, B_fixed_approximation, beta, matrix_test); + error = normFrob(matrix_result_w_matrix_sum - matrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < A_fixed_approximation.get_epsilon() * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat lrmat product to matrix with fixed approximation: " << error << endl; + + lrmat_test = C_fixed_approximation; + add_lrmat_lrmat_product(transa, 'N', alpha, A_fixed_approximation, B_fixed_approximation, T(0), lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_wo_sum - dense_lrmat_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < A_fixed_approximation.get_epsilon() * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat lrmat product to lrmat with fixed approximation and without lrmat sum: " << error << endl; + + lrmat_test = C_auto_approximation; + add_lrmat_lrmat_product(transa, 'N', alpha, A_fixed_approximation, B_fixed_approximation, beta, lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < A_fixed_approximation.get_epsilon() * (1 + additional_compression_tolerance + additional_lrmat_sum_tolerance)); + cout << "> Errors on a lrmat lrmat product to lrmat with fixed approximation and with lrmat sum: " << error << endl; + + // Product with automatic rank + matrix_test = C_dense; + add_lrmat_lrmat_product(transa, 'N', alpha, 
A_auto_approximation, B_auto_approximation, beta, matrix_test); + error = normFrob(matrix_result_w_matrix_sum - matrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < A_auto_approximation.get_epsilon() * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat lrmat product to matrix with auto approximation: " << error << endl; + + lrmat_test = C_auto_approximation; + add_lrmat_lrmat_product(transa, 'N', alpha, A_auto_approximation, B_auto_approximation, T(0), lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_wo_sum - dense_lrmat_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < A_auto_approximation.get_epsilon() * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat lrmat product to lrmat with auto approximation and without lrmat sum: " << error << endl; + + lrmat_test = C_auto_approximation; + add_lrmat_lrmat_product(transa, 'N', alpha, A_auto_approximation, B_auto_approximation, beta, lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < A_auto_approximation.get_epsilon() * (1 + additional_compression_tolerance + additional_lrmat_sum_tolerance)); + cout << "> Errors on a lrmat lrmat product to lrmat with auto approximation and with lrmat sum: " << error << endl; + + cout << "test : " << is_error << endl + << endl; + + return is_error; +} diff --git a/tests/functional_tests/hmatrix/lrmat/test_lrmat_matrix_product.hpp b/tests/functional_tests/hmatrix/lrmat/test_lrmat_matrix_product.hpp new file mode 100644 index 00000000..8d61c6d2 --- /dev/null +++ 
b/tests/functional_tests/hmatrix/lrmat/test_lrmat_matrix_product.hpp @@ -0,0 +1,147 @@ + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace htool; + +template +bool test_lrmat_matrix_product(const TestCase &test_case, htool::underlying_type epsilon, htool::underlying_type additional_compression_tolerance, htool::underlying_type additional_lrmat_sum_tolerance) { + bool is_error = false; + char transa = test_case.transa; + + // ACA automatic building + Compressor compressor; + LowRankMatrix A_auto_approximation(*test_case.operator_A, compressor, *test_case.root_cluster_A_output, *test_case.root_cluster_A_input, -1, epsilon); + LowRankMatrix C_auto_approximation(*test_case.operator_C, compressor, *test_case.root_cluster_C_output, *test_case.root_cluster_C_input, -1, epsilon); + + // partialACA fixed rank + int reqrank_max = 10; + LowRankMatrix A_fixed_approximation(*test_case.operator_A, compressor, *test_case.root_cluster_A_output, *test_case.root_cluster_A_input, std::max(A_auto_approximation.rank_of(), reqrank_max), epsilon); + LowRankMatrix C_fixed_approximation(*test_case.operator_C, compressor, *test_case.root_cluster_C_output, *test_case.root_cluster_C_input, std::max(C_auto_approximation.rank_of(), reqrank_max), epsilon); + LowRankMatrix lrmat_test(epsilon); + + // Reference matrix + Matrix A_dense(test_case.no_A, test_case.ni_A), B_dense(test_case.no_B, test_case.ni_B), C_dense(test_case.no_C, test_case.ni_C); + test_case.operator_A->copy_submatrix(test_case.no_A, test_case.ni_A, 0, 0, A_dense.data()); + test_case.operator_B->copy_submatrix(test_case.no_B, test_case.ni_B, 0, 0, B_dense.data()); + test_case.operator_C->copy_submatrix(test_case.no_C, test_case.ni_C, 0, 0, C_dense.data()); + Matrix matrix_test, dense_lrmat_test, transposed_B_dense, transposed_C_dense; + transpose(B_dense, transposed_B_dense); + transpose(C_dense, transposed_C_dense); + + // Random Input + htool::underlying_type error; + 
std::vector B_vec, C_vec, test_vec; + B_vec = B_dense.get_col(0); + C_vec = C_dense.get_col(0); + T alpha(1), beta(1), scaling_coefficient; + generate_random_scalar(alpha); + generate_random_scalar(beta); + generate_random_scalar(scaling_coefficient); + + // Reference matrix + Matrix matrix_result_w_matrix_sum(C_dense), matrix_result_wo_sum(C_dense), matrix_result_w_lrmat_sum(C_dense); + Matrix transposed_matrix_result_w_sum(transposed_C_dense); + C_auto_approximation.copy_to_dense(matrix_result_w_lrmat_sum.data()); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, matrix_result_w_matrix_sum); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, matrix_result_w_lrmat_sum); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, T(0), matrix_result_wo_sum); + transpose(matrix_result_w_matrix_sum, transposed_matrix_result_w_sum); + + Matrix scaled_matrix_result_w_matrix_sum(matrix_result_w_matrix_sum); + scale(scaling_coefficient, scaled_matrix_result_w_matrix_sum); + + // Tests for fixed rank + test_vec = C_vec; + add_lrmat_vector_product(transa, alpha, A_fixed_approximation, B_vec.data(), beta, test_vec.data()); + error = norm2(matrix_result_w_matrix_sum.get_col(0) - test_vec) / norm2(matrix_result_w_matrix_sum.get_col(0)); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat vector product with fixed approximation: " << error << endl; + + matrix_test = C_dense; + add_lrmat_matrix_product(transa, 'N', alpha, A_fixed_approximation, B_dense, beta, matrix_test); + error = normFrob(matrix_result_w_matrix_sum - matrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat matrix product to matrix with fixed approximation: " << error << endl; + + lrmat_test = C_fixed_approximation; + add_lrmat_matrix_product(transa, 'N', alpha, A_fixed_approximation, 
B_dense, T(0), lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_wo_sum - dense_lrmat_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat matrix product to lrmat with fixed approximation and without lrmat sum: " << error << endl; + + lrmat_test = C_fixed_approximation; + add_lrmat_matrix_product(transa, 'N', alpha, A_fixed_approximation, B_dense, beta, lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance + additional_lrmat_sum_tolerance)); + cout << "> Errors on a lrmat matrix product to lrmat with fixed approximation and with lrmat sum: " << error << endl; + + matrix_test = transposed_C_dense; + add_lrmat_matrix_product_row_major(transa, 'N', alpha, A_fixed_approximation, transposed_B_dense.data(), beta, matrix_test.data(), C_dense.nb_cols()); + error = normFrob(transposed_matrix_result_w_sum - matrix_test) / normFrob(transposed_matrix_result_w_sum); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat matrix product to matrix with fixed approximation and row major input: " << error << endl; + + matrix_test = C_dense; + scale(scaling_coefficient, A_fixed_approximation); + add_lrmat_matrix_product(transa, 'N', alpha, A_fixed_approximation, B_dense, scaling_coefficient * beta, matrix_test); + error = normFrob(scaled_matrix_result_w_matrix_sum - matrix_test) / normFrob(scaled_matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon * (1 + 
additional_compression_tolerance)); + cout << "> Errors on a scaled lrmat matrix product with fixed approximation: " << error << endl; + + // Tests for automatic rank + test_vec = C_vec; + add_lrmat_vector_product(transa, alpha, A_auto_approximation, B_vec.data(), beta, test_vec.data()); + error = norm2(matrix_result_w_matrix_sum.get_col(0) - test_vec) / norm2(matrix_result_w_matrix_sum.get_col(0)); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat vector product with auto approximation: " << error << endl; + + matrix_test = C_dense; + add_lrmat_matrix_product(transa, 'N', alpha, A_auto_approximation, B_dense, beta, matrix_test); + error = normFrob(matrix_result_w_matrix_sum - matrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat matrix product to matrix with auto approximation: " << error << endl; + + lrmat_test = C_auto_approximation; + add_lrmat_matrix_product(transa, 'N', alpha, A_auto_approximation, B_dense, T(0), lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_wo_sum - dense_lrmat_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat matrix product to lrmat with auto approximation and without lrmat sum: " << error << endl; + + lrmat_test = C_auto_approximation; + add_lrmat_matrix_product(transa, 'N', alpha, A_auto_approximation, B_dense, beta, lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < epsilon * (1 
+ additional_compression_tolerance + additional_lrmat_sum_tolerance)); + cout << "> Errors on a lrmat matrix product to lrmat with auto approximation and with lrmat sum: " << error << endl; + + matrix_test = transposed_C_dense; + add_lrmat_matrix_product_row_major(transa, 'N', alpha, A_auto_approximation, transposed_B_dense.data(), beta, matrix_test.data(), C_dense.nb_cols()); + error = normFrob(transposed_matrix_result_w_sum - matrix_test) / normFrob(transposed_matrix_result_w_sum); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance)); + cout << "> Errors on a lrmat matrix product to matrix with auto approximation and row major input: " << error << endl; + + matrix_test = C_dense; + scale(scaling_coefficient, A_auto_approximation); + add_lrmat_matrix_product(transa, 'N', alpha, A_auto_approximation, B_dense, scaling_coefficient * beta, matrix_test); + error = normFrob(scaled_matrix_result_w_matrix_sum - matrix_test) / normFrob(scaled_matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance)); + cout << "> Errors on a scaled lrmat matrix product with auto approximation: " << error << endl; + cout << "test : " << is_error << endl + << endl; + + return is_error; +} diff --git a/tests/functional_tests/hmatrix/lrmat/test_lrmat_product.hpp b/tests/functional_tests/hmatrix/lrmat/test_lrmat_product.hpp index 7c28fb5c..1bbd1b93 100644 --- a/tests/functional_tests/hmatrix/lrmat/test_lrmat_product.hpp +++ b/tests/functional_tests/hmatrix/lrmat/test_lrmat_product.hpp @@ -1,140 +1,28 @@ - -#include -#include -#include -#include -#include +#include "test_lrmat_lrmat_product.hpp" +#include "test_lrmat_matrix_product.hpp" +#include "test_matrix_lrmat_product.hpp" +#include "test_matrix_matrix_product.hpp" +#include using namespace std; using namespace htool; template -bool test_lrmat_product(int nr, int nc, int mu, char op, htool::underlying_type margin) { +bool test_lrmat_product(char transa, 
char transb, int n1, int n2, int n3, htool::underlying_type epsilon, htool::underlying_type additional_compression_tolerance, std::array, 4> additional_lrmat_sum_tolerances) { + bool is_error = false; const int ndistance = 4; htool::underlying_type distance[ndistance]; distance[0] = 15; distance[1] = 20; distance[2] = 30; distance[3] = 40; - - htool::underlying_type epsilon = 0.0001; - std::vector> xt(3 * nr); - std::vector> xs(3 * nc); - std::vector tabt(500); - std::vector tabs(100); - bool is_error = 0; for (int idist = 0; idist < ndistance; idist++) { - - create_disk(3, 0., nr, xt.data()); - create_disk(3, distance[idist], nc, xs.data()); - - ClusterTreeBuilder> recursive_build_strategy; - Cluster> target_root_cluster = recursive_build_strategy.create_cluster_tree(nr, 3, xt.data(), 2, 2); - Cluster> source_root_cluster = recursive_build_strategy.create_cluster_tree(nc, 3, xs.data(), 2, 2); - - GeneratorTestType A(3, nr, nc, xt, xs, target_root_cluster, source_root_cluster, true, true); - - // partialACA fixed rank - int reqrank_max = 10; - Compressor compressor; - LowRankMatrix Fixed_approximation(A, compressor, target_root_cluster, source_root_cluster, reqrank_max, epsilon); - - // ACA automatic building - LowRankMatrix Auto_approximation(A, compressor, target_root_cluster, source_root_cluster, -1, epsilon); - - // Input sizes - int ni = (op == 'T' || op == 'C') ? nr : nc; - int no = (op == 'T' || op == 'C') ? 
nc : nr; - - // Random vector - vector x(ni * mu, 1), y(no * mu, 1), ref(no * mu, 1); - T alpha, beta; - htool::underlying_type error; - generate_random_vector(x); - generate_random_vector(y); - generate_random_scalar(alpha); - generate_random_scalar(beta); - if (op == 'N') { - std::vector temp(no * mu, 0); - A.mvprod(x.data(), temp.data(), mu); - ref = mult(alpha, temp) + mult(beta, y); - } else { - std::vector temp(no * mu, 0); - A.mvprod_transp(x.data(), temp.data(), mu); - ref = mult(alpha, temp) + mult(beta, y); - } - - // Permutation - vector x_perm(x), y_perm(y), ref_perm(ref), out_perm(ref), x_perm_row_major(x), y_perm_row_major(y), ref_perm_row_major(ref); - for (int j = 0; j < mu; j++) { - if (op == 'T' || op == 'C') { - global_to_root_cluster(target_root_cluster, x.data() + ni * j, x_perm.data() + ni * j); - global_to_root_cluster(source_root_cluster, y.data() + no * j, y_perm.data() + no * j); - global_to_root_cluster(source_root_cluster, ref.data() + no * j, ref_perm.data() + no * j); - } else { - global_to_root_cluster(source_root_cluster, x.data() + ni * j, x_perm.data() + ni * j); - global_to_root_cluster(target_root_cluster, y.data() + no * j, y_perm.data() + no * j); - global_to_root_cluster(target_root_cluster, ref.data() + no * j, ref_perm.data() + no * j); - } - } - - for (int i = 0; i < ni; i++) { - for (int j = 0; j < mu; j++) { - x_perm_row_major[i * mu + j] = x_perm[i + j * ni]; - } - } - - for (int i = 0; i < no; i++) { - for (int j = 0; j < mu; j++) { - y_perm_row_major[i * mu + j] = y_perm[i + j * no]; - ref_perm_row_major[i * mu + j] = ref_perm[i + j * no]; - } - } - - // Tests for fixed rank - if (mu == 1) { - out_perm = y_perm; - Fixed_approximation.add_vector_product(op, alpha, x_perm.data(), beta, out_perm.data()); - error = norm2(ref_perm - out_perm) / norm2(ref_perm); - is_error = is_error || !(error < Fixed_approximation.get_epsilon() * (1 + margin)); - cout << "> Errors on a matrix vector product with fixed approximation: " << 
error << endl; - } - - out_perm = y_perm; - Fixed_approximation.add_matrix_product(op, alpha, x_perm.data(), beta, out_perm.data(), mu); - error = norm2(ref_perm - out_perm) / norm2(ref_perm); - is_error = is_error || !(error < Fixed_approximation.get_epsilon() * (1 + margin)); - cout << "> Errors on a matrix matrix product with fixed approximation: " << error << endl; - - out_perm = y_perm_row_major; - Fixed_approximation.add_matrix_product_row_major(op, alpha, x_perm_row_major.data(), beta, out_perm.data(), mu); - error = norm2(ref_perm_row_major - out_perm) / norm2(ref_perm_row_major); - is_error = is_error || !(error < Fixed_approximation.get_epsilon() * (1 + margin)); - cout << "> Errors on a matrix matrix product with fixed approximation and row major input: " << error << endl; - - // Tests for automatic rank - if (mu == 1) { - out_perm = y_perm; - Auto_approximation.add_vector_product(op, alpha, x_perm.data(), beta, out_perm.data()); - error = norm2(ref_perm - out_perm) / norm2(ref_perm); - is_error = is_error || !(error < Auto_approximation.get_epsilon() * (1 + margin)); - cout << "> Errors on a matrix vector product with automatic approximation: " << error << endl; - } - - out_perm = y_perm; - Auto_approximation.add_matrix_product(op, alpha, x_perm.data(), beta, out_perm.data(), mu); - error = norm2(ref_perm - out_perm) / norm2(ref_perm); - is_error = is_error || !(error < Auto_approximation.get_epsilon() * (1 + margin)); - cout << "> Errors on a matrix matrix product with automatic approximation: " << error << endl; - - out_perm = y_perm_row_major; - Auto_approximation.add_matrix_product_row_major(op, alpha, x_perm_row_major.data(), beta, out_perm.data(), mu); - error = norm2(ref_perm_row_major - out_perm) / norm2(ref_perm_row_major); - is_error = is_error || !(error < Fixed_approximation.get_epsilon() * (1 + margin)); - cout << "> Errors on a matrix matrix product with fixed approximation and row major input: " << error << endl; - - cout << "test : " << 
is_error << endl - << endl; + TestCase test_case(transa, transb, n1, n2, n3, distance[idist], distance[idist] + 10, 'N', 'N', 'N'); + is_error = is_error || test_lrmat_lrmat_product(test_case, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances[0]); + is_error = is_error || test_lrmat_matrix_product(test_case, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances[1]); + is_error = is_error || test_matrix_lrmat_product(test_case, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances[2]); + is_error = is_error || test_matrix_matrix_product(test_case, epsilon, additional_compression_tolerance, additional_lrmat_sum_tolerances[3]); } + return is_error; } diff --git a/tests/functional_tests/hmatrix/lrmat/test_matrix_lrmat_product.hpp b/tests/functional_tests/hmatrix/lrmat/test_matrix_lrmat_product.hpp new file mode 100644 index 00000000..d558bf02 --- /dev/null +++ b/tests/functional_tests/hmatrix/lrmat/test_matrix_lrmat_product.hpp @@ -0,0 +1,96 @@ + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace htool; + +template +bool test_matrix_lrmat_product(const TestCase &test_case, htool::underlying_type epsilon, htool::underlying_type additional_compression_tolerance, htool::underlying_type additional_lrmat_sum_tolerance) { + bool is_error = false; + char transa = test_case.transa; + + // ACA automatic building + Compressor compressor; + LowRankMatrix B_auto_approximation(*test_case.operator_B, compressor, *test_case.root_cluster_B_output, *test_case.root_cluster_B_input, -1, epsilon); + LowRankMatrix C_auto_approximation(*test_case.operator_C, compressor, *test_case.root_cluster_C_output, *test_case.root_cluster_C_input, -1, epsilon); + + // partialACA fixed rank + int reqrank_max = 10; + LowRankMatrix B_fixed_approximation(*test_case.operator_B, compressor, *test_case.root_cluster_B_output, *test_case.root_cluster_B_input, 
std::max(B_auto_approximation.rank_of(), reqrank_max), epsilon); + LowRankMatrix C_fixed_approximation(*test_case.operator_C, compressor, *test_case.root_cluster_C_output, *test_case.root_cluster_C_input, std::max(C_auto_approximation.rank_of(), reqrank_max), epsilon); + LowRankMatrix lrmat_test(epsilon); + + // Random input matrix + T alpha, beta; + htool::underlying_type error; + generate_random_scalar(alpha); + generate_random_scalar(beta); + + // Reference matrix + Matrix A_dense(test_case.no_A, test_case.ni_A), B_dense(test_case.no_B, test_case.ni_B), C_dense(test_case.no_C, test_case.ni_C); + test_case.operator_A->copy_submatrix(test_case.no_A, test_case.ni_A, 0, 0, A_dense.data()); + test_case.operator_B->copy_submatrix(test_case.no_B, test_case.ni_B, 0, 0, B_dense.data()); + test_case.operator_C->copy_submatrix(test_case.no_C, test_case.ni_C, 0, 0, C_dense.data()); + Matrix matrix_result_w_matrix_sum(C_dense), matrix_result_wo_sum(C_dense), matrix_test, dense_lrmat_test, matrix_result_w_lrmat_sum(C_dense); + C_auto_approximation.copy_to_dense(matrix_result_w_lrmat_sum.data()); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, matrix_result_w_matrix_sum); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, matrix_result_w_lrmat_sum); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, T(0), matrix_result_wo_sum); + + // Products with fixed approximation + matrix_test = C_dense; + add_matrix_lrmat_product(transa, 'N', alpha, A_dense, B_fixed_approximation, beta, matrix_test); + error = normFrob(matrix_result_w_matrix_sum - matrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a matrix lrmat product to matrix with fixed approximation: " << error << endl; + + lrmat_test = C_fixed_approximation; + add_matrix_lrmat_product(transa, 'N', alpha, A_dense, B_fixed_approximation, T(0), lrmat_test); + 
dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_wo_sum - dense_lrmat_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a matrix lrmat product to lrmat with fixed approximation and without lrmat sum: " << error << endl; + + lrmat_test = C_fixed_approximation; + add_matrix_lrmat_product(transa, 'N', alpha, A_dense, B_fixed_approximation, beta, lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance + additional_lrmat_sum_tolerance)); + cout << "> Errors on a matrix lrmat product to lrmat with fixed approximation and with lrmat sum: " << error << endl; + + // Products with auto approximation + matrix_test = C_dense; + add_matrix_lrmat_product(transa, 'N', alpha, A_dense, B_auto_approximation, beta, matrix_test); + error = normFrob(matrix_result_w_matrix_sum - matrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a matrix lrmat product to matrix with automatic approximation: " << error << endl; + + lrmat_test = C_auto_approximation; + add_matrix_lrmat_product(transa, 'N', alpha, A_dense, B_auto_approximation, T(0), lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_wo_sum - dense_lrmat_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a matrix lrmat product to lrmat with automatic approximation and without lrmat sum: " << error << endl; + + lrmat_test = 
C_auto_approximation; + add_matrix_lrmat_product(transa, 'N', alpha, A_dense, B_auto_approximation, beta, lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance + additional_lrmat_sum_tolerance)); + cout << "> Errors on a matrix lrmat product to lrmat with automatic approximation and with lrmat sum: " << error << endl; + + cout << "test : " << is_error << endl + << endl; + + return is_error; +} diff --git a/tests/functional_tests/hmatrix/lrmat/test_matrix_matrix_product.hpp b/tests/functional_tests/hmatrix/lrmat/test_matrix_matrix_product.hpp new file mode 100644 index 00000000..ad2ce592 --- /dev/null +++ b/tests/functional_tests/hmatrix/lrmat/test_matrix_matrix_product.hpp @@ -0,0 +1,63 @@ + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace htool; + +template +bool test_matrix_matrix_product(const TestCase &test_case, htool::underlying_type epsilon, htool::underlying_type additional_compression_tolerance, htool::underlying_type additional_lrmat_sum_tolerance) { + bool is_error = false; + + char transa = test_case.transa; + + // ACA automatic building + Compressor compressor; + LowRankMatrix C_auto_approximation(*test_case.operator_C, compressor, *test_case.root_cluster_C_output, *test_case.root_cluster_C_input, -1, epsilon), lrmat_test(epsilon); + + // Dense Matrices + Matrix matrix_test, dense_lrmat_test; + + // Random Input + htool::underlying_type error; + T alpha(1), beta(1); + generate_random_scalar(alpha); + generate_random_scalar(beta); + + // Reference matrix + Matrix A_dense(test_case.no_A, test_case.ni_A), B_dense(test_case.no_B, test_case.ni_B), C_dense(test_case.no_C, test_case.ni_C); + 
test_case.operator_A->copy_submatrix(test_case.no_A, test_case.ni_A, 0, 0, A_dense.data()); + test_case.operator_B->copy_submatrix(test_case.no_B, test_case.ni_B, 0, 0, B_dense.data()); + test_case.operator_C->copy_submatrix(test_case.no_C, test_case.ni_C, 0, 0, C_dense.data()); + Matrix matrix_result_wo_sum(C_dense), matrix_result_w_lrmat_sum(C_dense); + C_auto_approximation.copy_to_dense(matrix_result_w_lrmat_sum.data()); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, matrix_result_w_lrmat_sum); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, T(0), matrix_result_wo_sum); + + // Product + lrmat_test = C_auto_approximation; + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, T(0), lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_wo_sum - dense_lrmat_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a matrix matrix product to lrmat without sum: " << error << " vs " << epsilon << endl; + + lrmat_test = C_auto_approximation; + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < epsilon * (1 + additional_compression_tolerance + additional_lrmat_sum_tolerance)); + cout << "> Errors on a matrix matrix product to lrmat with sum: " << error << " vs " << epsilon * (1 + additional_compression_tolerance + additional_lrmat_sum_tolerance) << endl; + + cout << "test : " << is_error << endl + << endl; + + return is_error; +} diff --git a/tests/functional_tests/hmatrix/test_hmatrix_build.hpp 
b/tests/functional_tests/hmatrix/test_hmatrix_build.hpp index 3471dc76..7a26d967 100644 --- a/tests/functional_tests/hmatrix/test_hmatrix_build.hpp +++ b/tests/functional_tests/hmatrix/test_hmatrix_build.hpp @@ -91,9 +91,9 @@ bool test_hmatrix_build(int nr, int nc, bool use_local_cluster, char Symmetry, c std::unique_ptr>> hmatrix_tree_builder; if (use_local_cluster) { - hmatrix_tree_builder = std::make_unique>>(target_root_cluster->get_cluster_on_partition(rankWorld), source_root_cluster->get_cluster_on_partition(rankWorld), epsilon, eta, Symmetry, UPLO, -1, -1); + hmatrix_tree_builder = std::make_unique>>(target_root_cluster->get_cluster_on_partition(rankWorld), source_root_cluster->get_cluster_on_partition(rankWorld), epsilon, eta, Symmetry, UPLO, -1, -1, -1); } else { - hmatrix_tree_builder = std::make_unique>>(*target_root_cluster, *source_root_cluster, epsilon, eta, Symmetry, UPLO, -1, rankWorld); + hmatrix_tree_builder = std::make_unique>>(*target_root_cluster, *source_root_cluster, epsilon, eta, Symmetry, UPLO, -1, rankWorld, rankWorld); } std::shared_ptr> dense_blocks_generator; diff --git a/tests/functional_tests/hmatrix/test_hmatrix_hmatrix_product.hpp b/tests/functional_tests/hmatrix/test_hmatrix_hmatrix_product.hpp new file mode 100644 index 00000000..ba217c9e --- /dev/null +++ b/tests/functional_tests/hmatrix/test_hmatrix_hmatrix_product.hpp @@ -0,0 +1,148 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace htool; + +template +bool test_hmatrix_hmatrix_product(const TestCase &test_case, bool use_local_cluster, htool::underlying_type epsilon, htool::underlying_type margin) { + // Logger::get_instance().set_current_log_level(LogLevel::INFO); + + int rankWorld; + MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + bool is_error = false; + double eta = 10; + char side = test_case.side; + char Symmetry = test_case.symmetry; + char UPLO = 
test_case.UPLO; + char transa = test_case.transa; + + char left_hmatrix_symmetry = side == 'L' ? Symmetry : 'N'; + char left_hmatrix_UPLO = side == 'L' ? UPLO : 'N'; + char right_hmatrix_symmetry = side == 'R' ? Symmetry : 'N'; + char right_hmatrix_UPLO = side == 'R' ? UPLO : 'N'; + + const Cluster> *root_cluster_A_output, *root_cluster_A_input, *root_cluster_B_output, *root_cluster_B_input, *root_cluster_C_output, *root_cluster_C_input; + if (use_local_cluster) { + root_cluster_A_output = &test_case.root_cluster_A_output->get_cluster_on_partition(rankWorld); + root_cluster_A_input = &test_case.root_cluster_A_input->get_cluster_on_partition(rankWorld); + root_cluster_B_output = &test_case.root_cluster_B_output->get_cluster_on_partition(rankWorld); + root_cluster_B_input = &test_case.root_cluster_B_input->get_cluster_on_partition(rankWorld); + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = &test_case.root_cluster_C_input->get_cluster_on_partition(rankWorld); + } else { + if (transa == 'N') { + root_cluster_A_output = &test_case.root_cluster_A_output->get_cluster_on_partition(rankWorld); + root_cluster_A_input = test_case.root_cluster_A_input; + root_cluster_B_output = test_case.root_cluster_B_output; + root_cluster_B_input = test_case.root_cluster_B_input; + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = test_case.root_cluster_C_input; + } else { + root_cluster_A_output = test_case.root_cluster_A_output; + root_cluster_A_input = &test_case.root_cluster_A_input->get_cluster_on_partition(rankWorld); + root_cluster_B_output = test_case.root_cluster_B_output; + root_cluster_B_input = test_case.root_cluster_B_input; + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = test_case.root_cluster_C_input; + } + } + + HMatrixTreeBuilder> 
hmatrix_tree_builder_A(*root_cluster_A_output, *root_cluster_A_input, epsilon, eta, left_hmatrix_symmetry, left_hmatrix_UPLO, -1, -1, rankWorld); + HMatrixTreeBuilder> hmatrix_tree_builder_B(*root_cluster_B_output, *root_cluster_B_input, epsilon, eta, right_hmatrix_symmetry, right_hmatrix_UPLO, -1, -1, rankWorld); + HMatrixTreeBuilder> hmatrix_tree_builder_C(*root_cluster_C_output, *root_cluster_C_input, epsilon, eta, 'N', 'N', -1, -1, rankWorld); + hmatrix_tree_builder_C.set_minimal_source_depth(2); + + // build + HMatrix> A = hmatrix_tree_builder_A.build(*test_case.operator_A); + HMatrix> B = hmatrix_tree_builder_B.build(*test_case.operator_B); + HMatrix> C = hmatrix_tree_builder_C.build(*test_case.operator_C); + HMatrix> hmatrix_test(C); + save_leaves_with_rank(A, "A_leaves_" + std::to_string(rankWorld)); + save_leaves_with_rank(B, "B_leaves_" + std::to_string(rankWorld)); + save_leaves_with_rank(C, "C_leaves_" + std::to_string(rankWorld)); + + // Dense matrices + int ni_A = root_cluster_A_input->get_size(); + int no_A = root_cluster_A_output->get_size(); + int ni_B = root_cluster_B_input->get_size(); + int no_B = root_cluster_B_output->get_size(); + int ni_C = root_cluster_C_input->get_size(); + int no_C = root_cluster_C_output->get_size(); + + Matrix A_dense(no_A, ni_A), B_dense(no_B, ni_B), C_dense(no_C, ni_C), matrix_test; + test_case.operator_A->copy_submatrix(no_A, ni_A, root_cluster_A_output->get_offset(), root_cluster_A_input->get_offset(), A_dense.data()); + test_case.operator_B->copy_submatrix(no_B, ni_B, root_cluster_B_output->get_offset(), root_cluster_B_input->get_offset(), B_dense.data()); + test_case.operator_C->copy_submatrix(no_C, ni_C, root_cluster_C_output->get_offset(), root_cluster_C_input->get_offset(), C_dense.data()); + + Matrix HA_dense(A.get_target_cluster().get_size(), A.get_source_cluster().get_size()); + Matrix HB_dense(B.get_target_cluster().get_size(), B.get_source_cluster().get_size()); + Matrix 
HC_dense(C.get_target_cluster().get_size(), C.get_source_cluster().get_size()); + copy_to_dense(A, HA_dense.data()); + copy_to_dense(B, HB_dense.data()); + copy_to_dense(C, HC_dense.data()); + + // lrmat + SVD compressor; + htool::underlying_type lrmat_tol = 1e-6; + LowRankMatrix C_auto_approximation(*test_case.operator_C, compressor, *root_cluster_C_output, *root_cluster_C_input, -1, lrmat_tol), lrmat_test(lrmat_tol); + + // Random Input + T alpha(1), beta(0), scaling_coefficient; + htool::underlying_type error; + generate_random_scalar(alpha); + generate_random_scalar(beta); + generate_random_scalar(scaling_coefficient); + + // References + Matrix matrix_result_w_matrix_sum(C_dense), densified_hmatrix_test(C_dense), matrix_result_w_lrmat_sum(C_dense), matrix_result_wo_sum(C_dense), dense_lrmat_test(C_dense), test(HC_dense); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, matrix_result_w_matrix_sum); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, T(0), matrix_result_wo_sum); + add_matrix_matrix_product(transa, 'N', alpha, HA_dense, HB_dense, beta, test); + + C_auto_approximation.copy_to_dense(matrix_result_w_lrmat_sum.data()); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, matrix_result_w_lrmat_sum); + + // Product + lrmat_test = C_auto_approximation; + if (A.get_symmetry() == 'N') { + add_hmatrix_hmatrix_product(transa, 'N', alpha, A, B, beta, lrmat_test); + } else { + add_hmatrix_hmatrix_product_symmetry('L', transa, 'N', alpha, A, B, beta, lrmat_test, A.get_UPLO(), A.get_symmetry()); + } + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < std::max(epsilon, lrmat_tol) * margin); + cout << "> Errors on a hmatrix hmatrix product to lrmat: " << error << endl; + + matrix_test 
= C_dense; + add_hmatrix_hmatrix_product(transa, 'N', alpha, A, B, beta, matrix_test); + error = normFrob(matrix_result_w_matrix_sum - matrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < std::max(epsilon, lrmat_tol) * margin); + cout << "> Errors on a hmatrix hmatrix product to matrix: " << error << endl; + + hmatrix_test = C; + if (A.get_symmetry() == 'N') { + add_hmatrix_hmatrix_product(transa, 'N', alpha, A, B, beta, hmatrix_test); + } else { + add_hmatrix_hmatrix_product_symmetry('L', transa, 'N', alpha, A, B, beta, hmatrix_test, A.get_UPLO(), A.get_symmetry()); + } + copy_to_dense(hmatrix_test, densified_hmatrix_test.data()); + error = normFrob(matrix_result_w_matrix_sum - densified_hmatrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon * margin); + cout << "> Errors on a hmatrix hmatrix product to hmatrix: " << error << endl; + + return is_error; +} diff --git a/tests/functional_tests/hmatrix/test_hmatrix_lrmat_product.hpp b/tests/functional_tests/hmatrix/test_hmatrix_lrmat_product.hpp new file mode 100644 index 00000000..d94d5b64 --- /dev/null +++ b/tests/functional_tests/hmatrix/test_hmatrix_lrmat_product.hpp @@ -0,0 +1,125 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace htool; + +template +bool test_hmatrix_lrmat_product(const TestCase &test_case, bool use_local_cluster, htool::underlying_type epsilon) { + + int rankWorld; + MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + bool is_error = false; + double eta = 10; + char side = test_case.side; + char Symmetry = test_case.symmetry; + char UPLO = test_case.UPLO; + char transa = test_case.transa; + + char hmatrix_symmetry = side == 'L' ? Symmetry : 'N'; + char hmatrix_UPLO = side == 'L' ? 
UPLO : 'N'; + + const Cluster> *root_cluster_A_output, *root_cluster_A_input, *root_cluster_B_output, *root_cluster_B_input, *root_cluster_C_output, *root_cluster_C_input; + if (use_local_cluster) { + root_cluster_A_output = &test_case.root_cluster_A_output->get_cluster_on_partition(rankWorld); + root_cluster_A_input = &test_case.root_cluster_A_input->get_cluster_on_partition(rankWorld); + root_cluster_B_output = &test_case.root_cluster_B_output->get_cluster_on_partition(rankWorld); + root_cluster_B_input = &test_case.root_cluster_B_input->get_cluster_on_partition(rankWorld); + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = &test_case.root_cluster_C_input->get_cluster_on_partition(rankWorld); + } else { + if (transa == 'N') { + root_cluster_A_output = &test_case.root_cluster_A_output->get_cluster_on_partition(rankWorld); + root_cluster_A_input = test_case.root_cluster_A_input; + root_cluster_B_output = test_case.root_cluster_B_output; + root_cluster_B_input = test_case.root_cluster_B_input; + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = test_case.root_cluster_C_input; + } else { + root_cluster_A_output = test_case.root_cluster_A_output; + root_cluster_A_input = &test_case.root_cluster_A_input->get_cluster_on_partition(rankWorld); + root_cluster_B_output = test_case.root_cluster_B_output; + root_cluster_B_input = test_case.root_cluster_B_input; + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = test_case.root_cluster_C_input; + } + } + + HMatrixTreeBuilder> hmatrix_tree_builder(*root_cluster_A_output, *root_cluster_A_input, epsilon, eta, hmatrix_symmetry, hmatrix_UPLO, -1, -1, rankWorld); + hmatrix_tree_builder.set_low_rank_generator(std::make_shared>()); + + // build + HMatrix> root_hmatrix = hmatrix_tree_builder.build(*test_case.operator_A); + + // 
Dense matrix + int ni_A = root_cluster_A_input->get_size(); + int no_A = root_cluster_A_output->get_size(); + int ni_B = root_cluster_B_input->get_size(); + int no_B = root_cluster_B_output->get_size(); + int ni_C = root_cluster_C_input->get_size(); + int no_C = root_cluster_C_output->get_size(); + + Matrix A_dense(no_A, ni_A), B_dense(no_B, ni_B), C_dense(no_C, ni_C); + test_case.operator_A->copy_submatrix(no_A, ni_A, root_cluster_A_output->get_offset(), root_cluster_A_input->get_offset(), A_dense.data()); + test_case.operator_B->copy_submatrix(no_B, ni_B, root_cluster_B_output->get_offset(), root_cluster_B_input->get_offset(), B_dense.data()); + test_case.operator_C->copy_submatrix(no_C, ni_C, root_cluster_C_output->get_offset(), root_cluster_C_input->get_offset(), C_dense.data()); + Matrix matrix_test, dense_lrmat_test; + + // lrmat + SVD compressor; + htool::underlying_type lrmat_tolerance = 1e-6; + LowRankMatrix B_auto_approximation(*test_case.operator_B, compressor, *root_cluster_B_output, *root_cluster_B_input, -1, lrmat_tolerance); + LowRankMatrix lrmat_test(epsilon); + LowRankMatrix C_auto_approximation(*test_case.operator_C, compressor, *root_cluster_C_output, *root_cluster_C_input, -1, lrmat_tolerance); + + // Random Input matrix + T alpha(1), beta(1), scaling_coefficient; + htool::underlying_type error; + generate_random_scalar(alpha); + generate_random_scalar(beta); + generate_random_scalar(scaling_coefficient); + + // References + Matrix matrix_result_w_matrix_sum(C_dense), matrix_result_wo_sum(C_dense), matrix_result_w_lrmat_sum(C_dense); + C_auto_approximation.copy_to_dense(matrix_result_w_lrmat_sum.data()); + add_matrix_lrmat_product(transa, 'N', alpha, A_dense, B_auto_approximation, beta, matrix_result_w_matrix_sum); + add_matrix_lrmat_product(transa, 'N', alpha, A_dense, B_auto_approximation, beta, matrix_result_w_lrmat_sum); + add_matrix_lrmat_product(transa, 'N', alpha, A_dense, B_auto_approximation, T(0), matrix_result_wo_sum); + + // Product 
+ matrix_test = C_dense; + add_hmatrix_lrmat_product(transa, 'N', alpha, root_hmatrix, B_auto_approximation, beta, matrix_test); + error = normFrob(matrix_result_w_matrix_sum - matrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a hmatrix lrmat product to matrix: " << error << endl; + + lrmat_test = C_auto_approximation; + add_hmatrix_lrmat_product(transa, 'N', alpha, root_hmatrix, B_auto_approximation, T(0), lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_wo_sum - dense_lrmat_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a lrmat hmatrix product to lrmat without lrmat sum: " << error << endl; + + lrmat_test = C_auto_approximation; + add_hmatrix_lrmat_product(transa, 'N', alpha, root_hmatrix, B_auto_approximation, beta, lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < std::max(epsilon, lrmat_tolerance)); + cout << "> Errors on a lrmat hmatrix product to lrmat with lrmat sum: " << error << endl; + cout << "> is_error: " << is_error << endl; + + return is_error; +} diff --git a/tests/functional_tests/hmatrix/test_hmatrix_matrix_product.hpp b/tests/functional_tests/hmatrix/test_hmatrix_matrix_product.hpp new file mode 100644 index 00000000..f26f5423 --- /dev/null +++ b/tests/functional_tests/hmatrix/test_hmatrix_matrix_product.hpp @@ -0,0 +1,160 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace htool; + +template +bool test_hmatrix_matrix_product(const 
TestCase &test_case, bool use_local_cluster, htool::underlying_type epsilon, htool::underlying_type margin) { + + int rankWorld; + MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + bool is_error = false; + double eta = 10; + char side = test_case.side; + char Symmetry = test_case.symmetry; + char UPLO = test_case.UPLO; + char transa = test_case.transa; + + char hmatrix_symmetry = side == 'L' ? Symmetry : 'N'; + char hmatrix_UPLO = side == 'L' ? UPLO : 'N'; + + const Cluster> *root_cluster_A_output, *root_cluster_A_input, *root_cluster_B_output, *root_cluster_B_input, *root_cluster_C_output, *root_cluster_C_input; + if (use_local_cluster) { + root_cluster_A_output = &test_case.root_cluster_A_output->get_cluster_on_partition(rankWorld); + root_cluster_A_input = &test_case.root_cluster_A_input->get_cluster_on_partition(rankWorld); + root_cluster_B_output = &test_case.root_cluster_B_output->get_cluster_on_partition(rankWorld); + root_cluster_B_input = &test_case.root_cluster_B_input->get_cluster_on_partition(rankWorld); + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = &test_case.root_cluster_C_input->get_cluster_on_partition(rankWorld); + } else { + if (transa == 'N') { + root_cluster_A_output = &test_case.root_cluster_A_output->get_cluster_on_partition(rankWorld); + root_cluster_A_input = test_case.root_cluster_A_input; + root_cluster_B_output = test_case.root_cluster_B_output; + root_cluster_B_input = test_case.root_cluster_B_input; + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = test_case.root_cluster_C_input; + } else { + root_cluster_A_output = test_case.root_cluster_A_output; + root_cluster_A_input = &test_case.root_cluster_A_input->get_cluster_on_partition(rankWorld); + root_cluster_B_output = test_case.root_cluster_B_output; + root_cluster_B_input = test_case.root_cluster_B_input; + root_cluster_C_output = 
&test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = test_case.root_cluster_C_input; + } + } + + HMatrixTreeBuilder> hmatrix_tree_builder(*root_cluster_A_output, *root_cluster_A_input, epsilon, eta, hmatrix_symmetry, hmatrix_UPLO, -1, -1, rankWorld); + hmatrix_tree_builder.set_low_rank_generator(std::make_shared>()); + + // build + HMatrix> root_hmatrix = hmatrix_tree_builder.build(*test_case.operator_A); + + // Dense matrix + int ni_A = root_cluster_A_input->get_size(); + int no_A = root_cluster_A_output->get_size(); + int ni_B = root_cluster_B_input->get_size(); + int no_B = root_cluster_B_output->get_size(); + int ni_C = root_cluster_C_input->get_size(); + int no_C = root_cluster_C_output->get_size(); + + Matrix A_dense(no_A, ni_A), B_dense(no_B, ni_B), C_dense(no_C, ni_C); + test_case.operator_A->copy_submatrix(no_A, ni_A, root_cluster_A_output->get_offset(), root_cluster_A_input->get_offset(), A_dense.data()); + test_case.operator_B->copy_submatrix(no_B, ni_B, root_cluster_B_output->get_offset(), root_cluster_B_input->get_offset(), B_dense.data()); + test_case.operator_C->copy_submatrix(no_C, ni_C, root_cluster_C_output->get_offset(), root_cluster_C_input->get_offset(), C_dense.data()); + Matrix matrix_test, dense_lrmat_test, transposed_B_dense, transposed_C_dense; + transpose(B_dense, transposed_B_dense); + transpose(C_dense, transposed_C_dense); + + Matrix HA_dense(root_hmatrix.get_target_cluster().get_size(), root_hmatrix.get_source_cluster().get_size()); + copy_to_dense(root_hmatrix, HA_dense.data()); + + // lrmat + SVD compressor; + htool::underlying_type lrmat_tolerance = 0.0001; + LowRankMatrix C_auto_approximation(*test_case.operator_C, compressor, *root_cluster_C_output, *root_cluster_C_input, -1, lrmat_tolerance), lrmat_test(lrmat_tolerance); + + // Random Input matrix + std::vector B_vec, C_vec, test_vec; + B_vec = B_dense.get_col(0); + C_vec = C_dense.get_col(0); + T alpha(0), beta(0), scaling_coefficient; 
+ htool::underlying_type error; + generate_random_scalar(alpha); + generate_random_scalar(beta); + generate_random_scalar(scaling_coefficient); + + // References + Matrix matrix_result_w_matrix_sum(C_dense), transposed_matrix_result_w_matrix_sum, matrix_result_wo_sum(C_dense), matrix_result_w_lrmat_sum(C_dense); + C_auto_approximation.copy_to_dense(matrix_result_w_lrmat_sum.data()); + + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, matrix_result_w_matrix_sum); + transpose(matrix_result_w_matrix_sum, transposed_matrix_result_w_matrix_sum); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, T(0), matrix_result_wo_sum); + add_matrix_matrix_product(transa, 'N', alpha, HA_dense, B_dense, beta, matrix_result_w_lrmat_sum); + + Matrix scaled_matrix_result_w_matrix_sum(matrix_result_w_matrix_sum); + scale(scaling_coefficient, scaled_matrix_result_w_matrix_sum); + + // Product + test_vec = C_vec; + add_hmatrix_vector_product(transa, alpha, root_hmatrix, B_vec.data(), beta, test_vec.data()); + error = norm2(matrix_result_w_matrix_sum.get_col(0) - test_vec) / norm2(matrix_result_w_matrix_sum.get_col(0)); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a hmatrix vector product: " << error << endl; + + matrix_test = C_dense; + add_hmatrix_matrix_product(transa, 'N', alpha, root_hmatrix, B_dense, beta, matrix_test); + error = normFrob(matrix_result_w_matrix_sum - matrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a hmatrix matrix product: " << error << endl; + + matrix_test = transposed_C_dense; + openmp_add_hmatrix_matrix_product_row_major(transa, 'N', alpha, root_hmatrix, transposed_B_dense.data(), beta, matrix_test.data(), C_dense.nb_cols()); + error = normFrob(transposed_matrix_result_w_matrix_sum - matrix_test) / normFrob(transposed_matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a openmp hmatrix 
matrix product with row major input: " << error << endl; + + matrix_test = transposed_C_dense; + sequential_add_hmatrix_matrix_product_row_major(transa, 'N', alpha, root_hmatrix, transposed_B_dense.data(), beta, matrix_test.data(), C_dense.nb_cols()); + error = normFrob(transposed_matrix_result_w_matrix_sum - matrix_test) / normFrob(transposed_matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a sequential hmatrix matrix product with row major input: " << error << endl; + + // Product + lrmat_test = C_auto_approximation; + add_hmatrix_matrix_product(transa, 'N', alpha, root_hmatrix, B_dense, T(0), lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_wo_sum - dense_lrmat_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < std::max(epsilon, lrmat_tolerance)); + cout << "> Errors on a hmatrix matrix product to lrmat without sum: " << error << endl; + + lrmat_test = C_auto_approximation; + add_hmatrix_matrix_product(transa, 'N', alpha, root_hmatrix, B_dense, beta, lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < std::max(epsilon, lrmat_tolerance) * margin); + cout << "> Errors on a hmatrix matrix product to lrmat with sum: " << error << endl; + + matrix_test = C_dense; + scale(scaling_coefficient, root_hmatrix); + add_hmatrix_matrix_product(transa, 'N', alpha, root_hmatrix, B_dense, scaling_coefficient * beta, matrix_test); + error = normFrob(scaled_matrix_result_w_matrix_sum - matrix_test) / normFrob(scaled_matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a scaled hmatrix matrix 
product: " << error << endl; + + return is_error; +} diff --git a/tests/functional_tests/hmatrix/test_hmatrix_product.hpp b/tests/functional_tests/hmatrix/test_hmatrix_product.hpp index a92d585e..b3cbc284 100644 --- a/tests/functional_tests/hmatrix/test_hmatrix_product.hpp +++ b/tests/functional_tests/hmatrix/test_hmatrix_product.hpp @@ -1,7 +1,14 @@ +#include "test_hmatrix_hmatrix_product.hpp" +#include "test_hmatrix_lrmat_product.hpp" +#include "test_hmatrix_matrix_product.hpp" +#include "test_lrmat_hmatrix_product.hpp" +#include "test_matrix_hmatrix_product.hpp" #include #include #include +#include #include +#include #include #include #include @@ -11,8 +18,7 @@ using namespace std; using namespace htool; template -bool test_hmatrix_product(int nr, int nc, int mu, bool use_local_cluster, char op, char Symmetry, char UPLO, htool::underlying_type epsilon) { - +bool test_hmatrix_product(char transa, char transb, int n1, int n2, int n3, char side, char Symmetry, char UPLO, bool use_local_cluster, htool::underlying_type epsilon, htool::underlying_type margin) { // Get the number of processes int sizeWorld; MPI_Comm_size(MPI_COMM_WORLD, &sizeWorld); @@ -21,137 +27,15 @@ bool test_hmatrix_product(int nr, int nc, int mu, bool use_local_cluster, char o int rankWorld; MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); - srand(1); bool is_error = false; + TestCase test_case(transa, transb, n1, n2, n3, 1, 2, side, Symmetry, UPLO, sizeWorld); - // Geometry - double z1 = 1; - vector p1(3 * nr), p1_permuted, off_diagonal_p1; - vector p2(Symmetry == 'N' ? 
3 * nc : 1), p2_permuted, off_diagonal_p2; - create_disk(3, z1, nr, p1.data()); - - // Partition - std::vector partition{}; - test_partition(3, nr, p1, sizeWorld, partition); - - // Clustering - ClusterTreeBuilder> recursive_build_strategy; - // target_recursive_build_strategy.set_minclustersize(2); - - std::shared_ptr>> source_root_cluster; - std::shared_ptr>> target_root_cluster = make_shared>>(recursive_build_strategy.create_cluster_tree(nr, 3, p1.data(), 2, sizeWorld, partition.data())); - - if (Symmetry == 'N' && nr != nc) { - // Geometry - double z2 = 1 + 0.1; - create_disk(3, z2, nc, p2.data()); - - // partition - test_partition(3, nc, p2, sizeWorld, partition); - - // Clustering - // source_recursive_build_strategy.set_minclustersize(2); - - source_root_cluster = make_shared>>(recursive_build_strategy.create_cluster_tree(nc, 3, p2.data(), 2, sizeWorld, partition.data())); - } else { - source_root_cluster = target_root_cluster; - p2 = p1; - } - - // Permutation on geometry - p1_permuted.resize(3 * nr); - const auto &target_permutation = target_root_cluster->get_permutation(); - for (int i = 0; i < target_permutation.size(); i++) { - p1_permuted[i * 3 + 0] = p1[target_permutation[i] * 3 + 0]; - p1_permuted[i * 3 + 1] = p1[target_permutation[i] * 3 + 1]; - p1_permuted[i * 3 + 2] = p1[target_permutation[i] * 3 + 2]; - } - p2_permuted.resize(3 * nc); - if (Symmetry == 'N' && nr != nc) { - const auto &source_permutation = source_root_cluster->get_permutation(); - for (int i = 0; i < source_permutation.size(); i++) { - p2_permuted[i * 3 + 0] = p2[source_permutation[i] * 3 + 0]; - p2_permuted[i * 3 + 1] = p2[source_permutation[i] * 3 + 1]; - p2_permuted[i * 3 + 2] = p2[source_permutation[i] * 3 + 2]; - } - } else { - p2_permuted = p1_permuted; - } - - // Generator - GeneratorTestType generator(3, nr, nc, p1_permuted, p2_permuted, *target_root_cluster, *source_root_cluster, false, false); - - // HMatrix - double eta = 10; - - std::unique_ptr>> hmatrix_tree_builder; 
- if (use_local_cluster) { - hmatrix_tree_builder = std::make_unique>>(target_root_cluster->get_cluster_on_partition(rankWorld), source_root_cluster->get_cluster_on_partition(rankWorld), epsilon, eta, Symmetry, UPLO, -1, -1); - } else { - hmatrix_tree_builder = std::make_unique>>(*target_root_cluster, *source_root_cluster, epsilon, eta, Symmetry, UPLO, -1, rankWorld); - } - - // build - HMatrix> root_hmatrix = hmatrix_tree_builder->build(generator); - save_leaves_with_rank(root_hmatrix, "leaves_" + htool::NbrToStr(rankWorld)); - // Dense matrix - const auto &hmatrix_target_cluster = root_hmatrix.get_target_cluster(); - const auto &hmatrix_source_cluster = root_hmatrix.get_source_cluster(); - std::vector dense_data(hmatrix_target_cluster.get_size() * hmatrix_source_cluster.get_size()); - Matrix dense_matrix; - dense_matrix.assign(hmatrix_target_cluster.get_size(), hmatrix_source_cluster.get_size(), dense_data.data(), false); - - generator.copy_submatrix(hmatrix_target_cluster.get_size(), hmatrix_source_cluster.get_size(), hmatrix_target_cluster.get_offset(), hmatrix_source_cluster.get_offset(), dense_data.data()); - - // Input - int ni = (op == 'T' || op == 'C') ? hmatrix_target_cluster.get_size() : hmatrix_source_cluster.get_size(); - int no = (op == 'T' || op == 'C') ? 
hmatrix_source_cluster.get_size() : hmatrix_target_cluster.get_size(); - vector x(ni * mu, 1), y(no * mu, 1), ref(no * mu, 0), out(ref); - T alpha(1), beta(1); - htool::underlying_type error; - generate_random_vector(x); - generate_random_vector(y); - generate_random_scalar(alpha); - generate_random_scalar(beta); - - ref = y; - dense_matrix.add_matrix_product(op, alpha, x.data(), beta, ref.data(), mu); - - // Row major inputs - vector x_row_major(ni * mu, 1), y_row_major(no * mu, 1), ref_row_major(no * mu, 0); - for (int i = 0; i < ni; i++) { - for (int j = 0; j < mu; j++) { - x_row_major[i * mu + j] = x[i + j * ni]; - } + is_error = test_hmatrix_matrix_product(test_case, use_local_cluster, epsilon, margin); + is_error = is_error || test_matrix_hmatrix_product(test_case, use_local_cluster, epsilon, margin); + is_error = is_error || test_hmatrix_lrmat_product(test_case, use_local_cluster, epsilon); + if (!(side == 'L' && Symmetry != 'N')) { + is_error = is_error || test_lrmat_hmatrix_product(test_case, use_local_cluster, epsilon); } - - for (int i = 0; i < no; i++) { - for (int j = 0; j < mu; j++) { - y_row_major[i * mu + j] = y[i + j * no]; - ref_row_major[i * mu + j] = ref[i + j * no]; - } - } - - // Product - if (mu == 1) { - out = y; - root_hmatrix.add_vector_product(op, alpha, x.data(), beta, out.data()); - error = norm2(ref - out) / norm2(ref); - is_error = is_error || !(error < epsilon); - cout << "> Errors on a hmatrix vector product: " << error << endl; - } - - out = y_row_major; - root_hmatrix.add_matrix_product_row_major(op, alpha, x_row_major.data(), beta, out.data(), mu); - error = norm2(ref_row_major - out) / norm2(ref_row_major); - is_error = is_error || !(error < epsilon); - cout << "> Errors on a hmatrix matrix product: " << error << endl; - - // vector hmatrix_to_dense(nr * nc); - // hmatrix->copy_to_dense(hmatrix_to_dense.data()); - // Matrix hmatrix_to_matrix; - // hmatrix_to_matrix.assign(nr, nc, hmatrix_to_dense.data(), false); - - // std::cout << normFrob(hmatrix_to_matrix - 
dense_matrix) << "\n"; + is_error = is_error || test_hmatrix_hmatrix_product(test_case, use_local_cluster, epsilon, margin); return is_error; } diff --git a/tests/functional_tests/hmatrix/test_lrmat_hmatrix_product.hpp b/tests/functional_tests/hmatrix/test_lrmat_hmatrix_product.hpp new file mode 100644 index 00000000..4ab8a0e6 --- /dev/null +++ b/tests/functional_tests/hmatrix/test_lrmat_hmatrix_product.hpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace htool; + +template +bool test_lrmat_hmatrix_product(const TestCase &test_case, bool use_local_cluster, htool::underlying_type epsilon) { + + int rankWorld; + MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + bool is_error = false; + double eta = 10; + char side = test_case.side; + char Symmetry = test_case.symmetry; + char UPLO = test_case.UPLO; + char transa = test_case.transa; + + char hmatrix_symmetry = side == 'R' ? Symmetry : 'N'; + char hmatrix_UPLO = side == 'R' ? 
UPLO : 'N'; + + const Cluster> *root_cluster_A_output, *root_cluster_A_input, *root_cluster_B_output, *root_cluster_B_input, *root_cluster_C_output, *root_cluster_C_input; + if (use_local_cluster) { + root_cluster_A_output = &test_case.root_cluster_A_output->get_cluster_on_partition(rankWorld); + root_cluster_A_input = &test_case.root_cluster_A_input->get_cluster_on_partition(rankWorld); + root_cluster_B_output = &test_case.root_cluster_B_output->get_cluster_on_partition(rankWorld); + root_cluster_B_input = &test_case.root_cluster_B_input->get_cluster_on_partition(rankWorld); + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = &test_case.root_cluster_C_input->get_cluster_on_partition(rankWorld); + } else { + if (transa == 'N') { + root_cluster_A_output = &test_case.root_cluster_A_output->get_cluster_on_partition(rankWorld); + root_cluster_A_input = test_case.root_cluster_A_input; + root_cluster_B_output = test_case.root_cluster_B_output; + root_cluster_B_input = test_case.root_cluster_B_input; + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = test_case.root_cluster_C_input; + } else { + root_cluster_A_output = test_case.root_cluster_A_output; + root_cluster_A_input = &test_case.root_cluster_A_input->get_cluster_on_partition(rankWorld); + root_cluster_B_output = test_case.root_cluster_B_output; + root_cluster_B_input = test_case.root_cluster_B_input; + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = test_case.root_cluster_C_input; + } + } + + HMatrixTreeBuilder> hmatrix_tree_builder(*root_cluster_B_output, *root_cluster_B_input, epsilon, eta, hmatrix_symmetry, hmatrix_UPLO, -1, -1, rankWorld); + hmatrix_tree_builder.set_low_rank_generator(std::make_shared>()); + + // build + HMatrix> root_hmatrix = hmatrix_tree_builder.build(*test_case.operator_B); + + // 
Dense matrix + int ni_A = root_cluster_A_input->get_size(); + int no_A = root_cluster_A_output->get_size(); + int ni_B = root_cluster_B_input->get_size(); + int no_B = root_cluster_B_output->get_size(); + int ni_C = root_cluster_C_input->get_size(); + int no_C = root_cluster_C_output->get_size(); + + Matrix A_dense(no_A, ni_A), B_dense(no_B, ni_B), C_dense(no_C, ni_C); + test_case.operator_A->copy_submatrix(no_A, ni_A, root_cluster_A_output->get_offset(), root_cluster_A_input->get_offset(), A_dense.data()); + test_case.operator_B->copy_submatrix(no_B, ni_B, root_cluster_B_output->get_offset(), root_cluster_B_input->get_offset(), B_dense.data()); + test_case.operator_C->copy_submatrix(no_C, ni_C, root_cluster_C_output->get_offset(), root_cluster_C_input->get_offset(), C_dense.data()); + Matrix matrix_test, dense_lrmat_test; + + // lrmat + SVD compressor; + htool::underlying_type lrmat_tolerance = 1e-6; + // std::unique_ptr> A_auto_approximation, C_auto_approximation; + LowRankMatrix lrmat_test(epsilon); + LowRankMatrix A_auto_approximation(*test_case.operator_A, compressor, *root_cluster_A_output, *root_cluster_A_input, -1, lrmat_tolerance); + LowRankMatrix C_auto_approximation(*test_case.operator_C, compressor, *root_cluster_C_output, *root_cluster_C_input, -1, lrmat_tolerance); + + // Random Input matrix + T alpha(1), beta(1), scaling_coefficient; + htool::underlying_type error; + generate_random_scalar(alpha); + generate_random_scalar(beta); + generate_random_scalar(scaling_coefficient); + + // References + Matrix matrix_result_w_matrix_sum(C_dense), matrix_result_wo_sum(C_dense), matrix_result_w_lrmat_sum(C_dense); + C_auto_approximation.copy_to_dense(matrix_result_w_lrmat_sum.data()); + add_lrmat_matrix_product(transa, 'N', alpha, A_auto_approximation, B_dense, beta, matrix_result_w_matrix_sum); + add_lrmat_matrix_product(transa, 'N', alpha, A_auto_approximation, B_dense, beta, matrix_result_w_lrmat_sum); + add_lrmat_matrix_product(transa, 'N', alpha, 
A_auto_approximation, B_dense, T(0), matrix_result_wo_sum); + + // Product + matrix_test = C_dense; + add_lrmat_hmatrix_product(transa, 'N', alpha, A_auto_approximation, root_hmatrix, beta, matrix_test); + error = normFrob(matrix_result_w_matrix_sum - matrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a lrmat hmatrix product to matrix: " << error << endl; + + lrmat_test = C_auto_approximation; + add_lrmat_hmatrix_product(transa, 'N', alpha, A_auto_approximation, root_hmatrix, T(0), lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_wo_sum - dense_lrmat_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a lrmat hmatrix product to lrmat without lrmat sum: " << error << endl; + + lrmat_test = C_auto_approximation; + add_lrmat_hmatrix_product(transa, 'N', alpha, A_auto_approximation, root_hmatrix, beta, lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < std::max(epsilon, lrmat_tolerance)); + cout << "> Errors on a lrmat hmatrix product to lrmat with lrmat sum: " << error << endl; + cout << "> is_error: " << is_error << endl; + + return is_error; +} diff --git a/tests/functional_tests/hmatrix/test_matrix_hmatrix_product.hpp b/tests/functional_tests/hmatrix/test_matrix_hmatrix_product.hpp new file mode 100644 index 00000000..3c5aa8a7 --- /dev/null +++ b/tests/functional_tests/hmatrix/test_matrix_hmatrix_product.hpp @@ -0,0 +1,125 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using 
namespace htool; + +template +bool test_matrix_hmatrix_product(const TestCase &test_case, bool use_local_cluster, htool::underlying_type epsilon, htool::underlying_type margin) { + + int rankWorld; + MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); + bool is_error = false; + double eta = 1; + char side = test_case.side; + char Symmetry = test_case.symmetry; + char UPLO = test_case.UPLO; + char transa = test_case.transa; + + char hmatrix_symmetry = side == 'R' ? Symmetry : 'N'; + char hmatrix_UPLO = side == 'R' ? UPLO : 'N'; + + const Cluster> *root_cluster_A_output, *root_cluster_A_input, *root_cluster_B_output, *root_cluster_B_input, *root_cluster_C_output, *root_cluster_C_input; + if (use_local_cluster) { + root_cluster_A_output = &test_case.root_cluster_A_output->get_cluster_on_partition(rankWorld); + root_cluster_A_input = &test_case.root_cluster_A_input->get_cluster_on_partition(rankWorld); + root_cluster_B_output = &test_case.root_cluster_B_output->get_cluster_on_partition(rankWorld); + root_cluster_B_input = &test_case.root_cluster_B_input->get_cluster_on_partition(rankWorld); + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = &test_case.root_cluster_C_input->get_cluster_on_partition(rankWorld); + } else { + if (transa == 'N') { + root_cluster_A_output = &test_case.root_cluster_A_output->get_cluster_on_partition(rankWorld); + root_cluster_A_input = test_case.root_cluster_A_input; + root_cluster_B_output = test_case.root_cluster_B_output; + root_cluster_B_input = test_case.root_cluster_B_input; + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = test_case.root_cluster_C_input; + } else { + root_cluster_A_output = test_case.root_cluster_A_output; + root_cluster_A_input = &test_case.root_cluster_A_input->get_cluster_on_partition(rankWorld); + root_cluster_B_output = test_case.root_cluster_B_output; + root_cluster_B_input = 
test_case.root_cluster_B_input; + root_cluster_C_output = &test_case.root_cluster_C_output->get_cluster_on_partition(rankWorld); + root_cluster_C_input = test_case.root_cluster_C_input; + } + } + + HMatrixTreeBuilder> hmatrix_tree_builder(*root_cluster_B_output, *root_cluster_B_input, epsilon, eta, hmatrix_symmetry, hmatrix_UPLO, -1, -1, rankWorld); + hmatrix_tree_builder.set_low_rank_generator(std::make_shared>()); + + // build + HMatrix> root_hmatrix = hmatrix_tree_builder.build(*test_case.operator_B); + + // Dense matrix + int ni_A = root_cluster_A_input->get_size(); + int no_A = root_cluster_A_output->get_size(); + int ni_B = root_cluster_B_input->get_size(); + int no_B = root_cluster_B_output->get_size(); + int ni_C = root_cluster_C_input->get_size(); + int no_C = root_cluster_C_output->get_size(); + + Matrix A_dense(no_A, ni_A), B_dense(no_B, ni_B), C_dense(no_C, ni_C); + test_case.operator_A->copy_submatrix(no_A, ni_A, root_cluster_A_output->get_offset(), root_cluster_A_input->get_offset(), A_dense.data()); + test_case.operator_B->copy_submatrix(no_B, ni_B, root_cluster_B_output->get_offset(), root_cluster_B_input->get_offset(), B_dense.data()); + test_case.operator_C->copy_submatrix(no_C, ni_C, root_cluster_C_output->get_offset(), root_cluster_C_input->get_offset(), C_dense.data()); + Matrix matrix_test, dense_lrmat_test; + + Matrix HB_dense(root_hmatrix.get_target_cluster().get_size(), root_hmatrix.get_source_cluster().get_size()); + copy_to_dense(root_hmatrix, HB_dense.data()); + + // lrmat + SVD compressor; + htool::underlying_type lrmat_tolerance = 0.0001; + LowRankMatrix C_auto_approximation(*test_case.operator_C, compressor, *root_cluster_C_output, *root_cluster_C_input, -1, lrmat_tolerance), lrmat_test(lrmat_tolerance); + + // Random Input matrix + std::vector B_vec, C_vec, test_vec; + B_vec = B_dense.get_col(0); + C_vec = C_dense.get_col(0); + T alpha(1), beta(1), scaling_coefficient; + htool::underlying_type error; + generate_random_scalar(alpha); 
+ generate_random_scalar(beta); + generate_random_scalar(scaling_coefficient); + + // References + Matrix matrix_result_w_matrix_sum(C_dense), matrix_result_wo_sum(C_dense), matrix_result_w_lrmat_sum(C_dense); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, beta, matrix_result_w_matrix_sum); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, B_dense, T(0), matrix_result_wo_sum); + add_matrix_matrix_product(transa, 'N', alpha, A_dense, HB_dense, beta, matrix_result_w_lrmat_sum); + + // Products + matrix_test = C_dense; + add_matrix_hmatrix_product(transa, 'N', alpha, A_dense, root_hmatrix, beta, matrix_test); + error = normFrob(matrix_result_w_matrix_sum - matrix_test) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < epsilon); + cout << "> Errors on a matrix hmatrix product: " << error << endl; + + lrmat_test = C_auto_approximation; + add_matrix_hmatrix_product(transa, 'N', alpha, A_dense, root_hmatrix, T(0), lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_wo_sum - dense_lrmat_test) / normFrob(matrix_result_wo_sum); + is_error = is_error || !(error < std::max(epsilon, lrmat_tolerance)); + cout << "> Errors on a matrix hmatrix product to lrmat without sum: " << error << endl; + + lrmat_test = C_auto_approximation; + add_matrix_hmatrix_product(transa, 'N', alpha, A_dense, root_hmatrix, beta, lrmat_test); + dense_lrmat_test.resize(lrmat_test.get_U().nb_rows(), lrmat_test.get_V().nb_cols()); + lrmat_test.copy_to_dense(dense_lrmat_test.data()); + error = normFrob(matrix_result_w_lrmat_sum - dense_lrmat_test) / normFrob(matrix_result_w_lrmat_sum); + is_error = is_error || !(error < std::max(epsilon, lrmat_tolerance) * margin); + cout << "> Errors on a matrix hmatrix product to lrmat with sum: " << error << endl; + + return is_error; +} diff --git 
a/tests/functional_tests/matrix/CMakeLists.txt b/tests/functional_tests/matrix/CMakeLists.txt new file mode 100644 index 00000000..84eca4a0 --- /dev/null +++ b/tests/functional_tests/matrix/CMakeLists.txt @@ -0,0 +1,13 @@ +add_custom_target(build-tests-matrix-product) +add_dependencies(build-tests build-tests-matrix-product) +add_subdirectory(matrix_product) + +add_executable(Test_matrix test_matrix.cpp) +target_link_libraries(Test_matrix htool) +add_dependencies(build-tests Test_matrix) +add_test(Test_matrix Test_matrix) + +add_executable(Test_matrix_file test_matrix_file.cpp) +target_link_libraries(Test_matrix_file htool) +add_dependencies(build-tests Test_matrix_file) +add_test(Test_matrix_file Test_matrix_file) diff --git a/tests/functional_tests/basic_types/matrix_product/CMakeLists.txt b/tests/functional_tests/matrix/matrix_product/CMakeLists.txt similarity index 85% rename from tests/functional_tests/basic_types/matrix_product/CMakeLists.txt rename to tests/functional_tests/matrix/matrix_product/CMakeLists.txt index 6c14e91a..6770ac4b 100755 --- a/tests/functional_tests/basic_types/matrix_product/CMakeLists.txt +++ b/tests/functional_tests/matrix/matrix_product/CMakeLists.txt @@ -8,6 +8,6 @@ list(APPEND types "complex_double") foreach(type ${types}) add_executable(Test_matrix_product_${type} test_matrix_product_${type}.cpp) target_link_libraries(Test_matrix_product_${type} htool) - add_dependencies(build-tests-basic-types-matrix-product Test_matrix_product_${type}) + add_dependencies(build-tests-matrix-product Test_matrix_product_${type}) add_test(Test_matrix_product_${type} Test_matrix_product_${type}) endforeach() diff --git a/tests/functional_tests/basic_types/matrix_product/test_matrix_product_complex_double.cpp b/tests/functional_tests/matrix/matrix_product/test_matrix_product_complex_double.cpp similarity index 98% rename from tests/functional_tests/basic_types/matrix_product/test_matrix_product_complex_double.cpp rename to 
tests/functional_tests/matrix/matrix_product/test_matrix_product_complex_double.cpp index 2daaee16..65a50176 100644 --- a/tests/functional_tests/basic_types/matrix_product/test_matrix_product_complex_double.cpp +++ b/tests/functional_tests/matrix/matrix_product/test_matrix_product_complex_double.cpp @@ -1,4 +1,4 @@ -#include "../test_basic_type_matrix_product.hpp" +#include "../test_matrix_product.hpp" using namespace std; using namespace htool; diff --git a/tests/functional_tests/basic_types/matrix_product/test_matrix_product_double.cpp b/tests/functional_tests/matrix/matrix_product/test_matrix_product_double.cpp similarity index 98% rename from tests/functional_tests/basic_types/matrix_product/test_matrix_product_double.cpp rename to tests/functional_tests/matrix/matrix_product/test_matrix_product_double.cpp index 392a6466..c545f596 100644 --- a/tests/functional_tests/basic_types/matrix_product/test_matrix_product_double.cpp +++ b/tests/functional_tests/matrix/matrix_product/test_matrix_product_double.cpp @@ -1,4 +1,4 @@ -#include "../test_basic_type_matrix_product.hpp" +#include "../test_matrix_product.hpp" using namespace std; using namespace htool; diff --git a/tests/functional_tests/basic_types/test_basic_types_matrix.cpp b/tests/functional_tests/matrix/test_matrix.cpp similarity index 99% rename from tests/functional_tests/basic_types/test_basic_types_matrix.cpp rename to tests/functional_tests/matrix/test_matrix.cpp index 41e8efe1..23e7b9f6 100644 --- a/tests/functional_tests/basic_types/test_basic_types_matrix.cpp +++ b/tests/functional_tests/matrix/test_matrix.cpp @@ -1,4 +1,4 @@ -#include "htool/basic_types/matrix.hpp" +#include "htool/matrix/matrix.hpp" using namespace std; using namespace htool; diff --git a/tests/functional_tests/basic_types/test_basic_types_matrix_file.cpp b/tests/functional_tests/matrix/test_matrix_file.cpp similarity index 96% rename from tests/functional_tests/basic_types/test_basic_types_matrix_file.cpp rename to 
tests/functional_tests/matrix/test_matrix_file.cpp index 4b385928..8c08180b 100644 --- a/tests/functional_tests/basic_types/test_basic_types_matrix_file.cpp +++ b/tests/functional_tests/matrix/test_matrix_file.cpp @@ -1,4 +1,4 @@ -#include "htool/basic_types/matrix.hpp" +#include "htool/matrix/matrix.hpp" using namespace std; using namespace htool; diff --git a/tests/functional_tests/matrix/test_matrix_product.hpp b/tests/functional_tests/matrix/test_matrix_product.hpp new file mode 100644 index 00000000..de638cb0 --- /dev/null +++ b/tests/functional_tests/matrix/test_matrix_product.hpp @@ -0,0 +1,139 @@ +#include +#include +#include + +using namespace std; +using namespace htool; + +template +bool test_matrix_product(int nr, int nc, int mu, char transa, char symmetry, char UPLO) { + + bool is_error = false; + + // Generate random matrix + vector data(nr * nc); + generate_random_vector(data); + Matrix A; + A.assign(nr, nc, data.data(), false); + + if (symmetry == 'S') { + for (int i = 0; i < nr; i++) { + for (int j = 0; j < i; j++) { + A(i, j) = A(j, i); + } + } + } else if (symmetry == 'H') { + for (int i = 0; i < nr; i++) { + for (int j = 0; j < i; j++) { + A(i, j) = conj_if_complex(A(j, i)); + } + A(i, i) = std::real(A(i, i)); + } + } + + // Input sizes + int ni = (transa == 'T' || transa == 'C') ? nr : nc; + int no = (transa == 'T' || transa == 'C') ? 
nc : nr; + + // Random input matrices + Matrix X(ni, mu), Y(no, mu), Xt(mu, ni), Yt(mu, no), C; + T alpha, beta, scaling_coefficient; + htool::underlying_type error; + generate_random_array(X.data(), X.nb_cols() * X.nb_rows()); + generate_random_array(Y.data(), Y.nb_cols() * Y.nb_rows()); + generate_random_scalar(alpha); + generate_random_scalar(beta); + generate_random_scalar(scaling_coefficient); + transpose(X, Xt); + transpose(Y, Yt); + + // reference + Matrix matrix_result_w_matrix_sum(no, mu), transposed_matrix_result_w_matrix_sum; + if (transa == 'N') { + for (int p = 0; p < mu; p++) { + for (int i = 0; i < no; i++) { + for (int j = 0; j < ni; j++) { + matrix_result_w_matrix_sum(i, p) += alpha * A(i, j) * X(j, p); + } + matrix_result_w_matrix_sum(i, p) += beta * Y(i, p); + } + } + } else if (transa == 'T') { + for (int p = 0; p < mu; p++) { + for (int i = 0; i < no; i++) { + for (int j = 0; j < ni; j++) { + matrix_result_w_matrix_sum(i, p) += alpha * A(j, i) * X(j, p); + } + matrix_result_w_matrix_sum(i, p) += beta * Y(i, p); + } + } + } else if (transa == 'C') { + for (int p = 0; p < mu; p++) { + for (int i = 0; i < no; i++) { + for (int j = 0; j < ni; j++) { + matrix_result_w_matrix_sum(i, p) += alpha * conj_if_complex(A(j, i)) * X(j, p); + } + matrix_result_w_matrix_sum(i, p) += beta * Y(i, p); + } + } + } + transpose(matrix_result_w_matrix_sum, transposed_matrix_result_w_matrix_sum); + Matrix scaled_matrix_result_w_matrix_sum(matrix_result_w_matrix_sum); + scale(scaling_coefficient, scaled_matrix_result_w_matrix_sum); + + // Product + if (mu == 1) { + C = Y; + add_matrix_vector_product(transa, alpha, A, X.data(), beta, C.data()); + error = normFrob(matrix_result_w_matrix_sum - C) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < 1e-14); + cout << "> Errors on a matrix vector product: " << error << endl; + } + + if (mu == 1 && symmetry != 'N') { + C = Y; + add_matrix_vector_product_symmetric(transa, alpha, A, X.data(), beta, 
C.data(), UPLO, symmetry); + error = normFrob(matrix_result_w_matrix_sum - C) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < 1e-14); + cout << "> Errors on a symmetric matrix vector product: " << error << endl; + } + + C = Y; + add_matrix_matrix_product(transa, 'N', alpha, A, X, beta, C); + error = normFrob(matrix_result_w_matrix_sum - C) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < 1e-14); + cout << "> Errors on a matrix matrix product: " << error << endl; + + C = Yt; + add_matrix_matrix_product_row_major(transa, 'N', alpha, A, Xt.data(), beta, C.data(), mu); + error = normFrob(transposed_matrix_result_w_matrix_sum - C) / normFrob(transposed_matrix_result_w_matrix_sum); + is_error = is_error || !(error < 1e-14); + cout << "> Errors on a matrix matrix product with row major input: " << error << endl; + + if (symmetry != 'N') { + C = Y; + add_matrix_matrix_product_symmetric('L', transa, 'N', alpha, A, X, beta, C, UPLO, symmetry); + error = normFrob(matrix_result_w_matrix_sum - C) / normFrob(matrix_result_w_matrix_sum); + is_error = is_error || !(error < 1e-14); + cout << "> Errors on a symmetric matrix matrix product: " << error << endl; + + C = Yt; + add_matrix_matrix_product_symmetric_row_major('L', transa, 'N', alpha, A, Xt.data(), beta, C.data(), mu, UPLO, symmetry); + error = normFrob(transposed_matrix_result_w_matrix_sum - C) / normFrob(transposed_matrix_result_w_matrix_sum); + is_error = is_error || !(error < 1e-14); + cout << "> Errors on a symmetric matrix matrix product with row major input: " << error << endl; + } + + C = Y; + T local_scaling_coefficient = scaling_coefficient; + if (transa == 'C') { + local_scaling_coefficient = conj_if_complex(scaling_coefficient); + } + scale(local_scaling_coefficient, A); + add_matrix_matrix_product(transa, 'N', alpha, A, X, scaling_coefficient * beta, C); + error = normFrob(scaled_matrix_result_w_matrix_sum - C) / normFrob(scaled_matrix_result_w_matrix_sum); + 
is_error = is_error || !(error < 1e-14); + cout << "> Errors on a scaled matrix matrix product: " << error << endl; + + return is_error; +} diff --git a/tests/functional_tests/solvers/CMakeLists.txt b/tests/functional_tests/solvers/CMakeLists.txt index 5eafcdd5..ac17472d 100644 --- a/tests/functional_tests/solvers/CMakeLists.txt +++ b/tests/functional_tests/solvers/CMakeLists.txt @@ -23,6 +23,10 @@ target_link_libraries(Test_solver htool) add_dependencies(build-tests-solvers Test_solver) add_test(NAME Test_solver_1 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_solver ${Test_solver_ARGS}) +set_tests_properties(Test_solver_1 PROPERTIES LABELS "mpi") add_test(NAME Test_solver_2 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 2 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_solver ${Test_solver_ARGS}) +set_tests_properties(Test_solver_2 PROPERTIES LABELS "mpi") add_test(NAME Test_solver_3 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_solver ${Test_solver_ARGS}) +set_tests_properties(Test_solver_3 PROPERTIES LABELS "mpi") add_test(NAME Test_solver_4 COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/Test_solver ${Test_solver_ARGS}) +set_tests_properties(Test_solver_4 PROPERTIES LABELS "mpi") diff --git a/tests/functional_tests/solvers/test_solver_ddm.hpp b/tests/functional_tests/solvers/test_solver_ddm.hpp index eb18d4c0..33bd909a 100644 --- a/tests/functional_tests/solvers/test_solver_ddm.hpp +++ b/tests/functional_tests/solvers/test_solver_ddm.hpp @@ -113,11 +113,11 @@ int test_solver_ddm(int argc, char *argv[], int mu, char data_symmetry, char sym LocalGeneratorFromMatrix> local_generator(A, local_cluster.get_permutation(), local_cluster.get_permutation(), local_to_global_numbering, local_to_global_numbering); - HMatrixTreeBuilder, double> local_hmatrix_builder(local_cluster, local_cluster, epsilon, eta, symmetric, UPLO, 
-1, -1); + HMatrixTreeBuilder, double> local_hmatrix_builder(local_cluster, local_cluster, epsilon, eta, symmetric, UPLO, -1, -1, -1); HMatrix, double> local_hmatrix = local_hmatrix_builder.build(local_generator); print_distributed_hmatrix_information(local_hmatrix, std::cout, MPI_COMM_WORLD); - + save_leaves_with_rank(local_hmatrix, "local_hmatrix_" + std::to_string(rank)); // Errors double error2;