From ba9e3aa2a1402787425b9255c2d53a7879909d2a Mon Sep 17 00:00:00 2001 From: nournadar Date: Thu, 9 Nov 2023 22:51:45 +0300 Subject: [PATCH] 1.0.0 --- .gitignore | 662 +++++++++++- .gitmodules | 6 + CMakeLists.txt | 185 ++++ Jenkinsfile | 143 +++ LICENSE | 2 +- README.md | 5 +- clean_build.sh | 38 + cmake/CheckFunctionExists.cmake | 121 +++ cmake/FindBLASEXT.cmake | 382 +++++++ cmake/FindCBLAS.cmake | 466 +++++++++ cmake/FindLAPACK.cmake | 801 +++++++++++++++ cmake/FindLAPACKE.cmake | 389 +++++++ cmake/FindLAPACKEXT.cmake | 352 +++++++ cmake/FindPackageHandleStandardArgs.cmake | 605 +++++++++++ cmake/FindPackageMessage.cmake | 48 + .../FindPkgconfigLibrariesAbsolutePath.cmake | 99 ++ cmake/FindSTARPU.cmake | 952 ++++++++++++++++++ cmake/FindSTARSH.cmake | 424 ++++++++ cmake/ImportCatch2.cmake | 19 + cmake/ImportLAPACKE.cmake | 43 + cmake/ImportSTARPU.cmake | 80 ++ cmake/ImportSTARSH.cmake | 86 ++ cmake/MKLConfig.cmake | 850 ++++++++++++++++ cmake/macros/BuildDependency.cmake | 110 ++ cmake/macros/BuildSTARPU.cmake | 50 + cmake/macros/FindBLASEXT.cmake | 382 +++++++ config.sh | 165 +++ docs/CMakeLists.txt | 4 + docs/config.in | 19 + docs/index.md | 54 + examples/CMakeLists.txt | 34 + examples/compressed_data_generation.cpp | 122 +++ examples/data_generation.cpp | 83 ++ examples/dgemm_example.cpp | 194 ++++ examples/main.cpp | 66 ++ examples/mpi_example.cpp | 462 +++++++++ examples/mpi_generation_gemm_example.cpp | 416 ++++++++ examples/par_fixed_rank_main.cpp | 375 +++++++ examples/potrf_example.cpp | 190 ++++ examples/simple_example.cpp | 97 ++ hicmappConfig.cmake.in | 62 ++ include/hicmapp/api/Hicmapp.hpp | 55 + include/hicmapp/common/definitions.h | 76 ++ .../interface/MatrixOperations.hpp | 65 ++ include/hicmapp/primitives/ProblemManager.hpp | 123 +++ .../concrete/slowest_dimension_decomposer.hpp | 57 ++ .../two_dimension_cyclic_decomposer.hpp | 51 + .../decomposer/matrix_decomposer.hpp | 139 +++ include/hicmapp/primitives/matrix.hpp | 287 ++++++ 
include/hicmapp/primitives/submatrix.hpp | 168 ++++ .../hicmapp/problem-manager/StarshManager.hpp | 28 + .../runtime/communicator/mpi/Communicator.hpp | 34 + .../communicator/serial/Communicator.hpp | 18 + .../concrete/default/default_runtime.hpp | 89 ++ .../runtime/concrete/starpu/factory.hpp | 69 ++ .../starpu/hicma_runtime_workspace.hpp | 54 + .../runtime/concrete/starpu/hicma_starpu.hpp | 105 ++ .../starpu/starpu-codelets/gemm-codelet.hpp | 31 + .../generate-dgytlr-codelet.hpp | 30 + .../generate-dgytlr-diag-codelet.hpp | 30 + .../starpu-codelets/generate_codelet.hpp | 31 + .../generate_compressed_data_codelet.hpp | 33 + .../starpu/starpu-codelets/lacpy-codelet.hpp | 29 + .../starpu/starpu-codelets/potrf-codelet.hpp | 30 + .../starpu/starpu-codelets/syrk-codelet.hpp | 31 + .../starpu/starpu-codelets/trsm-codelet.hpp | 30 + .../starpu-codelets/uncompress-codelet.hpp | 28 + .../runtime/concrete/starpu/starpu.hpp | 110 ++ .../runtime/interface/HicmaCommunicator.hpp | 11 + .../runtime/interface/HicmaContext.hpp | 88 ++ .../runtime/interface/HicmaHardware.hpp | 22 + .../runtime/interface/RunTimeFactory.hpp | 37 + .../runtime/interface/RunTimeInterface.hpp | 237 +++++ .../runtime/interface/RunTimeSingleton.hpp | 39 + .../runtime/interface/StarpuCodelet.hpp | 28 + .../tile-operations/TileOperations.hpp | 187 ++++ include/hicmapp/utils/MatrixHelpers.hpp | 37 + libs/hcorepp | 1 + libs/stars-h | 1 + package.pc.in | 10 + scripts/benchmark_script.sh | 21 + scripts/multithreading_benchmark_script.sh | 25 + src/CMakeLists.txt | 34 + src/api/CMakeLists.txt | 5 + src/api/Hicmapp.cpp | 77 ++ src/matrix-operations/CMakeLists.txt | 6 + src/matrix-operations/omp/CMakeLists.txt | 5 + .../omp/MatrixOperations.cpp | 811 +++++++++++++++ src/primitives/CMakeLists.txt | 9 + src/primitives/ProblemManager.cpp | 347 +++++++ src/primitives/decomposer/CMakeLists.txt | 6 + .../slowest_dimension_decomposer.cpp | 95 ++ .../two_dimension_cyclic_decomposer.cpp | 64 ++ src/primitives/matrix.cpp | 
613 +++++++++++ src/primitives/submatrix.cpp | 416 ++++++++ src/problem-manager/CMakeLists.txt | 5 + src/problem-manager/StarshManager.cpp | 105 ++ src/runtime/CMakeLists.txt | 12 + src/runtime/HicmaContext.cpp | 43 + src/runtime/RunTimeSingleton.cpp | 37 + src/runtime/default/CMakeLists.txt | 5 + src/runtime/default/default_runtime.cpp | 228 +++++ src/runtime/starpu/CMakeLists.txt | 7 + src/runtime/starpu/factory.cpp | 50 + .../starpu/starpu-codelets/CMakeLists.txt | 14 + .../GenerateCompressedDataCodelet.cpp | 60 ++ .../starpu/starpu-codelets/gemm-codelet.cpp | 83 ++ .../starpu-codelets/generate-codelet.cpp | 57 ++ .../generate-dgytlr-codelet.cpp | 87 ++ .../generate-dgytlr-diag-codelet.cpp | 92 ++ .../starpu/starpu-codelets/lacpy-codelet.cpp | 74 ++ .../starpu/starpu-codelets/potrf-codelet.cpp | 73 ++ .../starpu/starpu-codelets/syrk-codelet.cpp | 77 ++ .../starpu/starpu-codelets/trsm-codelet.cpp | 81 ++ .../starpu-codelets/uncompress-codelet.cpp | 92 ++ src/runtime/starpu/starpu.cpp | 841 ++++++++++++++++ src/tile-operations/CMakeLists.txt | 5 + src/tile-operations/TileOperations.cpp | 308 ++++++ src/utils/CMakeLists.txt | 5 + src/utils/MatrixHelpers.cpp | 59 ++ tests/CMakeLists.txt | 11 + tests/README.md | 5 + tests/primitives/CMakeLists.txt | 7 + tests/primitives/TestDecomposer.cpp | 587 +++++++++++ tests/primitives/TestMatrix.cpp | 944 +++++++++++++++++ tests/primitives/TestSubMatrix.cpp | 355 +++++++ tests/test_main.cpp | 33 + 127 files changed, 19135 insertions(+), 13 deletions(-) create mode 100644 .gitmodules create mode 100644 CMakeLists.txt create mode 100644 Jenkinsfile create mode 100755 clean_build.sh create mode 100644 cmake/CheckFunctionExists.cmake create mode 100644 cmake/FindBLASEXT.cmake create mode 100644 cmake/FindCBLAS.cmake create mode 100644 cmake/FindLAPACK.cmake create mode 100644 cmake/FindLAPACKE.cmake create mode 100644 cmake/FindLAPACKEXT.cmake create mode 100644 cmake/FindPackageHandleStandardArgs.cmake create mode 100644 
cmake/FindPackageMessage.cmake create mode 100644 cmake/FindPkgconfigLibrariesAbsolutePath.cmake create mode 100644 cmake/FindSTARPU.cmake create mode 100644 cmake/FindSTARSH.cmake create mode 100644 cmake/ImportCatch2.cmake create mode 100644 cmake/ImportLAPACKE.cmake create mode 100644 cmake/ImportSTARPU.cmake create mode 100644 cmake/ImportSTARSH.cmake create mode 100644 cmake/MKLConfig.cmake create mode 100644 cmake/macros/BuildDependency.cmake create mode 100644 cmake/macros/BuildSTARPU.cmake create mode 100644 cmake/macros/FindBLASEXT.cmake create mode 100755 config.sh create mode 100644 docs/CMakeLists.txt create mode 100644 docs/config.in create mode 100644 docs/index.md create mode 100644 examples/CMakeLists.txt create mode 100644 examples/compressed_data_generation.cpp create mode 100644 examples/data_generation.cpp create mode 100644 examples/dgemm_example.cpp create mode 100644 examples/main.cpp create mode 100644 examples/mpi_example.cpp create mode 100644 examples/mpi_generation_gemm_example.cpp create mode 100644 examples/par_fixed_rank_main.cpp create mode 100644 examples/potrf_example.cpp create mode 100644 examples/simple_example.cpp create mode 100644 hicmappConfig.cmake.in create mode 100644 include/hicmapp/api/Hicmapp.hpp create mode 100644 include/hicmapp/common/definitions.h create mode 100644 include/hicmapp/matrix-operations/interface/MatrixOperations.hpp create mode 100644 include/hicmapp/primitives/ProblemManager.hpp create mode 100644 include/hicmapp/primitives/decomposer/concrete/slowest_dimension_decomposer.hpp create mode 100644 include/hicmapp/primitives/decomposer/concrete/two_dimension_cyclic_decomposer.hpp create mode 100644 include/hicmapp/primitives/decomposer/matrix_decomposer.hpp create mode 100644 include/hicmapp/primitives/matrix.hpp create mode 100644 include/hicmapp/primitives/submatrix.hpp create mode 100644 include/hicmapp/problem-manager/StarshManager.hpp create mode 100644 
include/hicmapp/runtime/communicator/mpi/Communicator.hpp create mode 100644 include/hicmapp/runtime/communicator/serial/Communicator.hpp create mode 100644 include/hicmapp/runtime/concrete/default/default_runtime.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/factory.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/hicma_runtime_workspace.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/hicma_starpu.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/starpu-codelets/gemm-codelet.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate-dgytlr-codelet.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate-dgytlr-diag-codelet.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate_codelet.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate_compressed_data_codelet.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/starpu-codelets/lacpy-codelet.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/starpu-codelets/potrf-codelet.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/starpu-codelets/syrk-codelet.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/starpu-codelets/trsm-codelet.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/starpu-codelets/uncompress-codelet.hpp create mode 100644 include/hicmapp/runtime/concrete/starpu/starpu.hpp create mode 100644 include/hicmapp/runtime/interface/HicmaCommunicator.hpp create mode 100644 include/hicmapp/runtime/interface/HicmaContext.hpp create mode 100644 include/hicmapp/runtime/interface/HicmaHardware.hpp create mode 100644 include/hicmapp/runtime/interface/RunTimeFactory.hpp create mode 100644 include/hicmapp/runtime/interface/RunTimeInterface.hpp create mode 100644 include/hicmapp/runtime/interface/RunTimeSingleton.hpp create mode 100644 
include/hicmapp/runtime/interface/StarpuCodelet.hpp create mode 100644 include/hicmapp/tile-operations/TileOperations.hpp create mode 100644 include/hicmapp/utils/MatrixHelpers.hpp create mode 160000 libs/hcorepp create mode 160000 libs/stars-h create mode 100644 package.pc.in create mode 100755 scripts/benchmark_script.sh create mode 100755 scripts/multithreading_benchmark_script.sh create mode 100644 src/CMakeLists.txt create mode 100644 src/api/CMakeLists.txt create mode 100644 src/api/Hicmapp.cpp create mode 100644 src/matrix-operations/CMakeLists.txt create mode 100644 src/matrix-operations/omp/CMakeLists.txt create mode 100644 src/matrix-operations/omp/MatrixOperations.cpp create mode 100644 src/primitives/CMakeLists.txt create mode 100644 src/primitives/ProblemManager.cpp create mode 100644 src/primitives/decomposer/CMakeLists.txt create mode 100644 src/primitives/decomposer/slowest_dimension_decomposer.cpp create mode 100644 src/primitives/decomposer/two_dimension_cyclic_decomposer.cpp create mode 100644 src/primitives/matrix.cpp create mode 100644 src/primitives/submatrix.cpp create mode 100644 src/problem-manager/CMakeLists.txt create mode 100644 src/problem-manager/StarshManager.cpp create mode 100644 src/runtime/CMakeLists.txt create mode 100644 src/runtime/HicmaContext.cpp create mode 100644 src/runtime/RunTimeSingleton.cpp create mode 100644 src/runtime/default/CMakeLists.txt create mode 100644 src/runtime/default/default_runtime.cpp create mode 100644 src/runtime/starpu/CMakeLists.txt create mode 100644 src/runtime/starpu/factory.cpp create mode 100644 src/runtime/starpu/starpu-codelets/CMakeLists.txt create mode 100644 src/runtime/starpu/starpu-codelets/GenerateCompressedDataCodelet.cpp create mode 100644 src/runtime/starpu/starpu-codelets/gemm-codelet.cpp create mode 100644 src/runtime/starpu/starpu-codelets/generate-codelet.cpp create mode 100644 src/runtime/starpu/starpu-codelets/generate-dgytlr-codelet.cpp create mode 100644 
src/runtime/starpu/starpu-codelets/generate-dgytlr-diag-codelet.cpp create mode 100644 src/runtime/starpu/starpu-codelets/lacpy-codelet.cpp create mode 100644 src/runtime/starpu/starpu-codelets/potrf-codelet.cpp create mode 100644 src/runtime/starpu/starpu-codelets/syrk-codelet.cpp create mode 100644 src/runtime/starpu/starpu-codelets/trsm-codelet.cpp create mode 100644 src/runtime/starpu/starpu-codelets/uncompress-codelet.cpp create mode 100644 src/runtime/starpu/starpu.cpp create mode 100644 src/tile-operations/CMakeLists.txt create mode 100644 src/tile-operations/TileOperations.cpp create mode 100644 src/utils/CMakeLists.txt create mode 100644 src/utils/MatrixHelpers.cpp create mode 100644 tests/CMakeLists.txt create mode 100644 tests/README.md create mode 100644 tests/primitives/CMakeLists.txt create mode 100644 tests/primitives/TestDecomposer.cpp create mode 100644 tests/primitives/TestMatrix.cpp create mode 100644 tests/primitives/TestSubMatrix.cpp create mode 100644 tests/test_main.cpp diff --git a/.gitignore b/.gitignore index 259148f..80c4820 100644 --- a/.gitignore +++ b/.gitignore @@ -1,20 +1,69 @@ +# Created by https://www.toptal.com/developers/gitignore/api/clion,intellij,eclipse,sublimetext,cmake,c++,c,cuda,codeblocks,opencv,jetbrains,python,pycharm +# Edit at https://www.toptal.com/developers/gitignore?templates=clion,intellij,eclipse,sublimetext,cmake,c++,c,cuda,codeblocks,opencv,jetbrains,python,pycharm + +### C ### # Prerequisites *.d -# Compiled Object files -*.slo -*.lo +# Object files *.o +*.ko *.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp # Precompiled Headers *.gch *.pch -# Compiled Dynamic libraries +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. 
Windows DLLs) +*.dll *.so +*.so.* *.dylib -*.dll + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + +### C++ ### +# Prerequisites + +# Compiled Object files +*.slo + +# Precompiled Headers + +# Compiled Dynamic libraries # Fortran module files *.mod @@ -22,11 +71,602 @@ # Compiled Static libraries *.lai -*.la -*.a -*.lib # Executables -*.exe -*.out -*.app + +### CLion ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### CLion Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + +### CMake ### +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps + +### CMake Patch ### +# External projects +*-prefix/ + +### CodeBlocks ### +# specific to CodeBlocks IDE +*.layout +*.depend +# generated directories +bin/ +obj/ + +### CUDA ### +*.i +*.ii +*.gpu +*.ptx +*.cubin +*.fatbin + +### Eclipse ### +.metadata +tmp/ +*.tmp +*.bak +*.swp +*~.nib 
+local.properties +.settings/ +.loadpath +.recommenders + +# External tool builders +.externalToolBuilders/ + +# Locally stored "Eclipse launch configurations" +*.launch + +# PyDev specific (Python IDE for Eclipse) +*.pydevproject + +# CDT-specific (C/C++ Development Tooling) +.cproject + +# CDT- autotools +.autotools + +# Java annotation processor (APT) +.factorypath + +# PDT-specific (PHP Development Tools) +.buildpath + +# sbteclipse plugin +.target + +# Tern plugin +.tern-project + +# TeXlipse plugin +.texlipse + +# STS (Spring Tool Suite) +.springBeans + +# Code Recommenders +.recommenders/ + +# Annotation Processing +.apt_generated/ +.apt_generated_test/ + +# Scala IDE specific (Scala & Java development for Eclipse) +.cache-main +.scala_dependencies +.worksheet + +# Uncomment this line if you wish to ignore the project description file. +# Typically, this file would be tracked if it contains build/dependency configurations: +#.project + +### Eclipse Patch ### +# Spring Boot Tooling +.sts4-cache/ + +### Intellij ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff + +# Generated files + +# Sensitive or high-churn files + +# Gradle + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake + +# Mongo Explorer plugin + +# File-based project format + +# IntelliJ + +# mpeltonen/sbt-idea plugin + +# JIRA plugin + +# Cursive Clojure plugin + +# Crashlytics plugin (for Android Studio and IntelliJ) + +# Editor-based Rest Client + +# Android studio 3.1+ serialized cache file + +### Intellij Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream + +### JetBrains ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff + +# Generated files + +# Sensitive or high-churn files + +# Gradle + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake + +# Mongo Explorer plugin + +# File-based project format + +# IntelliJ + +# mpeltonen/sbt-idea plugin + +# JIRA plugin + +# Cursive Clojure plugin + +# Crashlytics plugin (for Android Studio and IntelliJ) + +# Editor-based Rest Client + +# Android studio 3.1+ serialized cache file + +### JetBrains Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream + +### OpenCV ### +#OpenCV for Mac and Linux +#build and release folders +*/CMakeFiles +*/CMakeCache.txt +*/Makefile +*/cmake_install.cmake +.DS_Store + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff + +# Generated files + +# Sensitive or high-churn files + +# Gradle + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake + +# Mongo Explorer plugin + +# File-based project format + +# IntelliJ + +# mpeltonen/sbt-idea plugin + +# JIRA plugin + +# Cursive Clojure plugin + +# Crashlytics plugin (for Android Studio and IntelliJ) + +# Editor-based Rest Client + +# Android studio 3.1+ serialized cache file + +### PyCharm Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +pytestdebug.log + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +doc/_build/ +docs/html/ +docs/latex/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +### SublimeText ### +# Cache files for Sublime Text +*.tmlanguage.cache +*.tmPreferences.cache +*.stTheme.cache + +# Workspace files are user-specific +*.sublime-workspace + +# Project files should be checked into the repository, unless a significant +# proportion of contributors will probably not be using Sublime Text +# *.sublime-project + +# SFTP configuration file +sftp-config.json + +# Package control specific files +Package Control.last-run +Package Control.ca-list +Package Control.ca-bundle +Package Control.system-ca-bundle +Package Control.cache/ +Package Control.ca-certs/ +Package Control.merged-ca-bundle +Package Control.user-ca-bundle +oscrypto-ca-bundle.crt +bh_unicode_properties.cache + +# Sublime-github package stores a github token in this file +# https://packagecontrol.io/packages/sublime-github +GitHub.sublime-settings + +# Seismic Toolbox Results Specifics +results/ +*.trace +*.segy +*.sgy +*.png +*.bin +data/*.segy +data/*.sgy +data/ +.idea/ +*.tar.bz2 +boost**/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..7769118 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "libs/stars-h"] + path = libs/stars-h + url = https://github.com/ecrc/stars-h.git +[submodule "libs/hcorepp"] + path = libs/hcorepp + url = https://github.com/ecrc/hcorepp-dev.git diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..29c2f46 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,185 @@ +# set minimum cmake version +cmake_minimum_required(VERSION 3.20
FATAL_ERROR) +cmake_policy(SET CMP0048 NEW) +# Project Options. +option(USE_CUDA "Use Cuda, if available" false) +option(HICMAPP_USE_TIMER "Enable Timer" false) +option(HICMAPP_BUILD_TESTS "Option to enable building tests" ON) +option(HICMAPP_BUILD_EXAMPLES "Option to enable building examples" ON) +option(HICMAPP_BUILD_DOCS "Build documentation in docs directory" ON) +option(USE_MPI "Use MPI, if available" OFF) +option(USE_OMP "Use OMP, if available" OFF) + +if (NOT DEFINED HICMAPP_SCHED) + set(HICMAPP_SCHED "Default") +endif() +string(TOLOWER "${HICMAPP_SCHED}" HICMAPP_SCHED_LOWER) + +#enable_language(CXX) +# Add Module Paths(Own cmake modules/hcore++) +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # NOTE(review): PROJECT_SOURCE_DIR is only set by project(), which is called further down - confirm this early path is intended +list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/libs/hcorepp/cmake) +#list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/libs/stars-h/cmake) +set(CMAKE_CXX_FLAGS_RELEASE "-g -fPIC -fno-common -O0 -Wall") +set(CMAKE_CXX_FLAGS_DEBUG "-g -fPIC -fno-common -O0 -Wall") +set(HCOREPP_BUILD_DOCS OFF) # This is to prevent having multiple targets with the + #name docs and to make each library responsible for its own documentation +if(HICMAPP_USE_TIMER) + add_definitions(-DHICMAPP_USE_TIMER) +endif () + +# Toolchain selection +# Check if only one toolchain is set to true at a time +if (USE_CUDA AND (USE_SYCL OR USE_OMP)) + message(FATAL_ERROR "Only one toolchain USE_CUDA, USE_SYCL, and USE_OMP can be set to true at a time.") +elseif (USE_SYCL AND (USE_CUDA OR USE_OMP)) + message(FATAL_ERROR "Only one toolchain USE_CUDA, USE_SYCL, and USE_OMP can be set to true at a time.") +elseif (USE_OMP AND (USE_CUDA OR USE_SYCL)) + message(FATAL_ERROR "Only one toolchain USE_CUDA, USE_SYCL, and USE_OMP can be set to true at a time.") +endif() +# OMP cannot be combined with a non-default scheduler; the scheduler name is a string, so it must be compared with STREQUAL (EQUAL is numeric-only and never matches "default"). +if(USE_OMP AND NOT ("${HICMAPP_SCHED_LOWER}" STREQUAL "default")) + message(FATAL_ERROR "Only one runtime/resource management toolchain can be set to true at a time.
STARPU and OMP can't be used simultaneously") +endif() + +if (USE_CUDA) + add_definitions(-DUSE_CUDA=TRUE) + enable_language(CUDA) + include(toolchains/CudaToolchain) + set(BLA_VENDOR NVHPC) + list(APPEND STARPU_COMPONENT_LIST "CUDA") +elseif(USE_SYCL) + add_definitions(-DUSE_SYCL=TRUE) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L$ENV{MKLROOT}/lib/intel64 -fsycl -lsycl -lOpenCL -lpthread -lm -ldl") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -qmkl=parallel -DSYCL_DEVICES_cpu -O3 -Wno-return-type-c-linkage -fsycl-unnamed-lambda -qopenmp") + add_definitions(-DMKL_ILP64) + add_definitions(-DLAPACK_ILP64) + add_definitions(-DLAPACK_COMPLEX_CPP) + set(CMAKE_C_COMPILER icx) + set(CMAKE_CXX_COMPILER icpx) + set(CMAKE_CXX_COMPILER_ID IntelLLVM) +elseif(USE_OMP) + add_definitions(-DUSE_OMP=TRUE) +else () + include(toolchains/GccToolchain) +endif () + +# Add HCorePP library +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libs/hcorepp) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/libs/hcorepp/include) +list(APPEND LIBS hcorepp) + + +# Project settings. 
+project(hicmapp VERSION 1.0.0 DESCRIPTION "HICMA C++ implementation") +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) +list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/libs/hcorepp/cmake) + +if (HICMAPP_USE_MPI) + add_definitions(-DSTARPU_USE_MPI) + add_definitions(-DHICMAPP_USE_MPI=TRUE) + find_package(MPI REQUIRED) + list(APPEND STARPU_COMPONENT_LIST "MPI") +endif () + +set(USE_MPI ${HICMAPP_USE_MPI}) + +# HICMA depends on a runtime +# ------------------------------- +if ("${HICMAPP_SCHED_LOWER}" STREQUAL "starpu") + include(ImportSTARPU) + add_definitions(-DHICMAPP_STARPU) + if(USE_CUDA) + add_definitions(-DSTARPU_USE_CUDA) + endif() +else() + set(HICMAPP_SCHED_LOWER "default") +endif() + +#add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libs/stars-h) +#include_directories(${CMAKE_CURRENT_SOURCE_DIR}/libs/stars-h/include) +#list(APPEND LIBS starsh) + +include(ImportSTARSH) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/prerequisites) + +add_subdirectory(src) + +# Add tests if enabled +if (${HICMAPP_BUILD_TESTS}) + message(STATUS "Building Tests") + include(ImportCatch2) + include(CTest) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tests) + enable_testing() + +endif () + +# Add examples if enabled +if (${HICMAPP_BUILD_EXAMPLES}) + message(STATUS "Building Examples") + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples) +endif () + +message("---------------------------BUILDING HiCMA DOCS") +find_package(Doxygen) +if (DOXYGEN_FOUND) + add_subdirectory("docs") +else () + message(STATUS "Doxygen NOT found, skipping it") +endif () + +# Installation actions +install(DIRECTORY include/${PROJECT_NAME} DESTINATION include) +## Install cmake find package. 
+include(CMakePackageConfigHelpers) +write_basic_package_version_file("${CMAKE_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" COMPATIBILITY ExactVersion) +install( + FILES + "${CMAKE_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" + DESTINATION lib/cmake/${PROJECT_NAME} +) + +configure_file(${PROJECT_NAME}Config.cmake.in + ${PROJECT_NAME}Config.cmake @ONLY) + +install( + FILES + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + DESTINATION lib/cmake/${PROJECT_NAME} +) + +install( + DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}/cmake" + DESTINATION lib/cmake/${PROJECT_NAME}/Modules +) + +## Generate pkg-config file +configure_file(package.pc.in + lib/pkgconfig/${PROJECT_NAME}.pc @ONLY) +install( + FILES + "${PROJECT_BINARY_DIR}/lib/pkgconfig/${PROJECT_NAME}.pc" + DESTINATION lib/pkgconfig/ +) + + +################## +# Release source # +################## +set(CPACK_SOURCE_GENERATOR "TGZ") +set(CPACK_PACKAGE_NAME "${PROJECT_NAME}") +set(CPACK_PACKAGE_DESCRIPTION_FILE ${CMAKE_CURRENT_SOURCE_DIR}/README.md) +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Hicma++ library ") +set(CPACK_PACKAGE_VERSION "${${PROJECT_NAME}_VERSION}") +set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}") +set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}") +set(CPACK_PACKAGE_VENDOR "KAUST") +set(CPACK_PACKAGE_CONTACT "hatem.ltaief@kaust.edu.sa") +set(CPACK_RESOURCE_FILE_README ${CMAKE_CURRENT_SOURCE_DIR}/README.md) +set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE) +set(CPACK_SOURCE_IGNORE_FILES "bin;.git;.gitmodules;Jenkinsfile") +include(CPack) diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000..873a96d --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,143 @@ +pipeline { + agent { label 'jenkinsfile' } + triggers { + pollSCM('H/10 * * * *') + } + + options { + disableConcurrentBuilds() + buildDiscarder(logRotator(numToKeepStr: '50')) + timestamps() + } + + stages { + stage ('mkl') { + stages { + stage 
('build without StarPu-MPI') { + steps { + sh '''#!/bin/bash -le + #################################################### + # Configure and build + #################################################### + module purge + module load gcc/10.2.0 + module load cmake/3.21.2 + module load hwloc/2.4.0-gcc-10.2.0 + #################################################### + # BLAS/LAPACK + #################################################### + module load mkl/2020.0.166 + #################################################### + set -x + git submodule update --init --recursive + ./config.sh -t -e + ./clean_build.sh + ''' + } + } + stage ('test without StarPu-MPI') { + steps { + + sh '''#!/bin/bash -le + #################################################### + # Run tester + #################################################### + echo "========================================" + module purge + module load gcc/10.2.0 + module load cmake/3.21.2 + module load hwloc/2.4.0-gcc-10.2.0 + #################################################### + # BLAS/LAPACK + #################################################### + module load mkl/2020.0.166 + cd bin/ + ctest --no-compress-output --verbose + ''' + } + } + stage ('build with StarPu-MPI') { + steps { + sh '''#!/bin/bash -le + #################################################### + # Configure and build + #################################################### + module purge + module load gcc/10.2.0 + module load cmake/3.21.2 + module load hwloc/2.4.0-gcc-10.2.0 + #################################################### + # BLAS/LAPACK/MPI + #################################################### + source /opt/ecrc/hpc-toolkit/ub18/setvars.sh + #################################################### + set -x + git submodule update --init --recursive + ./config.sh -t -e -r starpu -m + ./clean_build.sh + ''' + } + } + stage ('test with StarPu-MPI') { + steps { + + sh '''#!/bin/bash -le + #################################################### + # Run tester + 
#################################################### + echo "========================================" + module purge + module load gcc/10.2.0 + module load cmake/3.21.2 + module load hwloc/2.4.0-gcc-10.2.0 + #################################################### + # BLAS/LAPACK/MPI + #################################################### + source /opt/ecrc/hpc-toolkit/ub18/setvars.sh + #################################################### + cd bin/ + ctest --no-compress-output --verbose + ''' + } + } + } + } + stage('documentation') { + agent { label 'jenkinsfile'} + steps { + sh '''#!/bin/bash -le + module purge + module load gcc/10.2.0 + module load cmake/3.21.2 + #################################################### + # BLAS/LAPACK + #################################################### + module load mkl/2020.0.166 + ./config.sh -t -e + ./clean_build.sh + cd bin + make docs + ''' + publishHTML( target: [allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: 'docs/html', reportFiles: 'index.html', reportName: 'Doxygen Documentation', reportTitles: ''] ) + } + } + } + + // Post build actions + post { + //always { + //} + //success { + //} + //unstable { + //} + //failure { + //} + unstable { + emailext body: "${env.JOB_NAME} - Please go to ${env.BUILD_URL}", subject: "Jenkins Pipeline build is UNSTABLE", recipientProviders: [[$class: 'CulpritsRecipientProvider'], [$class: 'RequesterRecipientProvider']] + } + failure { + emailext body: "${env.JOB_NAME} - Please go to ${env.BUILD_URL}", subject: "Jenkins Pipeline build FAILED", recipientProviders: [[$class: 'CulpritsRecipientProvider'], [$class: 'RequesterRecipientProvider']] + } + } +} diff --git a/LICENSE b/LICENSE index c8bcd3f..769cefc 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2021, Extreme Computing Research Center +Copyright (c) 2017-2021, King Abdullah University of Science and Technology All rights reserved. 
Redistribution and use in source and binary forms, with or without
diff --git a/README.md b/README.md
index e0cc19d..ea28e8c 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,4 @@
-# hicmapp-dev
\ No newline at end of file
+# HICMAPP
+
+C++ implementation of KAUST ECRC HiCMA library.
+The original C version of HiCMA : https://github.com/ecrc/hicma
diff --git a/clean_build.sh b/clean_build.sh
new file mode 100755
index 0000000..8c1409e
--- /dev/null
+++ b/clean_build.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+#
+# @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST).
+# All rights reserved.
+#
+verbose=                # extra make argument; stays empty unless -v is given
+num_proc="-j $(nproc)"  # default: one make job per available processor
+
+while getopts "vj:h" opt; do
+  case $opt in
+  v)
+    verbose="VERBOSE=1"
+    echo "Using verbose mode"
+    ;;
+  j)
+    num_proc="-j $OPTARG"
+    echo "Using $OPTARG threads to build"
+    ;;
+  h)
+    echo "Usage of $(basename "$0"):"
+    echo " Cleans the bin directory, then rebuilds the code."
+    echo ""
+    echo "-v : to print the output of make in detail"
+    echo ""
+    echo "-j <n> : to build with a specific number of threads"
+    echo ""
+    exit 0  # asking for help is not an error
+    ;;
+  *)
+    echo "Invalid flags entered. run using the -h flag for help"
+    exit 1
+    ;;
+  esac
+done
+
+cd bin/ || { echo "bin/ directory not found" >&2; exit 1; }
+make clean
+make all $num_proc $verbose  # intentionally unquoted: each variable must split into separate make arguments
diff --git a/cmake/CheckFunctionExists.cmake b/cmake/CheckFunctionExists.cmake
new file mode 100644
index 0000000..489b2c8
--- /dev/null
+++ b/cmake/CheckFunctionExists.cmake
@@ -0,0 +1,121 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+CheckFunctionExists
+-------------------
+
+Check if a C function can be linked
+
+.. command:: check_function_exists
+
+  .. code-block:: cmake
+
+    check_function_exists(<function> <variable>)
+
+  Checks that the ``<function>`` is provided by libraries on the system and store
+  the result in a ``<variable>``, which will be created as an internal
+  cache variable.
+
+The following variables may be set before calling this macro to modify the
+way the check is run:
+
+``CMAKE_REQUIRED_FLAGS``
+  string of compile command line flags.
+``CMAKE_REQUIRED_DEFINITIONS``
+  a :ref:`;-list <CMake Language Lists>` of macros to define (-DFOO=bar).
+``CMAKE_REQUIRED_INCLUDES``
+  a :ref:`;-list <CMake Language Lists>` of header search paths to pass to
+  the compiler.
+``CMAKE_REQUIRED_LINK_OPTIONS``
+  .. versionadded:: 3.14
+    a :ref:`;-list <CMake Language Lists>` of options to add to the link command.
+``CMAKE_REQUIRED_LIBRARIES``
+  a :ref:`;-list <CMake Language Lists>` of libraries to add to the link
+  command. See policy :policy:`CMP0075`.
+``CMAKE_REQUIRED_QUIET``
+  .. versionadded:: 3.1
+    execute quietly without messages.
+
+.. note::
+
+  Prefer using :Module:`CheckSymbolExists` instead of this module,
+  for the following reasons:
+
+  * ``check_function_exists()`` can't detect functions that are inlined
+    in headers or specified as a macro.
+
+  * ``check_function_exists()`` can't detect anything in the 32-bit
+    versions of the Win32 API, because of a mismatch in calling conventions.
+
+  * ``check_function_exists()`` only verifies linking, it does not verify
+    that the function is declared in system headers.
+#]=======================================================================]
+
+include_guard(GLOBAL)
+
+macro(CHECK_FUNCTION_EXISTS FUNCTION VARIABLE)
+  if(NOT DEFINED "${VARIABLE}" OR "x${${VARIABLE}}" STREQUAL "x${VARIABLE}")  # run the check only when ${VARIABLE} is undefined or holds its own name
+    set(MACRO_CHECK_FUNCTION_DEFINITIONS
+      "-DCHECK_FUNCTION_EXISTS=${FUNCTION} ${CMAKE_REQUIRED_FLAGS}")
+    if(NOT CMAKE_REQUIRED_QUIET)
+      message(CHECK_START "Looking for ${FUNCTION}")
+    endif()
+    if(CMAKE_REQUIRED_LINK_OPTIONS)
+      set(CHECK_FUNCTION_EXISTS_ADD_LINK_OPTIONS
+        LINK_OPTIONS ${CMAKE_REQUIRED_LINK_OPTIONS})
+    else()
+      set(CHECK_FUNCTION_EXISTS_ADD_LINK_OPTIONS)
+    endif()
+    if(CMAKE_REQUIRED_LIBRARIES)
+      set(CHECK_FUNCTION_EXISTS_ADD_LIBRARIES
+        LINK_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES})
+    else()
+      set(CHECK_FUNCTION_EXISTS_ADD_LIBRARIES)
+    endif()
+    if(CMAKE_REQUIRED_INCLUDES)
+      set(CHECK_FUNCTION_EXISTS_ADD_INCLUDES
+        "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}")
+    else()
+      set(CHECK_FUNCTION_EXISTS_ADD_INCLUDES)
+    endif()
+
+    if(CMAKE_C_COMPILER_LOADED)
+      set(_cfe_source ${CMAKE_ROOT}/Modules/CheckFunctionExists.c)
+    elseif(CMAKE_CXX_COMPILER_LOADED)  # C is not loaded: use a .cxx copy of the C check source instead
+      set(_cfe_source ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CheckFunctionExists/CheckFunctionExists.cxx)
+      configure_file(${CMAKE_ROOT}/Modules/CheckFunctionExists.c "${_cfe_source}" COPYONLY)
+    else()
+      message(FATAL_ERROR "CHECK_FUNCTION_EXISTS needs either C or CXX language enabled")
+    endif()
+
+    try_compile(${VARIABLE}
+      ${CMAKE_BINARY_DIR}
+      ${_cfe_source}
+      COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}
+      ${CHECK_FUNCTION_EXISTS_ADD_LINK_OPTIONS}
+      ${CHECK_FUNCTION_EXISTS_ADD_LIBRARIES}
+      CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=${MACRO_CHECK_FUNCTION_DEFINITIONS}
+      "${CHECK_FUNCTION_EXISTS_ADD_INCLUDES}"
+      OUTPUT_VARIABLE OUTPUT)
+    unset(_cfe_source)
+
+    if(${VARIABLE})
+      set(${VARIABLE} 1 CACHE INTERNAL "Have function ${FUNCTION}")  # store the positive result in the cache
+      if(NOT CMAKE_REQUIRED_QUIET)
+        message(CHECK_PASS "found")
+      endif()
+      file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
+        "Determining if the function ${FUNCTION} exists passed with the following output:\n"
+        "${OUTPUT}\n\n")
+    else()
+      if(NOT CMAKE_REQUIRED_QUIET)
+        message(CHECK_FAIL "not found")
+      endif()
+      set(${VARIABLE} "" CACHE INTERNAL "Have function ${FUNCTION}")  # store the negative result as an empty string
+      file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
+        "Determining if the function ${FUNCTION} exists failed with the following output:\n"
+        "${OUTPUT}\n\n")
+    endif()
+  endif()
+endmacro()
diff --git a/cmake/FindBLASEXT.cmake b/cmake/FindBLASEXT.cmake
new file mode 100644
index 0000000..fb4b0fe
--- /dev/null
+++ b/cmake/FindBLASEXT.cmake
@@ -0,0 +1,382 @@
+###
+#
+# @copyright (c) 2009-2014 The University of Tennessee and The University
+#                          of Tennessee Research Foundation.
+#                          All rights reserved.
+# @copyright (c) 2012-2016 Inria. All rights reserved.
+# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+# @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST).
+#                     All rights reserved.
+#
+###
+#
+# - Find BLAS EXTENDED for ECRC projects: find include dirs and libraries
+#
+# This module allows to find BLAS libraries by calling the official FindBLAS module
+# and handles the creation of different library lists whether the user wishes to link
+# with a sequential BLAS or a multithreaded (BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES).
+# BLAS is detected with a FindBLAS call then if the BLAS vendor is Intel10_64lp, ACML
+# or IBMESSLMT then the module attempts to find the corresponding multithreaded libraries.
+# +# The following variables have been added to manage links with sequential or multithreaded +# versions: +# BLAS_INCLUDE_DIRS - BLAS include directories +# BLAS_LIBRARY_DIRS - Link directories for BLAS libraries +# BLAS_SEQ_LIBRARIES - BLAS component libraries to be linked (sequential) +# BLAS_PAR_LIBRARIES - BLAS component libraries to be linked (multithreaded) + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2016 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file ECRC-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Ecrc, substitute the full +# License text for the above reference.) 
+
+# Single entry point for every find_package(BLAS ...) call made by this module.
+macro(find_package_blas)
+  # Forward the REQUIRED / QUIET flags that were given to
+  # find_package(BLASEXT ...) to the underlying find_package(BLAS ...) call.
+  # The flags are collected in a temporary list that is removed again before
+  # returning, because a macro runs in the scope of its caller.
+  set(_blasext_find_args)
+  if(BLASEXT_FIND_REQUIRED)
+    list(APPEND _blasext_find_args REQUIRED)
+  endif()
+  if(BLASEXT_FIND_QUIETLY)
+    list(APPEND _blasext_find_args QUIET)
+  endif()
+  find_package(BLAS ${_blasext_find_args})
+  unset(_blasext_find_args)
+endmacro()
+
+# add a cache variable to let the user specify the BLAS vendor
+set(BLA_VENDOR "" CACHE STRING "list of possible BLAS vendor:
+    Open, Eigen, Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT,
+    Intel10_32 (intel mkl v10 32 bit),
+    Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model),
+    Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model),
+    Intel( older versions of mkl 32 and 64 bit),
+    ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic")
+
+if(NOT BLASEXT_FIND_QUIETLY)
+  message(STATUS "In FindBLASEXT")
+  message(STATUS "If you want to force the use of one specific library, "
+    "\n please specify the BLAS vendor by setting -DBLA_VENDOR=blas_vendor_name"
+    "\n at cmake configure.")
+  message(STATUS "List of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, "
+    "\n DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT, Intel10_32 (intel mkl v10 32 bit),"
+    "\n Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model),"
+    "\n Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model),"
+    "\n Intel( older versions of mkl 32 and 64 bit),"
+    "\n ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic")
+endif()
+
+if (NOT BLAS_FOUND)
+  # First try to detect two cases:
+  # 1: only SEQ libs are handled
+  # 2: both SEQ and PAR libs are handled
+  find_package_blas()
+endif ()
+
+# detect the cases where SEQ and PAR libs are handled
+if(BLA_VENDOR STREQUAL "All" AND
+    (BLAS_mkl_core_LIBRARY OR BLAS_mkl_core_dll_LIBRARY)
+    )
+  set(BLA_VENDOR "Intel")
+  if(BLAS_mkl_intel_LIBRARY)
+    set(BLA_VENDOR "Intel10_32")
+  endif()
+  if(BLAS_mkl_intel_lp64_LIBRARY)
+    set(BLA_VENDOR "Intel10_64lp")
+  endif()
+  if(NOT
BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the MKL." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +elseif(BLA_VENDOR STREQUAL "All" AND BLAS_acml_LIBRARY) + set(BLA_VENDOR "ACML") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the ACML." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +elseif(BLA_VENDOR STREQUAL "All" AND BLAS_essl_LIBRARY) + set(BLA_VENDOR "IBMESSL") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the ESSL." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +endif() + +# Intel case +if(BLA_VENDOR MATCHES "Intel*") + + ### + # look for include path if the BLAS vendor is Intel + ### + + # gather system include paths + unset(_inc_env) + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + set(ENV_MKLROOT "$ENV{MKLROOT}") + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include") + endif() + list(REMOVE_DUPLICATES _inc_env) + + # find mkl.h inside known 
include paths + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + if(BLAS_INCDIR) + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${BLAS_INCDIR}) + else() + if(BLAS_DIR) + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${BLAS_DIR} + PATH_SUFFIXES include) + else() + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${_inc_env}) + endif() + endif() + mark_as_advanced(BLAS_mkl.h_INCLUDE_DIRS) + ## Print status if not found + ## ------------------------- + #if (NOT BLAS_mkl.h_INCLUDE_DIRS AND ECRC_VERBOSE) + # Print_Find_Header_Status(blas mkl.h) + #endif () + set(BLAS_INCLUDE_DIRS "") + if(BLAS_mkl.h_INCLUDE_DIRS) + list(APPEND BLAS_INCLUDE_DIRS "${BLAS_mkl.h_INCLUDE_DIRS}" ) + endif() + + ### + # look for libs + ### + # if Intel 10 64 bit -> look for sequential and multithreaded versions + if(BLA_VENDOR MATCHES "Intel10_64lp*") + + ## look for the sequential version + set(BLA_VENDOR "Intel10_64lp_seq") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "Look for the sequential version Intel10_64lp_seq") + endif() + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "Intel10_64lp") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "Look for the multithreaded version Intel10_64lp") + endif() + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + + else() + + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + endif() + + # ACML case +elseif(BLA_VENDOR MATCHES "ACML*") + + ## 
look for the sequential version + set(BLA_VENDOR "ACML") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "ACML_MP") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + + # IBMESSL case +elseif(BLA_VENDOR MATCHES "IBMESSL*") + + ## look for the sequential version + set(BLA_VENDOR "IBMESSL") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "IBMESSLMT") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + +else() + + if(BLAS_FOUND) + # define the SEQ libs as the BLAS_LIBRARIES + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + +endif() + + +if(BLAS_SEQ_LIBRARIES) + set(BLAS_LIBRARIES "${BLAS_SEQ_LIBRARIES}") +endif() + +# extract libs paths +# remark: because it is not given by find_package(BLAS) +set(BLAS_LIBRARY_DIRS "") +string(REPLACE " " ";" BLAS_LIBRARIES "${BLAS_LIBRARIES}") +foreach(blas_lib ${BLAS_LIBRARIES}) + if (EXISTS "${blas_lib}") + get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) + list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) + else() + string(REPLACE "-L" "" blas_lib "${blas_lib}") + if (EXISTS "${blas_lib}") + list(APPEND BLAS_LIBRARY_DIRS "${blas_lib}" ) + else() + get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) + if (EXISTS "${a_blas_lib_dir}") + list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) + endif() + endif() + endif() 
+endforeach() +if (BLAS_LIBRARY_DIRS) + list(REMOVE_DUPLICATES BLAS_LIBRARY_DIRS) +endif () + +# check that BLAS has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +if(BLA_VENDOR MATCHES "Intel*") + if(BLA_VENDOR MATCHES "Intel10_64lp*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is Intel MKL:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS + BLAS_INCLUDE_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() + else() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS + BLAS_INCLUDE_DIRS) + endif() +elseif(BLA_VENDOR MATCHES "ACML*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is ACML:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() +elseif(BLA_VENDOR MATCHES "IBMESSL*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is ESSL:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n 
(see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() +else() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) +endif() diff --git a/cmake/FindCBLAS.cmake b/cmake/FindCBLAS.cmake new file mode 100644 index 0000000..92d4592 --- /dev/null +++ b/cmake/FindCBLAS.cmake @@ -0,0 +1,466 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST). +# All rights reserved. +# +### +# +# - Find CBLAS include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(CBLAS +# [REQUIRED] # Fail with error if cblas is not found +# [COMPONENTS ...] # dependencies +# ) +# +# CBLAS depends on the following libraries: +# - BLAS +# +# This module finds headers and cblas library. 
+# Results are reported in variables: +# CBLAS_FOUND - True if headers and requested libraries were found +# CBLAS_LINKER_FLAGS - list of required linker flags (excluding -l and -L) +# CBLAS_INCLUDE_DIRS - cblas include directories +# CBLAS_LIBRARY_DIRS - Link directories for cblas libraries +# CBLAS_LIBRARIES - cblas component libraries to be linked +# CBLAS_INCLUDE_DIRS_DEP - cblas + dependencies include directories +# CBLAS_LIBRARY_DIRS_DEP - cblas + dependencies link directories +# CBLAS_LIBRARIES_DEP - cblas libraries + dependencies +# CBLAS_HAS_ZGEMM3M - True if cblas contains zgemm3m fast complex mat-mat product +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DCBLAS_DIR=path/to/cblas): +# CBLAS_DIR - Where to find the base directory of cblas +# CBLAS_INCDIR - Where to find the header files +# CBLAS_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: CBLAS_DIR, CBLAS_INCDIR, CBLAS_LIBDIR +# +# CBLAS could be directly embedded in BLAS library (ex: Intel MKL) so that +# we test a cblas function with the blas libraries found and set CBLAS +# variables to BLAS ones if test is successful. 
To skip this feature and +# look for a stand alone cblas, please add the following in your +# CMakeLists.txt before to call find_package(CBLAS): +# set(CBLAS_STANDALONE TRUE) +### +# We handle different modes to find the dependency +# +# - Detection if already installed on the system +# - CBLAS libraries can be detected from different ways +# Here is the order of precedence: +# 1) we look in cmake variable CBLAS_LIBDIR or CBLAS_DIR (we guess the libdirs) if defined +# 2) we look in environment variable CBLAS_LIBDIR or CBLAS_DIR (we guess the libdirs) if defined +# 3) we look in common environnment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH) +# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables: +# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES +# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES +# + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2016 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file ECRC-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Ecrc, substitute the full +# License text for the above reference.) 
+ +macro(Find_Cblas_Header _header_name _extra_paths) + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + set(ENV_CBLAS_DIR "$ENV{CBLAS_DIR}") + set(ENV_CBLAS_INCDIR "$ENV{CBLAS_INCDIR}") + if(ENV_CBLAS_INCDIR) + list(APPEND _inc_env "${ENV_CBLAS_INCDIR}") + elseif(ENV_CBLAS_DIR) + list(APPEND _inc_env "${ENV_CBLAS_DIR}") + list(APPEND _inc_env "${ENV_CBLAS_DIR}/include") + else() + # system variables + if(WIN32) + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${_extra_paths}") + list(APPEND _inc_env "${_extra_paths}/include") + list(REMOVE_DUPLICATES _inc_env) + + + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_inc_env}") + # Try to find the fftw header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(CBLAS_INCDIR) + set(CBLAS_${_header_name}_DIRS "CBLAS_${_header_name}_DIRS-NOTFOUND") + find_path(CBLAS_${_header_name}_DIRS + NAMES ${_header_name} + HINTS ${CBLAS_INCDIR}) + else() + if(CBLAS_DIR) + set(CBLAS_${_header_name}_DIRS "CBLAS_${_header_name}_DIRS-NOTFOUND") + find_path(CBLAS_${_header_name}_DIRS + NAMES ${_header_name} + HINTS ${CBLAS_DIR} + PATH_SUFFIXES "include") + else() + set(CBLAS_${_header_name}_DIRS "CBLAS_${_header_name}_DIRS-NOTFOUND") + 
find_path(CBLAS_${_header_name}_DIRS + NAMES ${_header_name} + HINTS ${PATH_TO_LOOK_FOR}) + endif() + endif() + mark_as_advanced(CBLAS_${_header_name}_DIRS) + + # If found, add path to cmake variable + # ------------------------------------ + if (CBLAS_${_header_name}_DIRS) + set(CBLAS_INCLUDE_DIRS "${CBLAS_${_header_name}_DIRS}") + else () + set(CBLAS_INCLUDE_DIRS "CBLAS_INCLUDE_DIRS-NOTFOUND") + if(NOT CBLAS_FIND_QUIETLY) + message(STATUS "Looking for CBLAS -- ${_header_name} not found") + endif() + endif() +endmacro() + +if (NOT CBLAS_FOUND) + set(CBLAS_DIR "" CACHE PATH "Installation directory of CBLAS library") + if (NOT CBLAS_FIND_QUIETLY) + message(STATUS "A cache variable, namely CBLAS_DIR, has been set to specify the install directory of CBLAS") + endif() +endif() + + +# CBLAS depends on BLAS anyway, try to find it +if (NOT BLAS_FOUND) + if(CBLAS_FIND_REQUIRED) + find_package(BLASEXT REQUIRED) + else() + find_package(BLASEXT) + endif() +endif() + + +# find CBLAS +if (BLAS_FOUND) + # find header + Find_Cblas_Header( "cblas.h" "${BLAS_DIR_FOUND}" ) + + if (NOT CBLAS_STANDALONE) + # check if a cblas function exists in the BLAS lib + # this can be the case with libs such as MKL, ACML + include(CheckFunctionExists) + set(CMAKE_REQUIRED_LIBRARIES "${BLAS_LINKER_FLAGS};${BLAS_LIBRARIES}") + set(CMAKE_REQUIRED_FLAGS "${BLAS_COMPILER_FLAGS}") + unset(CBLAS_WORKS CACHE) + check_function_exists(cblas_dscal CBLAS_WORKS) + check_function_exists(cblas_zgemm3m CBLAS_ZGEMM3M_FOUND) + mark_as_advanced(CBLAS_WORKS) + set(CMAKE_REQUIRED_LIBRARIES) + + if(CBLAS_WORKS) + + # Check for faster complex GEMM routine + # (only C/Z, no S/D version) + if ( CBLAS_ZGEMM3M_FOUND ) + add_definitions(-DCBLAS_HAS_ZGEMM3M -DCBLAS_HAS_CGEMM3M) + endif() + + if(NOT CBLAS_FIND_QUIETLY) + message(STATUS "Looking for cblas: test with blas succeeds") + endif() + # test succeeds: CBLAS is in BLAS + set(CBLAS_LIBRARIES "${BLAS_LIBRARIES}") + if (BLAS_LIBRARY_DIRS) + set(CBLAS_LIBRARY_DIRS 
"${BLAS_LIBRARY_DIRS}") + endif() + if(BLAS_INCLUDE_DIRS) + set(CBLAS_INCLUDE_DIRS "${BLAS_INCLUDE_DIRS}") + endif() + if (BLAS_LINKER_FLAGS) + set(CBLAS_LINKER_FLAGS "${BLAS_LINKER_FLAGS}") + endif() + endif() + endif (NOT CBLAS_STANDALONE) + + if (CBLAS_STANDALONE OR NOT CBLAS_WORKS) + + if(NOT CBLAS_WORKS AND NOT CBLAS_FIND_QUIETLY) + message(STATUS "Looking for cblas : test with blas fails") + endif() + # test fails: try to find CBLAS lib exterior to BLAS + + # Try to find CBLAS lib + ####################### + + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + set(ENV_CBLAS_DIR "$ENV{CBLAS_DIR}") + set(ENV_CBLAS_INCDIR "$ENV{CBLAS_INCDIR}") + if(ENV_CBLAS_INCDIR) + list(APPEND _inc_env "${ENV_CBLAS_INCDIR}") + elseif(ENV_CBLAS_DIR) + list(APPEND _inc_env "${ENV_CBLAS_DIR}") + list(APPEND _inc_env "${ENV_CBLAS_DIR}/include") + list(APPEND _inc_env "${ENV_CBLAS_DIR}/include/cblas") + else() + if(WIN32) + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(REMOVE_DUPLICATES _inc_env) + + + # Try to find the cblas header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(CBLAS_INCDIR) + set(CBLAS_cblas.h_DIRS "CBLAS_cblas.h_DIRS-NOTFOUND") + find_path(CBLAS_cblas.h_DIRS + NAMES cblas.h + HINTS 
${CBLAS_INCDIR}) + else() + if(CBLAS_DIR) + set(CBLAS_cblas.h_DIRS "CBLAS_cblas.h_DIRS-NOTFOUND") + find_path(CBLAS_cblas.h_DIRS + NAMES cblas.h + HINTS ${CBLAS_DIR} + PATH_SUFFIXES "include" "include/cblas") + else() + set(CBLAS_cblas.h_DIRS "CBLAS_cblas.h_DIRS-NOTFOUND") + find_path(CBLAS_cblas.h_DIRS + NAMES cblas.h + HINTS ${_inc_env} + PATH_SUFFIXES "cblas") + endif() + endif() + mark_as_advanced(CBLAS_cblas.h_DIRS) + + # If found, add path to cmake variable + # ------------------------------------ + if (CBLAS_cblas.h_DIRS) + set(CBLAS_INCLUDE_DIRS "${CBLAS_cblas.h_DIRS}") + else () + set(CBLAS_INCLUDE_DIRS "CBLAS_INCLUDE_DIRS-NOTFOUND") + if(NOT CBLAS_FIND_QUIETLY) + message(STATUS "Looking for cblas -- cblas.h not found") + endif() + endif() + + + # Looking for lib + # --------------- + + # Add system library paths to search lib + # -------------------------------------- + unset(_lib_env) + set(ENV_CBLAS_LIBDIR "$ENV{CBLAS_LIBDIR}") + if(ENV_CBLAS_LIBDIR) + list(APPEND _lib_env "${ENV_CBLAS_LIBDIR}") + elseif(ENV_CBLAS_DIR) + list(APPEND _lib_env "${ENV_CBLAS_DIR}") + list(APPEND _lib_env "${ENV_CBLAS_DIR}/lib") + else() + if(WIN32) + file(TO_CMAKE_PATH "$ENV{LIB}" _lib_env) # LIB is ';'-separated on Windows; splitting on ':' would cut drive letters + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() + endif() + list(REMOVE_DUPLICATES _lib_env) + + # Try to find the cblas lib in the given paths + # ---------------------------------------------- + + # call cmake macro to find the lib path + if(CBLAS_LIBDIR) + set(CBLAS_cblas_LIBRARY "CBLAS_cblas_LIBRARY-NOTFOUND") + find_library(CBLAS_cblas_LIBRARY + NAMES cblas + HINTS ${CBLAS_LIBDIR}) + else() + if(CBLAS_DIR) + set(CBLAS_cblas_LIBRARY "CBLAS_cblas_LIBRARY-NOTFOUND") + find_library(CBLAS_cblas_LIBRARY + NAMES cblas + HINTS
${CBLAS_DIR} + PATH_SUFFIXES lib lib32 lib64) + else() + set(CBLAS_cblas_LIBRARY "CBLAS_cblas_LIBRARY-NOTFOUND") + find_library(CBLAS_cblas_LIBRARY + NAMES cblas + HINTS ${_lib_env}) + endif() + endif() + mark_as_advanced(CBLAS_cblas_LIBRARY) + + # If found, add path to cmake variable + # ------------------------------------ + if (CBLAS_cblas_LIBRARY) + get_filename_component(cblas_lib_path "${CBLAS_cblas_LIBRARY}" PATH) + # set cmake variables + set(CBLAS_LIBRARIES "${CBLAS_cblas_LIBRARY}") + set(CBLAS_LIBRARY_DIRS "${cblas_lib_path}") + else () + set(CBLAS_LIBRARIES "CBLAS_LIBRARIES-NOTFOUND") + set(CBLAS_LIBRARY_DIRS "CBLAS_LIBRARY_DIRS-NOTFOUND") + if (NOT CBLAS_FIND_QUIETLY) + message(STATUS "Looking for cblas -- lib cblas not found") + endif() + endif () + + # check a function to validate the find + if(CBLAS_LIBRARIES) + + set(REQUIRED_INCDIRS) + set(REQUIRED_LDFLAGS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # CBLAS + if (CBLAS_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${CBLAS_INCLUDE_DIRS}") + endif() + if (CBLAS_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${CBLAS_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${CBLAS_LIBRARIES}") + # BLAS + if (BLAS_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${BLAS_INCLUDE_DIRS}") + endif() + if (BLAS_LIBRARY_DIRS) + list(APPEND REQUIRED_LIBDIRS "${BLAS_LIBRARY_DIRS}") + endif() + list(APPEND REQUIRED_LIBS "${BLAS_LIBRARIES}") + if (BLAS_LINKER_FLAGS) + list(APPEND REQUIRED_LDFLAGS "${BLAS_LINKER_FLAGS}") + endif() + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(CBLAS_WORKS CACHE) + include(CheckFunctionExists) + 
check_function_exists(cblas_dscal CBLAS_WORKS) + mark_as_advanced(CBLAS_WORKS) + + if(CBLAS_WORKS) + + # Check for faster complex GEMM routine + # (only C/Z, no S/D version) + check_function_exists(cblas_zgemm3m CBLAS_ZGEMM3M_FOUND) + if ( CBLAS_ZGEMM3M_FOUND ) + add_definitions(-DCBLAS_HAS_ZGEMM3M -DCBLAS_HAS_CGEMM3M) + endif() + + # save link with dependencies + set(CBLAS_LIBRARIES_DEP "${REQUIRED_LIBS}") + set(CBLAS_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") + set(CBLAS_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") + set(CBLAS_LINKER_FLAGS "${REQUIRED_LDFLAGS}") + list(REMOVE_DUPLICATES CBLAS_LIBRARY_DIRS_DEP) + list(REMOVE_DUPLICATES CBLAS_INCLUDE_DIRS_DEP) + list(REMOVE_DUPLICATES CBLAS_LINKER_FLAGS) + else() + if(NOT CBLAS_FIND_QUIETLY) + message(STATUS "Looking for cblas : test of cblas_dscal with cblas and blas libraries fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) + endif(CBLAS_LIBRARIES) + + endif (CBLAS_STANDALONE OR NOT CBLAS_WORKS) + +else(BLAS_FOUND) + + if (NOT CBLAS_FIND_QUIETLY) + message(STATUS "CBLAS requires BLAS but BLAS has not been found." 
+ " Please look for BLAS first.") + endif() + +endif(BLAS_FOUND) + +if (CBLAS_LIBRARIES) + list(GET CBLAS_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if ("${first_lib_path}" MATCHES "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)") # quoted so an empty path cannot leave if() without a left operand + string(REGEX REPLACE "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)" "" not_cached_dir "${first_lib_path}") + set(CBLAS_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of CBLAS library" FORCE) + else() + set(CBLAS_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of CBLAS library" FORCE) + endif() +endif() +mark_as_advanced(CBLAS_DIR) +mark_as_advanced(CBLAS_DIR_FOUND) + +# check that CBLAS has been found +# ------------------------------- +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(CBLAS DEFAULT_MSG + CBLAS_LIBRARIES + CBLAS_WORKS) diff --git a/cmake/FindLAPACK.cmake b/cmake/FindLAPACK.cmake new file mode 100644 index 0000000..3390843 --- /dev/null +++ b/cmake/FindLAPACK.cmake @@ -0,0 +1,801 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +FindLAPACK +---------- + +Find Linear Algebra PACKage (LAPACK) library + +This module finds an installed Fortran library that implements the +`LAPACK linear-algebra interface`_. + +At least one of the ``C``, ``CXX``, or ``Fortran`` languages must be enabled. + +.. _`LAPACK linear-algebra interface`: https://netlib.org/lapack/ + +Input Variables +^^^^^^^^^^^^^^^ + +The following variables may be set to influence this module's behavior: + +``BLA_STATIC`` + if ``ON`` use static linkage + +``BLA_VENDOR`` + Set to one of the :ref:`BLAS/LAPACK Vendors` to search for BLAS only + from the specified vendor. If not set, all vendors are considered.
+ +``BLA_F95`` + if ``ON`` tries to find the BLAS95/LAPACK95 interfaces + +``BLA_PREFER_PKGCONFIG`` + .. versionadded:: 3.20 + + if set, ``pkg-config`` will be used to search for a LAPACK library first, + and if one is found, that one is preferred + +``BLA_PKGCONFIG_LAPACK`` + .. versionadded:: 3.25 + + If set, the ``pkg-config`` method will look for this module name instead of + just ``lapack``. + + +``BLA_SIZEOF_INTEGER`` + .. versionadded:: 3.22 + + Specify the BLAS/LAPACK library integer size: + + ``4`` + Search for a BLAS/LAPACK with 32-bit integer interfaces. + ``8`` + Search for a BLAS/LAPACK with 64-bit integer interfaces. + ``ANY`` + Search for any BLAS/LAPACK. + Most likely, a BLAS/LAPACK with 32-bit integer interfaces will be found. + +Imported targets +^^^^^^^^^^^^^^^^ + +This module defines the following :prop_tgt:`IMPORTED` targets: + +``LAPACK::LAPACK`` + .. versionadded:: 3.18 + + The libraries to use for LAPACK, if found. + +Result Variables +^^^^^^^^^^^^^^^^ + +This module defines the following variables: + +``LAPACK_FOUND`` + library implementing the LAPACK interface is found +``LAPACK_LINKER_FLAGS`` + uncached list of required linker flags (excluding ``-l`` and ``-L``). +``LAPACK_LIBRARIES`` + uncached list of libraries (using full path name) to link against + to use LAPACK +``LAPACK95_LIBRARIES`` + uncached list of libraries (using full path name) to link against + to use LAPACK95 +``LAPACK95_FOUND`` + library implementing the LAPACK95 interface is found + +Intel MKL +^^^^^^^^^ + +To use the Intel MKL implementation of LAPACK, a project must enable at least +one of the ``C`` or ``CXX`` languages. Set ``BLA_VENDOR`` to an Intel MKL +variant either on the command-line as ``-DBLA_VENDOR=Intel10_64lp`` or in +project code: + +.. code-block:: cmake + + set(BLA_VENDOR Intel10_64lp) + find_package(LAPACK) + +In order to build a project using Intel MKL, an end user must first +establish an Intel MKL environment.
See the :module:`FindBLAS` module +section on :ref:`Intel MKL` for details. + +#]=======================================================================] + +# The approach follows that of the ``autoconf`` macro file, ``acx_lapack.m4`` +# (distributed at http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html). + +if(CMAKE_Fortran_COMPILER_LOADED) + include(CheckFortranFunctionExists) # resolved through the module search path; this module is not among the files vendored in cmake/ +else() + include(${CMAKE_CURRENT_LIST_DIR}/CheckFunctionExists.cmake) +endif() +include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake) + +function(_add_lapack_target) + if(LAPACK_FOUND AND NOT TARGET LAPACK::LAPACK) + add_library(LAPACK::LAPACK INTERFACE IMPORTED) + + # Filter out redundant BLAS info and replace with the BLAS target + set(_lapack_libs "${LAPACK_LIBRARIES}") + set(_lapack_flags "${LAPACK_LINKER_FLAGS}") + if(TARGET BLAS::BLAS) + if(_lapack_libs AND BLAS_LIBRARIES) + foreach(_blas_lib IN LISTS BLAS_LIBRARIES) + list(REMOVE_ITEM _lapack_libs "${_blas_lib}") + endforeach() + endif() + if(_lapack_flags AND BLAS_LINKER_FLAGS) + foreach(_blas_flag IN LISTS BLAS_LINKER_FLAGS) + list(REMOVE_ITEM _lapack_flags "${_blas_flag}") + endforeach() + endif() + list(APPEND _lapack_libs BLAS::BLAS) + endif() + if(_lapack_libs) + set_target_properties(LAPACK::LAPACK PROPERTIES + INTERFACE_LINK_LIBRARIES "${_lapack_libs}" + ) + endif() + if(_lapack_flags) + set_target_properties(LAPACK::LAPACK PROPERTIES + INTERFACE_LINK_OPTIONS "${_lapack_flags}" + ) + endif() + endif() +endfunction() + +# TODO: move this stuff to a separate module + +function(CHECK_LAPACK_LIBRARIES LIBRARIES _prefix _name _flags _list _deps _addlibdir _subdirs _blas) + # This function checks for the existence of the combination of libraries + # given by _list. If the combination is found, this checks whether we can link + # against that library combination using the name of a routine given by _name + # using the linker flags given by _flags.
If the combination of libraries is + # found and passes the link test, ${LIBRARIES} is set to the list of complete + # library paths that have been found. Otherwise, ${LIBRARIES} is set to FALSE. + + set(_libraries_work TRUE) + set(_libraries) + set(_combined_name) + + if(BLA_STATIC) + if(WIN32) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) + else() + set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif() + else() + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + # for ubuntu's libblas3gf and liblapack3gf packages + set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf) + endif() + endif() + + set(_extaddlibdir "${_addlibdir}") + if(WIN32) + list(APPEND _extaddlibdir ENV LIB) + elseif(APPLE) + list(APPEND _extaddlibdir ENV DYLD_LIBRARY_PATH) + else() + list(APPEND _extaddlibdir ENV LD_LIBRARY_PATH) + endif() + list(APPEND _extaddlibdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + + foreach(_library ${_list}) + if(_library MATCHES "^-") + # Respect linker flags as-is (required by MKL) + list(APPEND _libraries "${_library}") + else() + string(REGEX REPLACE "[^A-Za-z0-9]" "_" _lib_var "${_library}") + string(APPEND _combined_name "_${_lib_var}") + if(NOT "${_deps}" STREQUAL "") + string(APPEND _combined_name "_deps") + endif() + if(_libraries_work) + find_library(${_prefix}_${_lib_var}_LIBRARY + NAMES ${_library} + NAMES_PER_DIR + PATHS ${_extaddlibdir} + PATH_SUFFIXES ${_subdirs} + ) + mark_as_advanced(${_prefix}_${_lib_var}_LIBRARY) + list(APPEND _libraries ${${_prefix}_${_lib_var}_LIBRARY}) + set(_libraries_work ${${_prefix}_${_lib_var}_LIBRARY}) + endif() + endif() + endforeach() + + foreach(_flag ${_flags}) + string(REGEX REPLACE "[^A-Za-z0-9]" "_" _flag_var "${_flag}") + string(APPEND _combined_name "_${_flag_var}") + endforeach() + if(_libraries_work) + # Test this combination of libraries. 
+ set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${_libraries} ${_blas} ${_deps}) + set(CMAKE_REQUIRED_QUIET ${LAPACK_FIND_QUIETLY}) + if(CMAKE_Fortran_COMPILER_LOADED) + check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS) + else() + check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) + endif() + set(CMAKE_REQUIRED_LIBRARIES) + set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) + endif() + + if(_libraries_work) + if("${_list}${_blas}" STREQUAL "") + set(_libraries "${LIBRARIES}-PLACEHOLDER-FOR-EMPTY-LIBRARIES") + else() + list(APPEND _libraries ${_blas} ${_deps}) + endif() + else() + set(_libraries FALSE) + endif() + set(${LIBRARIES} "${_libraries}" PARENT_SCOPE) +endfunction() + +macro(_lapack_find_dependency dep) + set(_lapack_quiet_arg) + if(LAPACK_FIND_QUIETLY) + set(_lapack_quiet_arg QUIET) + endif() + set(_lapack_required_arg) + if(LAPACK_FIND_REQUIRED) + set(_lapack_required_arg REQUIRED) + endif() + find_package(${dep} ${ARGN} + ${_lapack_quiet_arg} + ${_lapack_required_arg} + ) + if (NOT ${dep}_FOUND) + set(LAPACK_NOT_FOUND_MESSAGE "LAPACK could not be found because dependency ${dep} could not be found.") + endif() + + set(_lapack_required_arg) + set(_lapack_quiet_arg) +endmacro() + +set(LAPACK_LINKER_FLAGS) +set(LAPACK_LIBRARIES) +set(LAPACK95_LIBRARIES) +set(_lapack_fphsa_req_var LAPACK_LIBRARIES) + +# Check the language being used +if(NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_COMPILER_LOADED)) + set(LAPACK_NOT_FOUND_MESSAGE + "FindLAPACK requires Fortran, C, or C++ to be enabled.") +endif() + +if(NOT BLA_SIZEOF_INTEGER) + # in reality we do not know which integer API of BLAS/LAPACK the library exposes + set(_lapack_sizeof_integer "ANY") +elseif((BLA_SIZEOF_INTEGER STREQUAL "ANY") OR +(BLA_SIZEOF_INTEGER STREQUAL "4") OR +(BLA_SIZEOF_INTEGER STREQUAL "8")) + set(_lapack_sizeof_integer ${BLA_SIZEOF_INTEGER}) +else() + message(FATAL_ERROR "BLA_SIZEOF_INTEGER can have only <no value>, ANY, 4, or 8
values") +endif() + +# Load BLAS +if(NOT LAPACK_NOT_FOUND_MESSAGE) + _lapack_find_dependency(BLAS) +endif() + +# Search with pkg-config if specified +if(BLA_PREFER_PKGCONFIG) + if(NOT BLA_PKGCONFIG_LAPACK) + set(BLA_PKGCONFIG_LAPACK "lapack") + endif() + find_package(PkgConfig QUIET) + pkg_check_modules(PKGC_LAPACK QUIET ${BLA_PKGCONFIG_LAPACK}) + if(PKGC_LAPACK_FOUND) + set(LAPACK_FOUND TRUE) + set(LAPACK_LIBRARIES "${PKGC_LAPACK_LINK_LIBRARIES}") + if (BLAS_LIBRARIES) + list(APPEND LAPACK_LIBRARIES "${BLAS_LIBRARIES}") + endif() + _add_lapack_target() + return() + endif() +endif() + +# Search for different LAPACK distributions if BLAS is found +if(NOT LAPACK_NOT_FOUND_MESSAGE) + set(LAPACK_LINKER_FLAGS ${BLAS_LINKER_FLAGS}) + if(NOT BLA_VENDOR) + if(NOT "$ENV{BLA_VENDOR}" STREQUAL "") + set(BLA_VENDOR "$ENV{BLA_VENDOR}") + else() + set(BLA_VENDOR "All") + endif() + endif() + + # LAPACK in the Intel MKL 10+ library? + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR MATCHES "Intel" OR BLA_VENDOR STREQUAL "All") + AND (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED)) + # System-specific settings + if(NOT WIN32) + set(LAPACK_mkl_LM "-lm") + set(LAPACK_mkl_LDL "-ldl") + endif() + + _lapack_find_dependency(Threads) + + if(_lapack_sizeof_integer EQUAL 8) + set(LAPACK_mkl_ILP_MODE "ilp64") + elseif(_lapack_sizeof_integer EQUAL 4) + set(LAPACK_mkl_ILP_MODE "lp64") + else() + if(BLA_VENDOR MATCHES "_64ilp") + set(LAPACK_mkl_ILP_MODE "ilp64") + else() + set(LAPACK_mkl_ILP_MODE "lp64") + endif() + endif() + + set(LAPACK_SEARCH_LIBS "") + + if(BLA_F95) + set(LAPACK_mkl_SEARCH_SYMBOL "cheev_f95") + set(_LAPACK_LIBRARIES LAPACK95_LIBRARIES) + set(_BLAS_LIBRARIES ${BLAS95_LIBRARIES}) + + # old + list(APPEND LAPACK_SEARCH_LIBS + "mkl_lapack95") + # new >= 10.3 + list(APPEND LAPACK_SEARCH_LIBS + "mkl_intel_c") + list(APPEND LAPACK_SEARCH_LIBS + "mkl_lapack95_${LAPACK_mkl_ILP_MODE}") + else() + set(LAPACK_mkl_SEARCH_SYMBOL "cheev") + set(_LAPACK_LIBRARIES LAPACK_LIBRARIES) + 
set(_BLAS_LIBRARIES ${BLAS_LIBRARIES}) + + # old and new >= 10.3 + list(APPEND LAPACK_SEARCH_LIBS + "mkl_lapack") + endif() + + # MKL uses a multitude of partially platform-specific subdirectories: + if(BLA_VENDOR STREQUAL "Intel10_32") + set(LAPACK_mkl_ARCH_NAME "ia32") + else() + set(LAPACK_mkl_ARCH_NAME "intel64") + endif() + if(WIN32) + set(LAPACK_mkl_OS_NAME "win") + elseif(APPLE) + set(LAPACK_mkl_OS_NAME "mac") + else() + set(LAPACK_mkl_OS_NAME "lin") + endif() + if(DEFINED ENV{MKLROOT}) + file(TO_CMAKE_PATH "$ENV{MKLROOT}" LAPACK_mkl_MKLROOT) + # If MKLROOT points to the subdirectory 'mkl', use the parent directory instead + # so we can better detect other relevant libraries in 'compiler' or 'tbb': + get_filename_component(LAPACK_mkl_MKLROOT_LAST_DIR "${LAPACK_mkl_MKLROOT}" NAME) + if(LAPACK_mkl_MKLROOT_LAST_DIR STREQUAL "mkl") + get_filename_component(LAPACK_mkl_MKLROOT "${LAPACK_mkl_MKLROOT}" DIRECTORY) + endif() + endif() + set(LAPACK_mkl_LIB_PATH_SUFFIXES + "compiler/lib" "compiler/lib/${LAPACK_mkl_ARCH_NAME}_${LAPACK_mkl_OS_NAME}" + "compiler/lib/${LAPACK_mkl_ARCH_NAME}" + "mkl/lib" "mkl/lib/${LAPACK_mkl_ARCH_NAME}_${LAPACK_mkl_OS_NAME}" + "mkl/lib/${LAPACK_mkl_ARCH_NAME}" + "lib" "lib/${LAPACK_mkl_ARCH_NAME}_${LAPACK_mkl_OS_NAME}" + "lib/${LAPACK_mkl_ARCH_NAME}" + ) + + # First try empty lapack libs (implicitly linked or automatic from BLAS) + if(NOT ${_LAPACK_LIBRARIES}) + check_lapack_libraries( + ${_LAPACK_LIBRARIES} + LAPACK + ${LAPACK_mkl_SEARCH_SYMBOL} + "" + "" + "${CMAKE_THREAD_LIBS_INIT};${LAPACK_mkl_LM};${LAPACK_mkl_LDL}" + "${LAPACK_mkl_MKLROOT}" + "${LAPACK_mkl_LIB_PATH_SUFFIXES}" + "${_BLAS_LIBRARIES}" + ) + if(LAPACK_WORKS AND NOT _BLAS_LIBRARIES) + # Give a more helpful "found" message + set(LAPACK_WORKS "implicitly linked") + set(_lapack_fphsa_req_var LAPACK_WORKS) + endif() + endif() + + # Then try the search libs + foreach(_search ${LAPACK_SEARCH_LIBS}) + string(REPLACE " " ";" _search ${_search}) + if(NOT ${_LAPACK_LIBRARIES}) + 
check_lapack_libraries( + ${_LAPACK_LIBRARIES} + LAPACK + ${LAPACK_mkl_SEARCH_SYMBOL} + "" + "${_search}" + "${CMAKE_THREAD_LIBS_INIT};${LAPACK_mkl_LM};${LAPACK_mkl_LDL}" + "${LAPACK_mkl_MKLROOT}" + "${LAPACK_mkl_LIB_PATH_SUFFIXES}" + "${_BLAS_LIBRARIES}" + ) + endif() + endforeach() + + unset(_search) + unset(LAPACK_mkl_ILP_MODE) + unset(LAPACK_mkl_SEARCH_SYMBOL) + unset(LAPACK_mkl_LM) + unset(LAPACK_mkl_LDL) + unset(LAPACK_mkl_MKLROOT) + unset(LAPACK_mkl_ARCH_NAME) + unset(LAPACK_mkl_OS_NAME) + unset(LAPACK_mkl_LIB_PATH_SUFFIXES) + endif() + + # gotoblas? (http://www.tacc.utexas.edu/tacc-projects/gotoblas2) + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All")) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "goto2" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + + # FlexiBLAS? (http://www.mpi-magdeburg.mpg.de/mpcsc/software/FlexiBLAS/) + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR STREQUAL "FlexiBLAS" OR BLA_VENDOR STREQUAL "All")) + set(_lapack_flexiblas_lib "flexiblas") + + if(_lapack_sizeof_integer EQUAL 8) + string(APPEND _lapack_flexiblas_lib "64") + endif() + + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "${_lapack_flexiblas_lib}" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + + unset(_lapack_flexiblas_lib) + endif() + + # OpenBLAS? (http://www.openblas.net) + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR STREQUAL "OpenBLAS" OR BLA_VENDOR STREQUAL "All")) + set(_lapack_openblas_lib "openblas") + + if(_lapack_sizeof_integer EQUAL 8) + string(APPEND _lapack_openblas_lib "64") + endif() + + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "${_lapack_openblas_lib}" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + + unset(_lapack_openblas_lib) + endif() + + # ArmPL? 
(https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-compiler-for-linux/arm-performance-libraries) + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR MATCHES "Arm" OR BLA_VENDOR STREQUAL "All")) + # Check for 64bit Integer support + if(_lapack_sizeof_integer EQUAL 8) + set(LAPACK_armpl_LIB "armpl_ilp64") + elseif(_lapack_sizeof_integer EQUAL 4) + set(LAPACK_armpl_LIB "armpl_lp64") + else() + if(BLA_VENDOR MATCHES "_ilp64") + set(LAPACK_armpl_LIB "armpl_ilp64") + else() + set(LAPACK_armpl_LIB "armpl_lp64") + endif() + endif() + + # Check for OpenMP support, VIA BLA_VENDOR of Arm_mp or Arm_ilp64_mp + if(BLA_VENDOR MATCHES "_mp") + string(APPEND LAPACK_armpl_LIB "_mp") + endif() + + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "${LAPACK_armpl_LIB}" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + + # FLAME's blis library? (https://github.com/flame/blis) + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR STREQUAL "FLAME" OR BLA_VENDOR STREQUAL "All")) + if(_lapack_sizeof_integer EQUAL 8) + if(BLA_VENDOR STREQUAL "FLAME") + message(FATAL_ERROR "libFLAME does not support Int64 type") + endif() + else() + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "flame" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # AOCL? (https://developer.amd.com/amd-aocl/) + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR MATCHES "AOCL" OR BLA_VENDOR STREQUAL "All")) + if(_lapack_sizeof_integer EQUAL 8) + set(_lapack_aocl_subdir "ILP64") + else() + set(_lapack_aocl_subdir "LP64") + endif() + + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "flame" + "-fopenmp" + "" + "${_lapack_aocl_subdir}" + "${BLAS_LIBRARIES}" + ) + unset(_lapack_aocl_subdir) + endif() + + # LAPACK in SCSL library?
(SGI/Cray Scientific Library) + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR MATCHES "SCSL" OR BLA_VENDOR STREQUAL "All")) + set(_lapack_scsl_lib "scs") + + if(_lapack_sizeof_integer EQUAL 8) + string(APPEND _lapack_scsl_lib "_i8") + endif() + # Check for OpenMP support, VIA BLA_VENDOR of scs_mp + if(BLA_VENDOR MATCHES "_mp") + string(APPEND _lapack_scsl_lib "_mp") + endif() + + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "${_lapack_scsl_lib}" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + unset(_lapack_scsl_lib) + endif() + + # BLAS in acml library? + if(BLA_VENDOR MATCHES "ACML" OR BLA_VENDOR STREQUAL "All") + if(BLAS_LIBRARIES MATCHES ".+acml.+") + set(LAPACK_LIBRARIES ${BLAS_LIBRARIES}) + endif() + endif() + + # Apple LAPACK library? + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All")) + if(_lapack_sizeof_integer EQUAL 8) + if(BLA_VENDOR STREQUAL "Apple") + message(FATAL_ERROR "Accelerate Framework does not support Int64 type") + endif() + else() + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "Accelerate" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # Apple NAS (vecLib) library? + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All")) + if(_lapack_sizeof_integer EQUAL 8) + if(BLA_VENDOR STREQUAL "NAS") + message(FATAL_ERROR "Accelerate Framework does not support Int64 type") + endif() + else() + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "vecLib" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # Elbrus Math Library? + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR MATCHES "EML" OR BLA_VENDOR STREQUAL "All")) + if(BLAS_LIBRARIES MATCHES "eml.+") + set(LAPACK_LIBRARIES ${BLAS_LIBRARIES}) + endif() + endif() + + # Fujitsu SSL2 Library? 
+ if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR MATCHES "Fujitsu_SSL2" OR BLA_VENDOR STREQUAL "All")) + if(BLAS_LIBRARIES MATCHES "fjlapack.+") + set(LAPACK_LIBRARIES ${BLAS_LIBRARIES}) + set(LAPACK_LINKER_FLAGS ${BLAS_LINKER_FLAGS}) + endif() + endif() + + # LAPACK in IBM ESSL library? + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR MATCHES "IBMESSL" OR BLA_VENDOR STREQUAL "All")) + if(BLAS_LIBRARIES MATCHES "essl.+") + set(LAPACK_LIBRARIES ${BLAS_LIBRARIES}) + endif() + endif() + + # NVHPC Library? + + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR MATCHES "NVHPC" OR BLA_VENDOR STREQUAL "All")) + set(_lapack_nvhpc_lib "lapack") + + if(_lapack_sizeof_integer EQUAL 8) + string(APPEND _lapack_nvhpc_lib "_ilp64") + elseif(_lapack_sizeof_integer EQUAL 4) + string(APPEND _lapack_nvhpc_lib "_lp64") + endif() + set(_lapack_nvhpc_flags) + if(";${CMAKE_C_COMPILER_ID};${CMAKE_CXX_COMPILER_ID};${CMAKE_Fortran_COMPILER_ID};" MATCHES ";(NVHPC|PGI);") + set(_lapack_nvhpc_flags "-fortranlibs") + endif() + + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "${_lapack_nvhpc_lib}" + "${_lapack_nvhpc_flags}" + "" + "" + "${BLAS_LIBRARIES}" + ) + + # an additional check for NVHPC 2020 + # which does not have differentiation + # between lp64 and ilp64 modes + if(NOT LAPACK_LIBRARIES AND NOT _lapack_sizeof_integer EQUAL 8) + set(_lapack_nvhpc_lib "lapack") + + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "${_lapack_nvhpc_lib}" + "${_lapack_nvhpc_flags}" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + + unset(_lapack_nvhpc_lib) + unset(_lapack_nvhpc_flags) + endif() + + # Generic LAPACK library? + if(NOT LAPACK_LIBRARIES + AND (BLA_VENDOR STREQUAL "Generic" + OR BLA_VENDOR STREQUAL "ATLAS" + OR BLA_VENDOR STREQUAL "All")) + set(_lapack_generic_lib "lapack") + if(BLA_STATIC) + # We do not know for sure how the LAPACK reference implementation + # is built on this host. Guess typical dependencies. 
+ set(_lapack_generic_deps "-lgfortran;-lm") + else() + set(_lapack_generic_deps "") + endif() + + if(_lapack_sizeof_integer EQUAL 8) + string(APPEND _lapack_generic_lib "64") + endif() + + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "${_lapack_generic_lib}" + "${_lapack_generic_deps}" + "" + "" + "${BLAS_LIBRARIES}" + ) + + unset(_lapack_generic_deps) + unset(_lapack_generic_lib) + endif() +endif() + +if(BLA_F95) + set(LAPACK_LIBRARIES "${LAPACK95_LIBRARIES}") +endif() + +if(LAPACK_NOT_FOUND_MESSAGE) + set(LAPACK_NOT_FOUND_MESSAGE + REASON_FAILURE_MESSAGE ${LAPACK_NOT_FOUND_MESSAGE}) +endif() +find_package_handle_standard_args(LAPACK REQUIRED_VARS ${_lapack_fphsa_req_var} + ${LAPACK_NOT_FOUND_MESSAGE}) +unset(LAPACK_NOT_FOUND_MESSAGE) + +if(BLA_F95) + set(LAPACK95_FOUND ${LAPACK_FOUND}) +endif() + +# On compilers that implicitly link LAPACK (such as ftn, cc, and CC on Cray HPC machines) +# we used a placeholder for empty LAPACK_LIBRARIES to get through our logic above. +if(LAPACK_LIBRARIES STREQUAL "LAPACK_LIBRARIES-PLACEHOLDER-FOR-EMPTY-LIBRARIES") + set(LAPACK_LIBRARIES "") +endif() + +_add_lapack_target() +unset(_lapack_fphsa_req_var) +unset(_lapack_sizeof_integer) +unset(_LAPACK_LIBRARIES) diff --git a/cmake/FindLAPACKE.cmake b/cmake/FindLAPACKE.cmake new file mode 100644 index 0000000..99da4e2 --- /dev/null +++ b/cmake/FindLAPACKE.cmake @@ -0,0 +1,389 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST). +# All rights reserved. 
+# +### +# +# - Find LAPACKE include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(LAPACKE +# [REQUIRED] # Fail with error if lapacke is not found +# [COMPONENTS ...] # dependencies +# ) +# +# LAPACKE depends on the following libraries: +# - LAPACK +# +# This module finds headers and lapacke library. +# Results are reported in variables: +# LAPACKE_FOUND - True if headers and requested libraries were found +# LAPACKE_LINKER_FLAGS - list of required linker flags (excluding -l and -L) +# LAPACKE_INCLUDE_DIRS - lapacke include directories +# LAPACKE_LIBRARY_DIRS - Link directories for lapacke libraries +# LAPACKE_LIBRARIES - lapacke component libraries to be linked +# LAPACKE_INCLUDE_DIRS_DEP - lapacke + dependencies include directories +# LAPACKE_LIBRARY_DIRS_DEP - lapacke + dependencies link directories +# LAPACKE_LIBRARIES_DEP - lapacke libraries + dependencies +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DLAPACKE_DIR=path/to/lapacke): +# LAPACKE_DIR - Where to find the base directory of lapacke +# LAPACKE_INCDIR - Where to find the header files +# LAPACKE_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: LAPACKE_DIR, LAPACKE_INCDIR, LAPACKE_LIBDIR +# +# LAPACKE could be directly embedded in LAPACK library (ex: Intel MKL) so that +# we test a lapacke function with the lapack libraries found and set LAPACKE +# variables to LAPACK ones if test is successful. 
To skip this feature and +# look for a standalone lapacke, please add the following to your +# CMakeLists.txt before calling find_package(LAPACKE): +# set(LAPACKE_STANDALONE TRUE) + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2016 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file ECRC-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Ecrc, substitute the full +# License text for the above reference.) + +macro(Find_Lapacke_Header _header_name _extra_paths) + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + set(ENV_LAPACKE_DIR "$ENV{LAPACKE_DIR}") + set(ENV_LAPACKE_INCDIR "$ENV{LAPACKE_INCDIR}") + if(ENV_LAPACKE_INCDIR) + list(APPEND _inc_env "${ENV_LAPACKE_INCDIR}") + elseif(ENV_LAPACKE_DIR) + list(APPEND _inc_env "${ENV_LAPACKE_DIR}") + list(APPEND _inc_env "${ENV_LAPACKE_DIR}/include") + list(APPEND _inc_env "${ENV_LAPACKE_DIR}/include/lapacke") + else() + # system variables + if(WIN32) + file(TO_CMAKE_PATH "$ENV{INCLUDE}" _inc_env) # INCLUDE is ';'-separated on Windows; splitting on ':' would cut drive letters + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") +
endif() + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${_extra_paths}") + list(APPEND _inc_env "${_extra_paths}/include") + list(REMOVE_DUPLICATES _inc_env) + + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_inc_env}") + # Try to find the lapacke header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(LAPACKE_INCDIR) + set(LAPACKE_${_header_name}_DIRS "LAPACKE_${_header_name}_DIRS-NOTFOUND") + find_path(LAPACKE_${_header_name}_DIRS + NAMES ${_header_name} + HINTS ${LAPACKE_INCDIR}) + else() + if(LAPACKE_DIR) + set(LAPACKE_${_header_name}_DIRS "LAPACKE_${_header_name}_DIRS-NOTFOUND") + find_path(LAPACKE_${_header_name}_DIRS + NAMES ${_header_name} + HINTS ${LAPACKE_DIR} + PATH_SUFFIXES "include" "include/lapacke") + else() + set(LAPACKE_${_header_name}_DIRS "LAPACKE_${_header_name}_DIRS-NOTFOUND") + find_path(LAPACKE_${_header_name}_DIRS + NAMES ${_header_name} + HINTS ${PATH_TO_LOOK_FOR}) + endif() + endif() + mark_as_advanced(LAPACKE_${_header_name}_DIRS) + + # If found, add path to cmake variable + # ------------------------------------ + if (LAPACKE_${_header_name}_DIRS) + set(LAPACKE_INCLUDE_DIRS "${LAPACKE_${_header_name}_DIRS}") + else () + set(LAPACKE_INCLUDE_DIRS "LAPACKE_INCLUDE_DIRS-NOTFOUND") + if(NOT LAPACKE_FIND_QUIETLY) + message(STATUS "Looking for lapacke -- ${_header_name} not found") + endif() + endif() +endmacro() + + + +if (NOT LAPACKE_FOUND) + set(LAPACKE_DIR "" CACHE PATH "Installation directory of LAPACKE library") + if (NOT LAPACKE_FIND_QUIETLY) + message(STATUS "A cache variable, namely LAPACKE_DIR, has been set to specify the install directory of LAPACKE") + endif() +endif() + +# LAPACKE depends on LAPACK anyway, try to find it +if (NOT LAPACK_FOUND) + if(LAPACKE_FIND_REQUIRED) + find_package(LAPACKEXT REQUIRED) + else() + 
find_package(LAPACKEXT) + endif() +endif() + +# LAPACKE depends on LAPACK +if (LAPACK_FOUND) + # find header + Find_Lapacke_Header( "lapacke.h" "${LAPACK_DIR_FOUND}" ) + + if (NOT LAPACKE_STANDALONE) + # check if a lapacke function exists in the LAPACK lib + include(CheckFunctionExists) + set(CMAKE_REQUIRED_LIBRARIES "${LAPACK_LINKER_FLAGS};${LAPACK_LIBRARIES}") + unset(LAPACKE_WORKS CACHE) + check_function_exists(LAPACKE_dgeqrf LAPACKE_WORKS) + mark_as_advanced(LAPACKE_WORKS) + set(CMAKE_REQUIRED_LIBRARIES) + + if(LAPACKE_WORKS) + if(NOT LAPACKE_FIND_QUIETLY) + message(STATUS "Looking for lapacke: test with lapack succeeds") + endif() + # test succeeds: LAPACKE is in LAPACK + set(LAPACKE_LIBRARIES "${LAPACK_LIBRARIES}") + if (LAPACK_LIBRARY_DIRS) + set(LAPACKE_LIBRARY_DIRS "${LAPACK_LIBRARY_DIRS}") + endif() + if(LAPACK_INCLUDE_DIRS) + set(LAPACKE_INCLUDE_DIRS "${LAPACK_INCLUDE_DIRS}") + endif() + if (LAPACK_LINKER_FLAGS) + set(LAPACKE_LINKER_FLAGS "${LAPACK_LINKER_FLAGS}") + endif() + endif() + endif (NOT LAPACKE_STANDALONE) + + if (LAPACKE_STANDALONE OR NOT LAPACKE_WORKS) + + if(NOT LAPACKE_WORKS AND NOT LAPACKE_FIND_QUIETLY) + message(STATUS "Looking for lapacke : test with lapack fails") + endif() + # test fails: try to find LAPACKE lib exterior to LAPACK + + # Try to find LAPACKE lib + ####################### + + # Looking for lib + # --------------- + + # Add system library paths to search lib + # -------------------------------------- + unset(_lib_env) + set(ENV_LAPACKE_LIBDIR "$ENV{LAPACKE_LIBDIR}") + if(ENV_LAPACKE_LIBDIR) + list(APPEND _lib_env "${ENV_LAPACKE_LIBDIR}") + elseif(ENV_LAPACKE_DIR) + list(APPEND _lib_env "${ENV_LAPACKE_DIR}") + list(APPEND _lib_env "${ENV_LAPACKE_DIR}/lib") + else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env 
"${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() + endif() + list(REMOVE_DUPLICATES _lib_env) + + # Try to find the lapacke lib in the given paths + # ---------------------------------------------- + + # call cmake macro to find the lib path + if(LAPACKE_LIBDIR) + set(LAPACKE_lapacke_LIBRARY "LAPACKE_lapacke_LIBRARY-NOTFOUND") + find_library(LAPACKE_lapacke_LIBRARY + NAMES lapacke + HINTS ${LAPACKE_LIBDIR}) + else() + if(LAPACKE_DIR) + set(LAPACKE_lapacke_LIBRARY "LAPACKE_lapacke_LIBRARY-NOTFOUND") + find_library(LAPACKE_lapacke_LIBRARY + NAMES lapacke + HINTS ${LAPACKE_DIR} + PATH_SUFFIXES lib lib32 lib64) + else() + set(LAPACKE_lapacke_LIBRARY "LAPACKE_lapacke_LIBRARY-NOTFOUND") + find_library(LAPACKE_lapacke_LIBRARY + NAMES lapacke + HINTS ${_lib_env}) + endif() + endif() + mark_as_advanced(LAPACKE_lapacke_LIBRARY) + + # If found, add path to cmake variable + # ------------------------------------ + if (LAPACKE_lapacke_LIBRARY) + get_filename_component(lapacke_lib_path "${LAPACKE_lapacke_LIBRARY}" PATH) + # set cmake variables + set(LAPACKE_LIBRARIES "${LAPACKE_lapacke_LIBRARY}") + set(LAPACKE_LIBRARY_DIRS "${lapacke_lib_path}") + else () + set(LAPACKE_LIBRARIES "LAPACKE_LIBRARIES-NOTFOUND") + set(LAPACKE_LIBRARY_DIRS "LAPACKE_LIBRARY_DIRS-NOTFOUND") + if (NOT LAPACKE_FIND_QUIETLY) + message(STATUS "Looking for lapacke -- lib lapacke not found") + endif() + endif () + + # check a function to validate the find + if(LAPACKE_LIBRARIES) + + set(REQUIRED_LDFLAGS) + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # LAPACKE + if (LAPACKE_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${LAPACKE_INCLUDE_DIRS}") + endif() + if (LAPACKE_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${LAPACKE_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${LAPACKE_LIBRARIES}") + # LAPACK + if (LAPACK_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${LAPACK_INCLUDE_DIRS}") + endif() + if (LAPACK_LIBRARY_DIRS) + 
list(APPEND REQUIRED_LIBDIRS "${LAPACK_LIBRARY_DIRS}") + endif() + list(APPEND REQUIRED_LIBS "${LAPACK_LIBRARIES}") + if (LAPACK_LINKER_FLAGS) + list(APPEND REQUIRED_LDFLAGS "${LAPACK_LINKER_FLAGS}") + endif() + # Fortran + if (CMAKE_C_COMPILER_ID MATCHES "GNU") + find_library( + FORTRAN_gfortran_LIBRARY + NAMES gfortran + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_gfortran_LIBRARY) + if (FORTRAN_gfortran_LIBRARY) + list(APPEND REQUIRED_LIBS "${FORTRAN_gfortran_LIBRARY}") + endif() + elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") + find_library( + FORTRAN_ifcore_LIBRARY + NAMES ifcore + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_ifcore_LIBRARY) + if (FORTRAN_ifcore_LIBRARY) + list(APPEND REQUIRED_LIBS "${FORTRAN_ifcore_LIBRARY}") + endif() + endif() + # m + find_library(M_LIBRARY NAMES m HINTS ${_lib_env}) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(LAPACKE_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(LAPACKE_dgeqrf LAPACKE_WORKS) + mark_as_advanced(LAPACKE_WORKS) + + if(LAPACKE_WORKS) + # save link with dependencies + set(LAPACKE_LIBRARIES_DEP "${REQUIRED_LIBS}") + set(LAPACKE_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") + set(LAPACKE_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") + set(LAPACKE_LINKER_FLAGS "${REQUIRED_LDFLAGS}") + list(REMOVE_DUPLICATES LAPACKE_LIBRARY_DIRS_DEP) + list(REMOVE_DUPLICATES LAPACKE_INCLUDE_DIRS_DEP) + list(REMOVE_DUPLICATES LAPACKE_LINKER_FLAGS) + else() + if(NOT LAPACKE_FIND_QUIETLY) + message(STATUS "Looking 
for lapacke: test of LAPACKE_dgeqrf with lapacke and lapack libraries fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) + endif(LAPACKE_LIBRARIES) + + endif (LAPACKE_STANDALONE OR NOT LAPACKE_WORKS) + +else(LAPACK_FOUND) + + if (NOT LAPACKE_FIND_QUIETLY) + message(STATUS "LAPACKE requires LAPACK but LAPACK has not been found. " + "Please look for LAPACK first.") + endif() + +endif(LAPACK_FOUND) + +if (LAPACKE_LIBRARIES) + list(GET LAPACKE_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if ("${first_lib_path}" MATCHES "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)") + string(REGEX REPLACE "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)" "" not_cached_dir "${first_lib_path}") + set(LAPACKE_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of LAPACKE library" FORCE) + else() + set(LAPACKE_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of LAPACKE library" FORCE) + endif() +endif() +mark_as_advanced(LAPACKE_DIR) +mark_as_advanced(LAPACKE_DIR_FOUND) + +# check that LAPACKE has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LAPACKE DEFAULT_MSG + LAPACKE_LIBRARIES + LAPACKE_WORKS) diff --git a/cmake/FindLAPACKEXT.cmake b/cmake/FindLAPACKEXT.cmake new file mode 100644 index 0000000..034d3a7 --- /dev/null +++ b/cmake/FindLAPACKEXT.cmake @@ -0,0 +1,352 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux.
All rights reserved. +# @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST). +# All rights reserved. +# +### +# +# - Find LAPACK EXTENDED for ECRC projects: find include dirs and libraries +# +# This module allows to find LAPACK libraries by calling the official FindLAPACK module +# and handles the creation of different library lists whether the user wishes to link +# with a sequential LAPACK or a multithreaded (LAPACK_SEQ_LIBRARIES and LAPACK_PAR_LIBRARIES). +# LAPACK is detected with a FindLAPACK call and if the BLAS vendor is in the following list, +# Intel mkl, ACML then the module tries to find the corresponding multithreaded libraries +# +# The following variables have been added to manage links with sequential or multithreaded +# versions: +# LAPACK_INCLUDE_DIRS - LAPACK include directories +# LAPACK_LIBRARY_DIRS - Link directories for LAPACK libraries +# LAPACK_SEQ_LIBRARIES - LAPACK component libraries to be linked (sequential) +# LAPACK_PAR_LIBRARIES - LAPACK component libraries to be linked (multithreaded) + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file ECRC-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Ecrc, substitute the full +# License text for the above reference.)
+ +# macro to factorize this call +macro(find_package_lapack) + if(LAPACKEXT_FIND_REQUIRED) + if(LAPACKEXT_FIND_QUIETLY) + find_package(LAPACK REQUIRED QUIET) + else() + find_package(LAPACK REQUIRED) + endif() + else() + if(LAPACKEXT_FIND_QUIETLY) + find_package(LAPACK QUIET) + else() + find_package(LAPACK) + endif() + endif() +endmacro() + +# LAPACKEXT depends on BLASEXT +# call our extended module for BLAS +#---------------------------------- +if (NOT BLAS_FOUND) + if(LAPACKEXT_FIND_REQUIRED) + if(LAPACKEXT_FIND_QUIETLY) + find_package(BLASEXT REQUIRED QUIET) + else() + find_package(BLASEXT REQUIRED) + endif() + else() + if(LAPACKEXT_FIND_QUIETLY) + find_package(BLASEXT QUIET) + else() + find_package(BLASEXT) + endif() + endif() +endif () + +if(NOT LAPACKEXT_FIND_QUIETLY) + message(STATUS "In FindLAPACKEXT") +endif() + +if(BLA_VENDOR MATCHES "Intel*") + + ### + # look for include path if the LAPACK vendor is Intel + ### + + # gather system include paths + unset(_inc_env) + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + set(ENV_MKLROOT "$ENV{MKLROOT}") + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include") + endif() + list(REMOVE_DUPLICATES _inc_env) + + if (BLAS_DIR) + set(LAPACK_DIR ${BLAS_DIR}) + endif () + if (BLAS_INCDIR) + set(LAPACK_INCDIR ${BLAS_INCDIR}) + endif () + # find mkl.h inside known include paths + set(LAPACK_mkl_lapack.h_INCLUDE_DIRS "LAPACK_mkl_lapack.h_INCLUDE_DIRS-NOTFOUND") + 
if(LAPACK_INCDIR) + find_path(LAPACK_mkl_lapack.h_INCLUDE_DIRS + NAMES mkl_lapack.h + HINTS ${LAPACK_INCDIR}) + else() + if(LAPACK_DIR) + find_path(LAPACK_mkl_lapack.h_INCLUDE_DIRS + NAMES mkl_lapack.h + HINTS ${LAPACK_DIR} + PATH_SUFFIXES include) + else() + find_path(LAPACK_mkl_lapack.h_INCLUDE_DIRS + NAMES mkl_lapack.h + HINTS ${_inc_env}) + endif() + endif() + mark_as_advanced(LAPACK_mkl_lapack.h_INCLUDE_DIRS) + ## Print status if not found + ## ------------------------- + #if (NOT LAPACK_mkl_lapack.h_INCLUDE_DIRS) + # Print_Find_Header_Status(lapack mkl_lapack.h) + #endif () + set(LAPACK_INCLUDE_DIRS "") + if(LAPACK_mkl_lapack.h_INCLUDE_DIRS) + list(APPEND LAPACK_INCLUDE_DIRS "${LAPACK_mkl_lapack.h_INCLUDE_DIRS}" ) + endif() + + ### + # look for libs + ### + + if (BLA_VENDOR MATCHES "Intel10_64lp*") + ## look for the sequential version + set(BLA_VENDOR "Intel10_64lp_seq") + endif() + find_package_lapack() + + if (LAPACK_FOUND) + if(BLAS_SEQ_LIBRARIES) + set(LAPACK_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES}") + else() + set(LAPACK_SEQ_LIBRARIES "LAPACK_SEQ_LIBRARIES-NOTFOUND") + endif() + # if BLAS Intel 10 64 bit -> save sequential and multithreaded versions + if(BLA_VENDOR MATCHES "Intel10_64lp*") + if(BLAS_PAR_LIBRARIES) + set(LAPACK_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES}") + else() + set(LAPACK_PAR_LIBRARIES "LAPACK_PAR_LIBRARIES-NOTFOUND") + endif() + endif() + endif() + +elseif(BLA_VENDOR MATCHES "IBMESSL*") + + ## look for the sequential version + set(BLA_VENDOR "IBMESSL") + find_package_lapack() + + if (LAPACK_FOUND) + if(LAPACK_LIBRARIES) + set(LAPACK_SEQ_LIBRARIES "${LAPACK_LIBRARIES}") + else() + set(LAPACK_SEQ_LIBRARIES "LAPACK_SEQ_LIBRARIES-NOTFOUND") + endif() + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "IBMESSLMT") + find_package_lapack() + + if (LAPACK_FOUND) + if(LAPACK_LIBRARIES) + set(LAPACK_PAR_LIBRARIES "${LAPACK_LIBRARIES}") + else() + set(LAPACK_PAR_LIBRARIES "LAPACK_PAR_LIBRARIES-NOTFOUND") + endif() +
endif() + +elseif(BLA_VENDOR MATCHES "ACML*") + + ### + # look for libs + ### + find_package_lapack() + + if (LAPACK_FOUND) + if(BLAS_SEQ_LIBRARIES) + set(LAPACK_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES}") + else() + set(LAPACK_SEQ_LIBRARIES "LAPACK_SEQ_LIBRARIES-NOTFOUND") + endif() + if(BLAS_PAR_LIBRARIES) + set(LAPACK_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES}") + else() + set(LAPACK_PAR_LIBRARIES "LAPACK_PAR_LIBRARIES-NOTFOUND") + endif() + endif() + +else() + + ## look for a sequential version + # call to the cmake official FindLAPACK module + # This module sets the following variables: + # LAPACK_FOUND - set to true if a library implementing the LAPACK interface + # is found + # LAPACK_LINKER_FLAGS - uncached list of required linker flags (excluding -l + # and -L). + # LAPACK_LIBRARIES - uncached list of libraries (using full path name) to + # link against to use LAPACK + # LAPACK95_LIBRARIES - uncached list of libraries (using full path name) + # to link against to use LAPACK95 interface + # LAPACK95_FOUND - set to true if a library implementing the LAPACK f95 interface + # is found + # BLA_STATIC if set on this determines what kind of linkage we do (static) + # BLA_VENDOR if set checks only the specified vendor, if not set checks + # all the possibilities + # BLA_F95 if set on tries to find the f95 interfaces for BLAS/LAPACK + # Remark: it looks only into paths contained in the system environment variables + find_package_lapack() + + if(LAPACK_FOUND) + set(LAPACK_SEQ_LIBRARIES "${LAPACK_LIBRARIES}") + else() + set(LAPACK_SEQ_LIBRARIES "LAPACK_SEQ_LIBRARIES-NOTFOUND") + endif() + set(LAPACK_PAR_LIBRARIES "LAPACK_PAR_LIBRARIES-NOTFOUND") + +endif() + +if (LAPACK_SEQ_LIBRARIES) + set(LAPACK_LIBRARIES "${LAPACK_SEQ_LIBRARIES}") +endif() + +# extract libs paths +# remark: because it is not given by find_package(LAPACK) +set(LAPACK_LIBRARY_DIRS "") +string(REPLACE " " ";" LAPACK_LIBRARIES "${LAPACK_LIBRARIES}") +foreach(lapack_lib ${LAPACK_LIBRARIES}) + if (EXISTS
"${lapack_lib}") + get_filename_component(a_lapack_lib_dir "${lapack_lib}" PATH) + list(APPEND LAPACK_LIBRARY_DIRS "${a_lapack_lib_dir}" ) + else() + string(REPLACE "-L" "" lapack_lib "${lapack_lib}") + if (EXISTS "${lapack_lib}") + list(APPEND LAPACK_LIBRARY_DIRS "${lapack_lib}" ) + else() + get_filename_component(a_lapack_lib_dir "${lapack_lib}" PATH) + if (EXISTS "${a_lapack_lib_dir}") + list(APPEND LAPACK_LIBRARY_DIRS "${a_lapack_lib_dir}" ) + endif() + endif() + endif() +endforeach() +if (LAPACK_LIBRARY_DIRS) + list(REMOVE_DUPLICATES LAPACK_LIBRARY_DIRS) +endif () + +# check that LAPACK has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +if(BLA_VENDOR MATCHES "Intel*") + if(BLA_VENDOR MATCHES "Intel10_64lp*") + if(NOT LAPACKEXT_FIND_QUIETLY) + message(STATUS "LAPACK found is Intel MKL:" + "\n we manage two lists of libs, one sequential and one parallel" + "\n (see LAPACK_SEQ_LIBRARIES and LAPACK_PAR_LIBRARIES)") + message(STATUS "LAPACK sequential libraries stored in LAPACK_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(LAPACK DEFAULT_MSG + LAPACK_SEQ_LIBRARIES + LAPACK_LIBRARY_DIRS + LAPACK_INCLUDE_DIRS) + if(LAPACK_PAR_LIBRARIES) + if(NOT LAPACKEXT_FIND_QUIETLY) + message(STATUS "LAPACK parallel libraries stored in LAPACK_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(LAPACK DEFAULT_MSG + LAPACK_PAR_LIBRARIES) + endif() + + else() + if(NOT LAPACKEXT_FIND_QUIETLY) + message(STATUS "LAPACK sequential libraries stored in LAPACK_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(LAPACK DEFAULT_MSG + LAPACK_SEQ_LIBRARIES + LAPACK_LIBRARY_DIRS + LAPACK_INCLUDE_DIRS) + endif() +elseif(BLA_VENDOR MATCHES "ACML*") + if(NOT LAPACKEXT_FIND_QUIETLY) + message(STATUS "LAPACK found is ACML:" + "\n we manage two lists of libs, one sequential and one parallel" + "\n (see LAPACK_SEQ_LIBRARIES and LAPACK_PAR_LIBRARIES)") + message(STATUS "LAPACK sequential libraries stored in 
LAPACK_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(LAPACK DEFAULT_MSG + LAPACK_SEQ_LIBRARIES + LAPACK_LIBRARY_DIRS) + if(LAPACK_PAR_LIBRARIES) + if(NOT LAPACKEXT_FIND_QUIETLY) + message(STATUS "LAPACK parallel libraries stored in LAPACK_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(LAPACK DEFAULT_MSG + LAPACK_PAR_LIBRARIES) + endif() +elseif(BLA_VENDOR MATCHES "IBMESSL*") + if(NOT LAPACKEXT_FIND_QUIETLY) + message(STATUS "LAPACK found is IBMESSL:" + "\n we manage two lists of libs, one sequential and one parallel" + "\n (see LAPACK_SEQ_LIBRARIES and LAPACK_PAR_LIBRARIES)") + message(STATUS "LAPACK sequential libraries stored in LAPACK_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(LAPACK DEFAULT_MSG + LAPACK_SEQ_LIBRARIES + LAPACK_LIBRARY_DIRS) + if(LAPACK_PAR_LIBRARIES) + if(NOT LAPACKEXT_FIND_QUIETLY) + message(STATUS "LAPACK parallel libraries stored in LAPACK_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(LAPACK DEFAULT_MSG + LAPACK_PAR_LIBRARIES) + endif() +else() + if(NOT LAPACKEXT_FIND_QUIETLY) + message(STATUS "LAPACK sequential libraries stored in LAPACK_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(LAPACK DEFAULT_MSG + LAPACK_SEQ_LIBRARIES + LAPACK_LIBRARY_DIRS) +endif() diff --git a/cmake/FindPackageHandleStandardArgs.cmake b/cmake/FindPackageHandleStandardArgs.cmake new file mode 100644 index 0000000..926bd8b --- /dev/null +++ b/cmake/FindPackageHandleStandardArgs.cmake @@ -0,0 +1,605 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +FindPackageHandleStandardArgs +----------------------------- + +This module provides functions intended to be used in :ref:`Find Modules` +implementing :command:`find_package()` calls. + +.. 
command:: find_package_handle_standard_args + + This command handles the ``REQUIRED``, ``QUIET`` and version-related + arguments of :command:`find_package`. It also sets the + ``<PackageName>_FOUND`` variable. The package is considered found if all + variables listed contain valid results, e.g. valid filepaths. + + There are two signatures: + + .. code-block:: cmake + + find_package_handle_standard_args(<PackageName> + (DEFAULT_MSG|<custom-failure-message>) + <required-var>... + ) + + find_package_handle_standard_args(<PackageName> + [FOUND_VAR <result-var>] + [REQUIRED_VARS <required-var>...] + [VERSION_VAR <version-var>] + [HANDLE_VERSION_RANGE] + [HANDLE_COMPONENTS] + [CONFIG_MODE] + [NAME_MISMATCHED] + [REASON_FAILURE_MESSAGE <reason-failure-message>] + [FAIL_MESSAGE <custom-failure-message>] + ) + + The ``<PackageName>_FOUND`` variable will be set to ``TRUE`` if all + the variables ``<required-var>...`` are valid and any optional + constraints are satisfied, and ``FALSE`` otherwise. A success or + failure message may be displayed based on the results and on + whether the ``REQUIRED`` and/or ``QUIET`` option was given to + the :command:`find_package` call. + + The options are: + + ``(DEFAULT_MSG|<custom-failure-message>)`` + In the simple signature this specifies the failure message. + Use ``DEFAULT_MSG`` to ask for a default message to be computed + (recommended). Not valid in the full signature. + + ``FOUND_VAR <result-var>`` + .. deprecated:: 3.3 + + Specifies either ``<PackageName>_FOUND`` or + ``<PACKAGENAME>_FOUND`` as the result variable. This exists only + for compatibility with older versions of CMake and is now ignored. + Result variables of both names are always set for compatibility. + + ``REQUIRED_VARS <required-var>...`` + Specify the variables which are required for this package. + These may be named in the generated failure message asking the + user to set the missing variable values. Therefore these should + typically be cache entries such as ``FOO_LIBRARY`` and not output + variables like ``FOO_LIBRARIES``. + + .. versionchanged:: 3.18 + If ``HANDLE_COMPONENTS`` is specified, this option can be omitted.
+ + ``VERSION_VAR <version-var>`` + Specify the name of a variable that holds the version of the package + that has been found. This version will be checked against the + (potentially) specified required version given to the + :command:`find_package` call, including its ``EXACT`` option. + The default messages include information about the required + version and the version which has been actually found, both + if the version is ok or not. + + ``HANDLE_VERSION_RANGE`` + .. versionadded:: 3.19 + + Enable handling of a version range, if one is specified. Without this + option, a developer warning will be displayed if a version range is + specified. + + ``HANDLE_COMPONENTS`` + Enable handling of package components. In this case, the command + will report which components have been found and which are missing, + and the ``<PackageName>_FOUND`` variable will be set to ``FALSE`` + if any of the required components (i.e. not the ones listed after + the ``OPTIONAL_COMPONENTS`` option of :command:`find_package`) are + missing. + + ``CONFIG_MODE`` + Specify that the calling find module is a wrapper around a + call to ``find_package(<PackageName> NO_MODULE)``. This implies + a ``VERSION_VAR`` value of ``<PackageName>_VERSION``. The command + will automatically check whether the package configuration file + was found. + + ``REASON_FAILURE_MESSAGE <reason-failure-message>`` + .. versionadded:: 3.16 + + Specify a custom message of the reason for the failure which will be + appended to the default generated message. + + ``FAIL_MESSAGE <custom-failure-message>`` + Specify a custom failure message instead of using the default + generated message. Not recommended. + + ``NAME_MISMATCHED`` + .. versionadded:: 3.17 + + Indicate that the ``<PackageName>`` does not match + ``${CMAKE_FIND_PACKAGE_NAME}``. This is usually a mistake and raises a + warning, but it may be intentional for usage of the command for components + of a larger package. + +Example for the simple signature: + +..
code-block:: cmake + + find_package_handle_standard_args(LibXml2 DEFAULT_MSG + LIBXML2_LIBRARY LIBXML2_INCLUDE_DIR) + +The ``LibXml2`` package is considered to be found if both +``LIBXML2_LIBRARY`` and ``LIBXML2_INCLUDE_DIR`` are valid. +Then also ``LibXml2_FOUND`` is set to ``TRUE``. If it is not found +and ``REQUIRED`` was used, it fails with a +:command:`message(FATAL_ERROR)`, independent whether ``QUIET`` was +used or not. If it is found, success will be reported, including +the content of the first ``<required-var>``. On repeated CMake runs, +the same message will not be printed again. + +.. note:: + + If ``<PackageName>`` does not match ``CMAKE_FIND_PACKAGE_NAME`` for the + calling module, a warning that there is a mismatch is given. The + ``FPHSA_NAME_MISMATCHED`` variable may be set to bypass the warning if using + the old signature and the ``NAME_MISMATCHED`` argument using the new + signature. To avoid forcing the caller to require newer versions of CMake for + usage, the variable's value will be used if defined when the + ``NAME_MISMATCHED`` argument is not passed for the new signature (but using + both is an error). + +Example for the full signature: + +.. code-block:: cmake + + find_package_handle_standard_args(LibArchive + REQUIRED_VARS LibArchive_LIBRARY LibArchive_INCLUDE_DIR + VERSION_VAR LibArchive_VERSION) + +In this case, the ``LibArchive`` package is considered to be found if +both ``LibArchive_LIBRARY`` and ``LibArchive_INCLUDE_DIR`` are valid. +Also the version of ``LibArchive`` will be checked by using the version +contained in ``LibArchive_VERSION``. Since no ``FAIL_MESSAGE`` is given, +the default messages will be printed. + +Another example for the full signature: + +..
code-block:: cmake + + find_package(Automoc4 QUIET NO_MODULE HINTS /opt/automoc4) + find_package_handle_standard_args(Automoc4 CONFIG_MODE) + +In this case, a ``FindAutomoc4.cmake`` module wraps a call to +``find_package(Automoc4 NO_MODULE)`` and adds an additional search +directory for ``automoc4``. Then the call to +``find_package_handle_standard_args`` produces a proper success/failure +message. + +.. command:: find_package_check_version + + .. versionadded:: 3.19 + + Helper function which can be used to check if a ``<version>`` is valid + against version-related arguments of :command:`find_package`. + + .. code-block:: cmake + + find_package_check_version(<version> <result-var> + [HANDLE_VERSION_RANGE] + [RESULT_MESSAGE_VARIABLE <message-var>] + ) + + The ``<result-var>`` will hold a boolean value giving the result of the check. + + The options are: + + ``HANDLE_VERSION_RANGE`` + Enable handling of a version range, if one is specified. Without this + option, a developer warning will be displayed if a version range is + specified. + + ``RESULT_MESSAGE_VARIABLE <message-var>`` + Specify a variable to get back a message describing the result of the check. + +Example for the usage: + +..
code-block:: cmake + + find_package_check_version(1.2.3 result HANDLE_VERSION_RANGE + RESULT_MESSAGE_VARIABLE reason) + if (result) + message (STATUS "${reason}") + else() + message (FATAL_ERROR "${reason}") + endif() +#]=======================================================================] + +include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake) + + +cmake_policy(PUSH) +# numbers and boolean constants +cmake_policy (SET CMP0012 NEW) +# IN_LIST operator +cmake_policy (SET CMP0057 NEW) + + +# internal helper macro +macro(_FPHSA_FAILURE_MESSAGE _msg) + set (__msg "${_msg}") + if (FPHSA_REASON_FAILURE_MESSAGE) + string(APPEND __msg "\n Reason given by package: ${FPHSA_REASON_FAILURE_MESSAGE}\n") + endif() + if (${_NAME}_FIND_REQUIRED) + message(FATAL_ERROR "${__msg}") + else () + if (NOT ${_NAME}_FIND_QUIETLY) + message(STATUS "${__msg}") + endif () + endif () +endmacro() + + +# internal helper macro to generate the failure message when used in CONFIG_MODE: +macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE) + # _CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found: + if(${_NAME}_CONFIG) + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})") + else() + # If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version. 
+ # List them all in the error message: + if(${_NAME}_CONSIDERED_CONFIGS) + set(configsText "") + list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount) + math(EXPR configsCount "${configsCount} - 1") + foreach(currentConfigIndex RANGE ${configsCount}) + list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename) + list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version) + string(APPEND configsText "\n ${filename} (version ${version})") + endforeach() + if (${_NAME}_NOT_FOUND_MESSAGE) + if (FPHSA_REASON_FAILURE_MESSAGE) + string(PREPEND FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}\n ") + else() + set(FPHSA_REASON_FAILURE_MESSAGE "${${_NAME}_NOT_FOUND_MESSAGE}") + endif() + else() + string(APPEND configsText "\n") + endif() + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:${configsText}") + + else() + # Simple case: No Config-file was found at all: + _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}") + endif() + endif() +endmacro() + + +function(FIND_PACKAGE_CHECK_VERSION version result) + cmake_parse_arguments (PARSE_ARGV 2 FPCV "HANDLE_VERSION_RANGE;NO_AUTHOR_WARNING_VERSION_RANGE" "RESULT_MESSAGE_VARIABLE" "") + + if (FPCV_UNPARSED_ARGUMENTS) + message (FATAL_ERROR "find_package_check_version(): ${FPCV_UNPARSED_ARGUMENTS}: unexpected arguments") + endif() + if ("RESULT_MESSAGE_VARIABLE" IN_LIST FPCV_KEYWORDS_MISSING_VALUES) + message (FATAL_ERROR "find_package_check_version(): RESULT_MESSAGE_VARIABLE expects an argument") + endif() + + set (${result} FALSE PARENT_SCOPE) + if (FPCV_RESULT_MESSAGE_VARIABLE) + unset (${FPCV_RESULT_MESSAGE_VARIABLE} PARENT_SCOPE) + endif() + + if (_CMAKE_FPHSA_PACKAGE_NAME) + set (package "${_CMAKE_FPHSA_PACKAGE_NAME}") + elseif (CMAKE_FIND_PACKAGE_NAME) + set (package "${CMAKE_FIND_PACKAGE_NAME}") + else() + message (FATAL_ERROR "find_package_check_version(): Cannot be used
outside a 'Find Module'") + endif() + + if (NOT FPCV_NO_AUTHOR_WARNING_VERSION_RANGE + AND ${package}_FIND_VERSION_RANGE AND NOT FPCV_HANDLE_VERSION_RANGE) + message(AUTHOR_WARNING + "`find_package()` specify a version range but the option " + "HANDLE_VERSION_RANGE` is not passed to `find_package_check_version()`. " + "Only the lower endpoint of the range will be used.") + endif() + + + set (version_ok FALSE) + unset (version_msg) + + if (FPCV_HANDLE_VERSION_RANGE AND ${package}_FIND_VERSION_RANGE) + if ((${package}_FIND_VERSION_RANGE_MIN STREQUAL "INCLUDE" + AND version VERSION_GREATER_EQUAL ${package}_FIND_VERSION_MIN) + AND ((${package}_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" + AND version VERSION_LESS_EQUAL ${package}_FIND_VERSION_MAX) + OR (${package}_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" + AND version VERSION_LESS ${package}_FIND_VERSION_MAX))) + set (version_ok TRUE) + set(version_msg "(found suitable version \"${version}\", required range is \"${${package}_FIND_VERSION_RANGE}\")") + else() + set(version_msg "Found unsuitable version \"${version}\", required range is \"${${package}_FIND_VERSION_RANGE}\"") + endif() + elseif (DEFINED ${package}_FIND_VERSION) + if(${package}_FIND_VERSION_EXACT) # exact version required + # count the dots in the version string + string(REGEX REPLACE "[^.]" "" version_dots "${version}") + # add one dot because there is one dot more than there are components + string(LENGTH "${version_dots}." version_dots) + if (version_dots GREATER ${package}_FIND_VERSION_COUNT) + # Because of the C++ implementation of find_package() ${package}_FIND_VERSION_COUNT + # is at most 4 here. Therefore a simple lookup table is used.
+ if (${package}_FIND_VERSION_COUNT EQUAL 1) + set(version_regex "[^.]*") + elseif (${package}_FIND_VERSION_COUNT EQUAL 2) + set(version_regex "[^.]*\\.[^.]*") + elseif (${package}_FIND_VERSION_COUNT EQUAL 3) + set(version_regex "[^.]*\\.[^.]*\\.[^.]*") + else() + set(version_regex "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*") + endif() + string(REGEX REPLACE "^(${version_regex})\\..*" "\\1" version_head "${version}") + if (NOT ${package}_FIND_VERSION VERSION_EQUAL version_head) + set(version_msg "Found unsuitable version \"${version}\", but required is exact version \"${${package}_FIND_VERSION}\"") + else () + set(version_ok TRUE) + set(version_msg "(found suitable exact version \"${version}\")") + endif () + else () + if (NOT ${package}_FIND_VERSION VERSION_EQUAL version) + set(version_msg "Found unsuitable version \"${version}\", but required is exact version \"${${package}_FIND_VERSION}\"") + else () + set(version_ok TRUE) + set(version_msg "(found suitable exact version \"${version}\")") + endif () + endif () + else() # minimum version + if (${package}_FIND_VERSION VERSION_GREATER version) + set(version_msg "Found unsuitable version \"${version}\", but required is at least \"${${package}_FIND_VERSION}\"") + else() + set(version_ok TRUE) + set(version_msg "(found suitable version \"${version}\", minimum required is \"${${package}_FIND_VERSION}\")") + endif() + endif() + else () + set(version_ok TRUE) + set(version_msg "(found version \"${version}\")") + endif() + + set (${result} ${version_ok} PARENT_SCOPE) + if (FPCV_RESULT_MESSAGE_VARIABLE) + set (${FPCV_RESULT_MESSAGE_VARIABLE} "${version_msg}" PARENT_SCOPE) + endif() +endfunction() + + +function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG) + + # Set up the arguments for `cmake_parse_arguments`.
+ set(options CONFIG_MODE HANDLE_COMPONENTS NAME_MISMATCHED HANDLE_VERSION_RANGE) + set(oneValueArgs FAIL_MESSAGE REASON_FAILURE_MESSAGE VERSION_VAR FOUND_VAR) + set(multiValueArgs REQUIRED_VARS) + + # Check whether we are in 'simple' or 'extended' mode: + set(_KEYWORDS_FOR_EXTENDED_MODE ${options} ${oneValueArgs} ${multiValueArgs} ) + list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX) + + unset(FPHSA_NAME_MISMATCHED_override) + if (DEFINED FPHSA_NAME_MISMATCHED) + # If the variable NAME_MISMATCHED variable is set, error if it is passed as + # an argument. The former is for old signatures, the latter is for new + # signatures. + list(FIND ARGN "NAME_MISMATCHED" name_mismatched_idx) + if (NOT name_mismatched_idx EQUAL "-1") + message(FATAL_ERROR + "The `NAME_MISMATCHED` argument may only be specified by the argument or " + "the variable, not both.") + endif () + + # But use the variable if it is not an argument to avoid forcing minimum + # CMake version bumps for calling modules. 
+ set(FPHSA_NAME_MISMATCHED_override "${FPHSA_NAME_MISMATCHED}") + endif () + + if(${INDEX} EQUAL -1) + set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG}) + set(FPHSA_REQUIRED_VARS ${ARGN}) + set(FPHSA_VERSION_VAR) + else() + cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}" ${_FIRST_ARG} ${ARGN}) + + if(FPHSA_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"") + endif() + + if(NOT FPHSA_FAIL_MESSAGE) + set(FPHSA_FAIL_MESSAGE "DEFAULT_MSG") + endif() + + # In config-mode, we rely on the variable _CONFIG, which is set by find_package() + # when it successfully found the config-file, including version checking: + if(FPHSA_CONFIG_MODE) + list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG) + list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS) + set(FPHSA_VERSION_VAR ${_NAME}_VERSION) + endif() + + if(NOT FPHSA_REQUIRED_VARS AND NOT FPHSA_HANDLE_COMPONENTS) + message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()") + endif() + endif() + + if (DEFINED FPHSA_NAME_MISMATCHED_override) + set(FPHSA_NAME_MISMATCHED "${FPHSA_NAME_MISMATCHED_override}") + endif () + + if (DEFINED CMAKE_FIND_PACKAGE_NAME + AND NOT FPHSA_NAME_MISMATCHED + AND NOT _NAME STREQUAL CMAKE_FIND_PACKAGE_NAME) + message(AUTHOR_WARNING + "The package name passed to `find_package_handle_standard_args` " + "(${_NAME}) does not match the name of the calling package " + "(${CMAKE_FIND_PACKAGE_NAME}). This can lead to problems in calling " + "code that expects `find_package` result variables (e.g., `_FOUND`) " + "to follow a certain pattern.") + endif () + + if (${_NAME}_FIND_VERSION_RANGE AND NOT FPHSA_HANDLE_VERSION_RANGE) + message(AUTHOR_WARNING + "`find_package()` specify a version range but the module ${_NAME} does " + "not support this capability. 
Only the lower endpoint of the range " + "will be used.") + endif() + + # to propagate package name to FIND_PACKAGE_CHECK_VERSION + set(_CMAKE_FPHSA_PACKAGE_NAME "${_NAME}") + + # now that we collected all arguments, process them + + if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG") + set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}") + endif() + + if (FPHSA_REQUIRED_VARS) + list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR) + endif() + + string(TOUPPER ${_NAME} _NAME_UPPER) + string(TOLOWER ${_NAME} _NAME_LOWER) + + if(FPHSA_FOUND_VAR) + set(_FOUND_VAR_UPPER ${_NAME_UPPER}_FOUND) + set(_FOUND_VAR_MIXED ${_NAME}_FOUND) + if(FPHSA_FOUND_VAR STREQUAL _FOUND_VAR_MIXED OR FPHSA_FOUND_VAR STREQUAL _FOUND_VAR_UPPER) + set(_FOUND_VAR ${FPHSA_FOUND_VAR}) + else() + message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_FOUND_VAR_MIXED}\" and \"${_FOUND_VAR_UPPER}\" are valid names.") + endif() + else() + set(_FOUND_VAR ${_NAME_UPPER}_FOUND) + endif() + + # collect all variables which were not found, so they can be printed, so the + # user knows better what went wrong (#6375) + set(MISSING_VARS "") + set(DETAILS "") + # check if all passed variables are valid + set(FPHSA_FOUND_${_NAME} TRUE) + foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS}) + if(NOT ${_CURRENT_VAR}) + set(FPHSA_FOUND_${_NAME} FALSE) + string(APPEND MISSING_VARS " ${_CURRENT_VAR}") + else() + string(APPEND DETAILS "[${${_CURRENT_VAR}}]") + endif() + endforeach() + if(FPHSA_FOUND_${_NAME}) + set(${_NAME}_FOUND TRUE) + set(${_NAME_UPPER}_FOUND TRUE) + else() + set(${_NAME}_FOUND FALSE) + set(${_NAME_UPPER}_FOUND FALSE) + endif() + + # component handling + unset(FOUND_COMPONENTS_MSG) + unset(MISSING_COMPONENTS_MSG) + + if(FPHSA_HANDLE_COMPONENTS) + foreach(comp ${${_NAME}_FIND_COMPONENTS}) + if(${_NAME}_${comp}_FOUND) + + if(NOT DEFINED FOUND_COMPONENTS_MSG) + set(FOUND_COMPONENTS_MSG "found components:") + endif() + string(APPEND FOUND_COMPONENTS_MSG " ${comp}") + + else() + + 
if(NOT DEFINED MISSING_COMPONENTS_MSG) + set(MISSING_COMPONENTS_MSG "missing components:") + endif() + string(APPEND MISSING_COMPONENTS_MSG " ${comp}") + + if(${_NAME}_FIND_REQUIRED_${comp}) + set(${_NAME}_FOUND FALSE) + string(APPEND MISSING_VARS " ${comp}") + endif() + + endif() + endforeach() + set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}") + string(APPEND DETAILS "[c${COMPONENT_MSG}]") + endif() + + # version handling: + set(VERSION_MSG "") + set(VERSION_OK TRUE) + + # check with DEFINED here as the requested or found version may be "0" + if (DEFINED ${_NAME}_FIND_VERSION) + if(DEFINED ${FPHSA_VERSION_VAR}) + set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}}) + if (FPHSA_HANDLE_VERSION_RANGE) + set (FPCV_HANDLE_VERSION_RANGE HANDLE_VERSION_RANGE) + else() + set(FPCV_HANDLE_VERSION_RANGE NO_AUTHOR_WARNING_VERSION_RANGE) + endif() + find_package_check_version ("${_FOUND_VERSION}" VERSION_OK RESULT_MESSAGE_VARIABLE VERSION_MSG + ${FPCV_HANDLE_VERSION_RANGE}) + else() + # if the package was not found, but a version was given, add that to the output: + if(${_NAME}_FIND_VERSION_EXACT) + set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")") + elseif (FPHSA_HANDLE_VERSION_RANGE AND ${_NAME}_FIND_VERSION_RANGE) + set(VERSION_MSG "(Required is version range \"${${_NAME}_FIND_VERSION_RANGE}\")") + else() + set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")") + endif() + endif() + else () + # Check with DEFINED as the found version may be 0. 
+    if(DEFINED ${FPHSA_VERSION_VAR})
+      set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
+    endif()
+  endif ()
+
+  if(VERSION_OK)
+    string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
+  else()
+    set(${_NAME}_FOUND FALSE)
+  endif()
+
+
+  # print the result:
+  if (${_NAME}_FOUND)
+    FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
+  else ()
+
+    if(FPHSA_CONFIG_MODE)
+      _FPHSA_HANDLE_FAILURE_CONFIG_MODE()
+    else()
+      if(NOT VERSION_OK)
+        # version mismatch: say what was found rather than what is missing
+        set(RESULT_MSG)
+        if (_FIRST_REQUIRED_VAR)
+          string (APPEND RESULT_MSG "found ${${_FIRST_REQUIRED_VAR}}")
+        endif()
+        if (COMPONENT_MSG)
+          if (RESULT_MSG)
+            string (APPEND RESULT_MSG ", ")
+          endif()
+          string (APPEND RESULT_MSG "${FOUND_COMPONENTS_MSG}")
+        endif()
+        _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (${RESULT_MSG})")
+      else()
+        _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
+      endif()
+    endif()
+
+  endif ()
+
+  # export both spellings of the FOUND flag to the caller
+  set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
+  set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
+endfunction()
+
+
+cmake_policy(POP)
diff --git a/cmake/FindPackageMessage.cmake b/cmake/FindPackageMessage.cmake
new file mode 100644
index 0000000..323cdf6
--- /dev/null
+++ b/cmake/FindPackageMessage.cmake
@@ -0,0 +1,48 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FindPackageMessage
+------------------
+
+.. code-block:: cmake
+
+  find_package_message(<name> "message for user" "find result details")
+
+This function is intended to be used in FindXXX.cmake modules files.
+It will print a message once for each unique find result. This is
+useful for telling the user where a package was found. The first
+argument specifies the name (XXX) of the package. The second argument
+specifies the message to display. The third argument lists details
+about the find result so that if they change the message will be
+displayed again. The function also obeys the QUIET argument to the
+find_package command.
+
+Example:
+
+.. code-block:: cmake
+
+  if(X11_FOUND)
+    find_package_message(X11 "Found X11: ${X11_X11_LIB}"
+      "[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
+  else()
+    ...
+  endif()
+#]=======================================================================]
+
+function(find_package_message pkg msg details)
+  # Avoid printing a message repeatedly for the same find result.
+  if(NOT ${pkg}_FIND_QUIETLY)
+    string(REPLACE "\n" "" details "${details}")
+    set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg})
+    if(NOT "${details}" STREQUAL "${${DETAILS_VAR}}")
+      # The message has not yet been printed.
+      message(STATUS "${msg}")
+
+      # Save the find details in the cache to avoid printing the same
+      # message again.
+      set("${DETAILS_VAR}" "${details}"
+        CACHE INTERNAL "Details about finding ${pkg}")
+    endif()
+  endif()
+endfunction()
diff --git a/cmake/FindPkgconfigLibrariesAbsolutePath.cmake b/cmake/FindPkgconfigLibrariesAbsolutePath.cmake
new file mode 100644
index 0000000..74a6c3d
--- /dev/null
+++ b/cmake/FindPkgconfigLibrariesAbsolutePath.cmake
@@ -0,0 +1,99 @@
+###
+#
+# @copyright (c) 2018 Inria. All rights reserved.
+# @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST).
+#                     All rights reserved.
+#
+###
+#
+# @file FindPkgconfigLibrariesAbsolutePath.cmake
+#
+# @project AL4SAN
+# AL4SAN is a software package provided by:
+#     Inria Bordeaux - Sud-Ouest,
+#     Univ. of Tennessee,
+#     King Abdullah University of Science and Technology
+#     Univ. of California Berkeley,
+#     Univ. of Colorado Denver.
+#
+# @version 1.0.0
+# @author Florent Pruvost
+# @date 06-04-2018
+#
+###
+
+# Transform relative path into absolute path for libraries found with the
+# pkg_search_module cmake macro
+# _prefix: the name of the CMake variable used when pkg_search_module was called
+# e.g. for pkg_search_module(BLAS blas) _prefix would be BLAS
+#
+# Every entry of ${_prefix}_LIBRARIES that is not already an existing path is
+# resolved with find_library(), searching ${_prefix}_LIBDIR,
+# ${_prefix}_LIBRARY_DIRS, the platform library-path environment variable and
+# the C compiler's implicit link directories. The resolved list is written
+# back to ${_prefix}_LIBRARIES as an INTERNAL cache entry. An entry that cannot
+# be resolved stops the configuration with a FATAL_ERROR.
+macro(FIND_PKGCONFIG_LIBRARIES_ABSOLUTE_PATH _prefix)
+  if(WIN32)
+    # %LIB% is already ';'-separated; splitting it on ':' would cut drive
+    # letters ("C:\...") in two, so only normalise it to a CMake path list.
+    file(TO_CMAKE_PATH "$ENV{LIB}" _lib_env)
+  elseif(APPLE)
+    string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}")
+  else()
+    string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}")
+  endif()
+  list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}")
+  # non static case
+  set(${_prefix}_LIBRARIES_COPY "${${_prefix}_LIBRARIES}")
+  set(${_prefix}_LIBRARIES "")
+  foreach(_library ${${_prefix}_LIBRARIES_COPY})
+    if(EXISTS "${_library}")
+      # already a usable path: keep as is
+      list(APPEND ${_prefix}_LIBRARIES ${_library})
+    else()
+      # strip a known library extension so find_library() receives a bare name
+      get_filename_component(_ext "${_library}" EXT)
+      # ".dylib" is the macOS shared-library suffix (the former ".dyld" entry
+      # could never match a real file name)
+      set(_lib_extensions ".so" ".a" ".dylib" ".dll")
+      list(FIND _lib_extensions "${_ext}" _index)
+      if (${_index} GREATER -1)
+        get_filename_component(_library "${_library}" NAME_WE)
+      endif()
+      find_library(_library_path NAMES ${_library}
+        HINTS ${${_prefix}_LIBDIR} ${${_prefix}_LIBRARY_DIRS} ${_lib_env})
+      if (_library_path)
+        list(APPEND ${_prefix}_LIBRARIES ${_library_path})
+      else()
+        message(FATAL_ERROR "Dependency of ${_prefix} '${_library}' NOT FOUND")
+      endif()
+      # drop the cache entry so the next iteration performs a fresh search
+      unset(_library_path CACHE)
+    endif()
+  endforeach()
+  set (${_prefix}_LIBRARIES "${${_prefix}_LIBRARIES}" CACHE INTERNAL "" FORCE)
+  ## static case
+  #set(${_prefix}_STATIC_LIBRARIES_COPY "${${_prefix}_STATIC_LIBRARIES}")
+  #set(${_prefix}_STATIC_LIBRARIES "")
+  #foreach(_library ${${_prefix}_STATIC_LIBRARIES_COPY})
+  #  if(EXISTS "${_library}")
+  #    list(APPEND ${_prefix}_STATIC_LIBRARIES ${_library})
+  #  else()
+  #    get_filename_component(_ext "${_library}" EXT)
+  #    set(_lib_extensions ".so" ".a" ".dylib" ".dll")
+  #    list(FIND _lib_extensions "${_ext}" _index)
+  #    if (${_index} GREATER -1)
+  #      get_filename_component(_library "${_library}" NAME_WE)
+  #    endif()
+  #    # try static first
+  #    set (default_find_library_suffixes ${CMAKE_FIND_LIBRARY_SUFFIXES})
+  #    set (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX})
+  #    find_library(_library_path NAMES ${_library}
+  #      HINTS ${${_prefix}_STATIC_LIBDIR} ${${_prefix}_STATIC_LIBRARY_DIRS} ${_lib_env})
+  #    set (CMAKE_FIND_LIBRARY_SUFFIXES ${default_find_library_suffixes})
+  #    # if not found try dynamic
+  #    if (NOT _library_path)
+  #      find_library(_library_path NAMES ${_library}
+  #        HINTS ${${_prefix}_STATIC_LIBDIR} ${${_prefix}_STATIC_LIBRARY_DIRS} ${_lib_env})
+  #    endif()
+  #    if (_library_path)
+  #      list(APPEND ${_prefix}_STATIC_LIBRARIES ${_library_path})
+  #    else()
+  #      message(FATAL_ERROR "Dependency of ${_prefix} '${_library}' NOT FOUND")
+  #    endif()
+  #    unset(_library_path CACHE)
+  #  endif()
+  #endforeach()
+  #set (${_prefix}_STATIC_LIBRARIES "${${_prefix}_STATIC_LIBRARIES}" CACHE INTERNAL "" FORCE)
+endmacro()
+
+##
+## @end file FindPkgconfigLibrariesAbsolutePath.cmake
+##
diff --git a/cmake/FindSTARPU.cmake b/cmake/FindSTARPU.cmake
new file mode 100644
index 0000000..2cfc39d
--- /dev/null
+++ b/cmake/FindSTARPU.cmake
@@ -0,0 +1,952 @@
+###
+#
+# @copyright (c) 2009-2014 The University of Tennessee and The University
+#                          of Tennessee Research Foundation.
+#                          All rights reserved.
+# @copyright (c) 2012-2015 Inria. All rights reserved.
+# @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+# @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST).
+#                     All rights reserved.
+#
+###
+#
+# - Find STARPU include dirs and libraries
+# Use this module by invoking find_package with the form:
+#  find_package(STARPU
+#               [version] [EXACT]      # Minimum or EXACT version e.g. 1.1
+#               [REQUIRED]             # Fail with error if starpu is not found
+#               [COMPONENTS <comp1> <comp2> ...] # dependencies
+#              )
+#
+#  COMPONENTS are optional libraries STARPU could be linked with,
+#  Use it to drive detection of a specific compilation chain
+#  COMPONENTS can be some of the following:
+#   - HWLOC: to activate the detection of StarPU linked with HWLOC
+#   - CUDA: to activate the detection of StarPU linked with CUDA
+#   - MPI: to activate the detection of StarPU linked with MPI
+#   - BLAS: to activate the detection of StarPU linked with BLAS
+#   - MAGMA: to activate the detection of StarPU linked with MAGMA
+#   - FXT: to activate the detection of StarPU linked with FxT
+#   - SIMGRID: to activate the detection of StarPU linked with SimGrid
+#
+# Results are reported in variables:
+#  STARPU_FOUND - True if headers and requested libraries were found
+#  STARPU_C_FLAGS - list of required compilation flags (excluding -I)
+#  STARPU_LINKER_FLAGS - list of required linker flags (excluding -l and -L)
+#  STARPU_INCLUDE_DIRS - starpu include directories
+#  STARPU_LIBRARY_DIRS - Link directories for starpu libraries
+#  STARPU_LIBRARIES - starpu libraries
+#  STARPU_SHM_LIBRARIES - starpu libraries without libstarpumpi
+#  STARPU_MPI_LIBRARIES - starpu libraries with libstarpumpi
+#  STARPU_INCLUDE_DIRS_DEP - starpu + dependencies include directories
+#  STARPU_LIBRARY_DIRS_DEP - starpu + dependencies link directories
+#  STARPU_LIBRARIES_DEP - starpu libraries + dependencies
+#  STARPU_VERSION_STRING - A human-readable string containing the version of the package found
+#  STARPU_VERSION_MAJOR - The major version of the package found
+#  STARPU_VERSION_MINOR - The minor version of the package found
+# The user can give specific paths where to find the libraries adding cmake
+# options at configure (ex: cmake path/to/project -DSTARPU_DIR=path/to/starpu):
+#  STARPU_DIR - Where to find the base directory of starpu
+#  STARPU_INCDIR - Where to find the header files
+#  STARPU_LIBDIR - Where to find the library files
+# The module can also look for the following environment variables if paths
+# are not given as cmake variable: STARPU_DIR, STARPU_INCDIR, STARPU_LIBDIR
+
+#=============================================================================
+# Copyright 2012-2013 Inria
+# Copyright 2012-2013 Emmanuel Agullo
+# Copyright 2012-2013 Mathieu Faverge
+# Copyright 2012 Cedric Castagnede
+# Copyright 2013 Florent Pruvost
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file ECRC-Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#=============================================================================
+# (To distribute this file outside of Ecrc, substitute the full
+# License text for the above reference.)
+
+if (NOT STARPU_FOUND)
+  set(STARPU_DIR "" CACHE PATH "Installation directory of STARPU library")
+  if (NOT STARPU_FIND_QUIETLY)
+    message(STATUS "A cache variable, namely STARPU_DIR, has been set to specify the install directory of STARPU")
+  endif()
+endif()
+
+# STARPU may depend on other packages (HWLOC, MPI, CUDA, BLAS, MAGMA, FXT, ...)
+# try to find them if specified as COMPONENTS during the call
+set(STARPU_LOOK_FOR_HWLOC FALSE)
+set(STARPU_LOOK_FOR_CUDA FALSE)
+set(STARPU_LOOK_FOR_MPI FALSE)
+set(STARPU_LOOK_FOR_BLAS FALSE)
+set(STARPU_LOOK_FOR_MAGMA FALSE)
+set(STARPU_LOOK_FOR_FXT FALSE)
+set(STARPU_LOOK_FOR_SIMGRID FALSE)
+
+if( STARPU_FIND_COMPONENTS )
+  foreach( component ${STARPU_FIND_COMPONENTS} )
+    # Compare through the variable name: an expanded, unquoted ${component}
+    # would be dereferenced a second time if a variable called e.g. MPI exists.
+    if(component STREQUAL "HWLOC")
+      set(STARPU_LOOK_FOR_HWLOC TRUE)
+    elseif(component STREQUAL "CUDA")
+      set(STARPU_LOOK_FOR_CUDA TRUE)
+    elseif(component STREQUAL "MPI")
+      set(STARPU_LOOK_FOR_MPI TRUE)
+    elseif(component STREQUAL "BLAS")
+      set(STARPU_LOOK_FOR_BLAS TRUE)
+    elseif(component STREQUAL "MAGMA")
+      set(STARPU_LOOK_FOR_MAGMA TRUE)
+    elseif(component STREQUAL "FXT")
+      set(STARPU_LOOK_FOR_FXT TRUE)
+    elseif(component STREQUAL "SIMGRID")
+      set(STARPU_LOOK_FOR_SIMGRID TRUE)
+    endif()
+  endforeach()
+endif()
+
+# STARPU may depend on pthread, try to find it
+find_package(Threads)
+if( THREADS_FOUND )
+  list(APPEND STARPU_EXTRA_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+endif ()
+# STARPU may depend on libm, try to find it
+find_library(M_m_LIBRARY NAMES m)
+mark_as_advanced(M_m_LIBRARY)
+if( M_m_LIBRARY )
+  list(APPEND STARPU_EXTRA_LIBRARIES ${M_m_LIBRARY})
+endif ()
+# STARPU may depend on librt, try to find it
+# (search for "rt": the previous "m" made this a second lookup of libm)
+find_library(RT_rt_LIBRARY NAMES rt)
+mark_as_advanced(RT_rt_LIBRARY)
+if( RT_rt_LIBRARY )
+  list(APPEND STARPU_EXTRA_LIBRARIES ${RT_rt_LIBRARY})
+endif ()
+
+# STARPU may depend on HWLOC, try to find it
+if (NOT HWLOC_FOUND AND STARPU_LOOK_FOR_HWLOC)
+  if (STARPU_FIND_REQUIRED AND STARPU_FIND_REQUIRED_HWLOC)
+    find_package(HWLOC REQUIRED)
+  else()
+    find_package(HWLOC)
+  endif()
+endif()
+
+# STARPU may depend on CUDA, try to find it
+if (NOT CUDA_FOUND AND STARPU_LOOK_FOR_CUDA)
+  if (STARPU_FIND_REQUIRED AND STARPU_FIND_REQUIRED_CUDA)
+    find_package(CUDA REQUIRED)
+  else()
+    find_package(CUDA)
+  endif()
+  if (CUDA_FOUND)
+    mark_as_advanced(CUDA_BUILD_CUBIN)
+    mark_as_advanced(CUDA_BUILD_EMULATION)
+    mark_as_advanced(CUDA_SDK_ROOT_DIR)
+    mark_as_advanced(CUDA_TOOLKIT_ROOT_DIR)
+    mark_as_advanced(CUDA_VERBOSE_BUILD)
+  endif()
+endif()
+
+# STARPU may depend on MPI, try to find it
+if (NOT MPI_FOUND AND STARPU_LOOK_FOR_MPI)
+  if (STARPU_FIND_REQUIRED AND STARPU_FIND_REQUIRED_MPI)
+    find_package(MPI REQUIRED)
+  else()
+    find_package(MPI)
+  endif()
+  if (MPI_FOUND)
+    mark_as_advanced(MPI_LIBRARY)
+    mark_as_advanced(MPI_EXTRA_LIBRARY)
+  endif()
+endif()
+
+# STARPU may depend on BLAS, try to find it
+if (NOT BLAS_FOUND AND STARPU_LOOK_FOR_BLAS)
+  if (STARPU_FIND_REQUIRED AND STARPU_FIND_REQUIRED_BLAS)
+    find_package(BLASEXT REQUIRED)
+  else()
+    find_package(BLASEXT)
+  endif()
+endif()
+
+# STARPU may depend on MAGMA, try to find it
+if (NOT MAGMA_FOUND AND STARPU_LOOK_FOR_MAGMA)
+  if (STARPU_FIND_REQUIRED AND STARPU_FIND_REQUIRED_MAGMA)
+    find_package(MAGMA REQUIRED)
+  else()
+    find_package(MAGMA)
+  endif()
+endif()
+
+# STARPU may depend on FXT, try to find it
+if (NOT FXT_FOUND AND STARPU_LOOK_FOR_FXT)
+  if (STARPU_FIND_REQUIRED AND STARPU_FIND_REQUIRED_FXT)
+    find_package(FXT REQUIRED)
+  else()
+    find_package(FXT)
+  endif()
+endif()
+
+# STARPU may depend on SIMGRID, try to find it
+if (NOT SIMGRID_FOUND AND STARPU_LOOK_FOR_SIMGRID)
+  if (STARPU_FIND_REQUIRED AND STARPU_FIND_REQUIRED_SIMGRID)
+    find_package(SIMGRID REQUIRED)
+  else()
+    find_package(SIMGRID)
+  endif()
+endif()
+
+# A user-supplied location (cmake or environment variable) bypasses pkg-config
+set(ENV_STARPU_DIR "$ENV{STARPU_DIR}")
+set(ENV_STARPU_INCDIR "$ENV{STARPU_INCDIR}")
+set(ENV_STARPU_LIBDIR "$ENV{STARPU_LIBDIR}")
+set(STARPU_GIVEN_BY_USER "FALSE")
+if ( STARPU_DIR OR ( STARPU_INCDIR AND STARPU_LIBDIR) OR ENV_STARPU_DIR OR (ENV_STARPU_INCDIR AND ENV_STARPU_LIBDIR) )
+  set(STARPU_GIVEN_BY_USER "TRUE")
+endif()
+
+# Optionally use pkg-config to detect include/library dirs (if pkg-config is available)
+# -------------------------------------------------------------------------------------
+
+
+if(PKG_CONFIG_EXECUTABLE AND NOT STARPU_GIVEN_BY_USER)
+  pkg_search_module(STARPU_SHM libstarpu)
+  set(STARPU_INCLUDE_DIRS "${STARPU_SHM_INCLUDE_DIRS}")
+  set(STARPU_LIBRARY_DIRS "${STARPU_SHM_LIBRARY_DIRS}")
+  if(STARPU_LOOK_FOR_MPI AND MPI_FOUND)
+    pkg_search_module(STARPU_MPI libstarpumpi)
+  endif()
+
+  # Record the version reported by pkg-config whether or not the call is
+  # QUIET, so the STARPU_VERSION_* result variables are always available.
+  set(_starpu_pc_version_known FALSE)
+  if (STARPU_SHM_FOUND AND STARPU_SHM_LIBRARIES AND NOT "${STARPU_SHM_VERSION}" STREQUAL "")
+    set(STARPU_VERSION_STRING "${STARPU_SHM_VERSION}")
+    string(REPLACE "." ";" STARPU_VERSION_STRING_LIST "${STARPU_VERSION_STRING}")
+    list(LENGTH STARPU_VERSION_STRING_LIST _starpu_pc_version_count)
+    list(GET STARPU_VERSION_STRING_LIST 0 STARPU_VERSION_MAJOR)
+    if (_starpu_pc_version_count GREATER 1)
+      list(GET STARPU_VERSION_STRING_LIST 1 STARPU_VERSION_MINOR)
+    else()
+      # a single-component version has an implicit minor of 0
+      set(STARPU_VERSION_MINOR 0)
+    endif()
+    set(_starpu_pc_version_known TRUE)
+  endif()
+
+  if (NOT STARPU_FIND_QUIETLY)
+    if (STARPU_SHM_FOUND AND STARPU_SHM_LIBRARIES)
+      message(STATUS "Looking for STARPU - found using PkgConfig")
+      #if(NOT STARPU_SHM_INCLUDE_DIRS)
+      #  message("${Magenta}STARPU_SHM_INCLUDE_DIRS is empty using PkgConfig."
+      #    "Perhaps the path to starpu headers is already present in your"
+      #    "C(PLUS)_INCLUDE_PATH environment variable.${ColourReset}")
+      #endif()
+    else()
+      message(STATUS "${Magenta}Looking for STARPU - not found using PkgConfig."
+        "\n Perhaps you should add the directory containing libstarpu.pc"
+        "\n to the PKG_CONFIG_PATH environment variable.${ColourReset}")
+    endif()
+    if (STARPU_LOOK_FOR_MPI)
+      if (STARPU_MPI_FOUND AND STARPU_MPI_LIBRARIES)
+        message(STATUS "Looking for STARPU MPI - found using PkgConfig")
+      else()
+        message(STATUS "${Magenta}Looking for STARPU MPI - not found using PkgConfig."
+          " Perhaps you should add the directory containing libstarpumpi.pc to"
+          " the PKG_CONFIG_PATH environment variable.${ColourReset}")
+      endif()
+    endif()
+  endif()
+
+  # Validate the requested version only when pkg-config actually reported one
+  # and a version was requested; otherwise the manual search further down
+  # still gets its chance instead of aborting on an empty version.
+  # Versions are compared as "major.minor" with the VERSION_* operators:
+  # comparing the two components independently as strings rejects e.g. 2.0
+  # against a 1.3 requirement and orders "10" before "3".
+  if (_starpu_pc_version_known AND NOT "${STARPU_FIND_VERSION_MAJOR}" STREQUAL "")
+    if (STARPU_FIND_VERSION_EXACT)
+      if( NOT "${STARPU_FIND_VERSION_MAJOR}.${STARPU_FIND_VERSION_MINOR}" VERSION_EQUAL "${STARPU_VERSION_MAJOR}.${STARPU_VERSION_MINOR}" )
+        if(STARPU_FIND_REQUIRED AND NOT STARPU_FIND_QUIETLY)
+          message(FATAL_ERROR
+            "STARPU version found is ${STARPU_VERSION_STRING}"
+            " when required is ${STARPU_FIND_VERSION}")
+        endif()
+      endif()
+    else()
+      # if the version found is older than the required then error
+      if( "${STARPU_VERSION_MAJOR}.${STARPU_VERSION_MINOR}" VERSION_LESS "${STARPU_FIND_VERSION_MAJOR}.${STARPU_FIND_VERSION_MINOR}" )
+        if(STARPU_FIND_REQUIRED AND NOT STARPU_FIND_QUIETLY)
+          message(FATAL_ERROR
+            "STARPU version found is ${STARPU_VERSION_STRING}"
+            " when required is ${STARPU_FIND_VERSION} or newer")
+        endif()
+      endif()
+    endif()
+  endif()
+
+  if(STARPU_MPI_LIBRARIES)
+    if (STARPU_LOOK_FOR_SIMGRID)
+      # Cmake does not fetch explicit libfxt.a static paths from pkg-config...
+      find_package(FXT)
+      string(REGEX MATCH "[^;]*/libfxt.a" FXT_STATIC_LIB "${STARPU_MPI_LDFLAGS_OTHER}")
+      # only add the static FxT archive when the linker flags really name one
+      if (FXT_STATIC_LIB)
+        list(APPEND STARPU_MPI_LIBRARIES "${FXT_STATIC_LIB}")
+      endif()
+    endif()
+    set(STARPU_LIBRARIES "${STARPU_MPI_LIBRARIES}")
+    set(STARPU_LINKER_FLAGS "${STARPU_MPI_LDFLAGS_OTHER}")
+  elseif(STARPU_SHM_LIBRARIES)
+    set(STARPU_LIBRARIES "${STARPU_SHM_LIBRARIES}")
+    set(STARPU_LINKER_FLAGS "${STARPU_SHM_LDFLAGS_OTHER}")
+  else()
+    set(STARPU_LIBRARIES "STARPU_LIBRARIES-NOTFOUND")
+  endif()
+  set(STARPU_INCLUDE_DIRS_DEP "${STARPU_INCLUDE_DIRS}")
+  set(STARPU_LIBRARY_DIRS_DEP "${STARPU_LIBRARY_DIRS}")
+  set(STARPU_LIBRARIES_DEP "${STARPU_LIBRARIES}")
+  if (STARPU_SHM_FOUND)
+    set(STARPU_FOUND "TRUE")
+  else()
+    set(STARPU_FOUND "FALSE")
+  endif()
+  if (STARPU_LOOK_FOR_MPI AND NOT STARPU_MPI_FOUND)
+    set(STARPU_FOUND "FALSE")
+  endif()
+endif(PKG_CONFIG_EXECUTABLE AND NOT STARPU_GIVEN_BY_USER)
+
+
+# Manual search: used when pkg-config is unavailable, when it did not find
+# StarPU, or when the user supplied an explicit location.
+if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT STARPU_FOUND) OR (STARPU_GIVEN_BY_USER) )
+  # Looking for include
+  # -------------------
+
+  # Add system include paths to search include
+  # ------------------------------------------
+  unset(_inc_env)
+  set(ENV_STARPU_DIR "$ENV{STARPU_DIR}")
+  set(ENV_STARPU_INCDIR "$ENV{STARPU_INCDIR}")
+  if(ENV_STARPU_INCDIR)
+    list(APPEND _inc_env "${ENV_STARPU_INCDIR}")
+  elseif(ENV_STARPU_DIR)
+    list(APPEND _inc_env "${ENV_STARPU_DIR}")
+    list(APPEND _inc_env "${ENV_STARPU_DIR}/include")
+    list(APPEND _inc_env "${ENV_STARPU_DIR}/include/starpu/1.0")
+    list(APPEND _inc_env "${ENV_STARPU_DIR}/include/starpu/1.1")
+    list(APPEND _inc_env "${ENV_STARPU_DIR}/include/starpu/1.2")
+    list(APPEND _inc_env "${ENV_STARPU_DIR}/include/starpu/1.3")
+    list(APPEND _inc_env "${ENV_STARPU_DIR}/include/starpu/1.4")
+  else()
+    if(WIN32)
+      # %INCLUDE% is ';'-separated; splitting on ':' would break "C:\..." paths
+      file(TO_CMAKE_PATH "$ENV{INCLUDE}" _inc_env)
+    else()
+      string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}")
+      list(APPEND _inc_env "${_path_env}")
+      string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}")
+      list(APPEND _inc_env "${_path_env}")
+      string(REPLACE ":" ";" _path_env "$ENV{CPATH}")
+      list(APPEND _inc_env "${_path_env}")
+      string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}")
+      list(APPEND _inc_env "${_path_env}")
+    endif()
+  endif()
+  list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}")
+  list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}")
+  list(REMOVE_DUPLICATES _inc_env)
+
+  # Try to find the version of StarPU in starpu_config.h file
+  set(STARPU_hdrs_to_find "starpu_config.h")
+
+  # call cmake macro to find the header path
+  if(STARPU_INCDIR)
+    foreach(starpu_hdr ${STARPU_hdrs_to_find})
+      set(STARPU_${starpu_hdr}_INCLUDE_DIRS "STARPU_${starpu_hdr}_INCLUDE_DIRS-NOTFOUND")
+      find_path(STARPU_${starpu_hdr}_INCLUDE_DIRS
+        NAMES ${starpu_hdr}
+        HINTS ${STARPU_INCDIR})
+    endforeach()
+  else()
+    if(STARPU_DIR)
+      foreach(starpu_hdr ${STARPU_hdrs_to_find})
+        set(STARPU_${starpu_hdr}_INCLUDE_DIRS "STARPU_${starpu_hdr}_INCLUDE_DIRS-NOTFOUND")
+        find_path(STARPU_${starpu_hdr}_INCLUDE_DIRS
+          NAMES ${starpu_hdr}
+          HINTS ${STARPU_DIR}
+          PATH_SUFFIXES "include"
+          "include/starpu/1.0"
+          "include/starpu/1.1"
+          "include/starpu/1.2"
+          "include/starpu/1.3"
+          "include/starpu/1.4")
+      endforeach()
+    else()
+      foreach(starpu_hdr ${STARPU_hdrs_to_find})
+        set(STARPU_${starpu_hdr}_INCLUDE_DIRS "STARPU_${starpu_hdr}_INCLUDE_DIRS-NOTFOUND")
+        find_path(STARPU_${starpu_hdr}_INCLUDE_DIRS
+          NAMES ${starpu_hdr}
+          HINTS ${_inc_env}
+          PATH_SUFFIXES
+          "starpu/1.0"
+          "starpu/1.1"
+          "starpu/1.2"
+          "starpu/1.3"
+          "starpu/1.4")
+      endforeach()
+    endif()
+  endif()
+  mark_as_advanced(STARPU_starpu_config.h_INCLUDE_DIRS)
+
+  ###
+  #
+  # GET_VERSION: Get the version of the software by parsing a file
+  #
+  # Reads _filepath and looks for "define <_PACKAGE>_MAJOR_VERSION <n>" and
+  # "define <_PACKAGE>_MINOR_VERSION <n>". Sets <_PACKAGE>_VERSION_MAJOR,
+  # <_PACKAGE>_VERSION_MINOR (empty when the define is absent) and
+  # <_PACKAGE>_VERSION_STRING ("major.minor", also stored in the cache).
+  #
+  ###
+  MACRO(GET_VERSION _PACKAGE _filepath)
+
+    #message(STATUS "Looking for ${_PACKAGE} version in the file ${_filepath}")
+    file(READ "${_filepath}" _file)
+    # MATCH instead of REPLACE: a REPLACE that does not match returns the whole
+    # file content, which would then be taken for the version number.
+    if(_file MATCHES "define[ \t]+${_PACKAGE}_MAJOR_VERSION[ \t]+([0-9]+)")
+      set(${_PACKAGE}_VERSION_MAJOR "${CMAKE_MATCH_1}")
+    else()
+      set(${_PACKAGE}_VERSION_MAJOR "")
+    endif()
+    if(_file MATCHES "define[ \t]+${_PACKAGE}_MINOR_VERSION[ \t]+([0-9]+)")
+      set(${_PACKAGE}_VERSION_MINOR "${CMAKE_MATCH_1}")
+    else()
+      set(${_PACKAGE}_VERSION_MINOR "")
+    endif()
+    # STRING (the value is not a path) and FORCE so a changed installation
+    # refreshes the entry; the normal variable is set as well so a value left
+    # by an earlier detection step cannot shadow the cache.
+    set(${_PACKAGE}_VERSION_STRING
+      "${${_PACKAGE}_VERSION_MAJOR}.${${_PACKAGE}_VERSION_MINOR}" CACHE STRING "StarPU version found in starpu_config.h" FORCE)
+    set(${_PACKAGE}_VERSION_STRING
+      "${${_PACKAGE}_VERSION_MAJOR}.${${_PACKAGE}_VERSION_MINOR}")
+    #message(STATUS "${_PACKAGE}_VERSION_MAJOR = -${${_PACKAGE}_VERSION_MAJOR}-")
+    #message(STATUS "${_PACKAGE}_VERSION_MINOR = -${${_PACKAGE}_VERSION_MINOR}-")
+
+  ENDMACRO(GET_VERSION)
+
+  # Find the version of StarPU in starpu_config.h file
+  # remark: the version is defined in this file since the STARPU 1.0 version
+  if (STARPU_starpu_config.h_INCLUDE_DIRS)
+    GET_VERSION("STARPU" "${STARPU_starpu_config.h_INCLUDE_DIRS}/starpu_config.h")
+    # Test for emptiness, not truthiness: a component equal to 0 (e.g. the
+    # minor of 2.0) is a valid number but evaluates to false in if().
+    if (NOT "${STARPU_VERSION_MAJOR}" STREQUAL "" AND NOT "${STARPU_VERSION_MINOR}" STREQUAL "")
+      if (NOT "${STARPU_FIND_VERSION_MAJOR}" STREQUAL "" AND NOT "${STARPU_FIND_VERSION_MINOR}" STREQUAL "")
+        # Compare "major.minor" as versions; comparing the components one by
+        # one as strings rejects 2.0 against 1.3 and orders "10" before "3".
+        if (STARPU_FIND_VERSION_EXACT)
+          if( NOT "${STARPU_FIND_VERSION_MAJOR}.${STARPU_FIND_VERSION_MINOR}" VERSION_EQUAL "${STARPU_VERSION_MAJOR}.${STARPU_VERSION_MINOR}" )
+            if(STARPU_FIND_REQUIRED AND NOT STARPU_FIND_QUIETLY)
+              message(FATAL_ERROR
+                "STARPU version found is ${STARPU_VERSION_STRING} "
+                "when required is ${STARPU_FIND_VERSION}")
+            endif()
+          endif()
+        else()
+          # if the version found is older than the required then error
+          if( "${STARPU_VERSION_MAJOR}.${STARPU_VERSION_MINOR}" VERSION_LESS "${STARPU_FIND_VERSION_MAJOR}.${STARPU_FIND_VERSION_MINOR}" )
+            if(STARPU_FIND_REQUIRED AND NOT STARPU_FIND_QUIETLY)
+              message(FATAL_ERROR
+                "STARPU version found is ${STARPU_VERSION_STRING} "
+                "when required is ${STARPU_FIND_VERSION} or newer")
+            endif()
+          endif()
+        endif()
+      endif()
+    else()
+      if(STARPU_FIND_REQUIRED AND NOT STARPU_FIND_QUIETLY)
+        message(FATAL_ERROR
+          "STARPU version has not been found using starpu_config.h"
+          " located in ${STARPU_starpu_config.h_INCLUDE_DIRS}")
+      endif()
+    endif()
+  else()
+    if(STARPU_FIND_REQUIRED AND NOT STARPU_FIND_QUIETLY)
+      message(FATAL_ERROR
+        "starpu_config.h has not been found while required to get StarPU version")
+    endif()
+  endif()
+
+
+  # Try to find the starpu headers in the given paths
+  # -------------------------------------------------
+
+  # create list of headers to find
+  list(APPEND STARPU_hdrs_to_find "starpu.h;starpu_profiling.h")
+  if(STARPU_LOOK_FOR_MPI AND MPI_FOUND)
+    list(APPEND STARPU_hdrs_to_find "starpu_mpi.h")
+  endif()
+  if(STARPU_LOOK_FOR_CUDA AND CUDA_FOUND)
+    list(APPEND STARPU_hdrs_to_find "starpu_cuda.h;starpu_scheduler.h")
+  endif()
+
+  # call cmake macro to find the header path
+  if(STARPU_INCDIR)
+    foreach(starpu_hdr ${STARPU_hdrs_to_find})
+      set(STARPU_${starpu_hdr}_INCLUDE_DIRS "STARPU_${starpu_hdr}_INCLUDE_DIRS-NOTFOUND")
+      find_path(STARPU_${starpu_hdr}_INCLUDE_DIRS
+        NAMES ${starpu_hdr}
+        HINTS ${STARPU_INCDIR})
+    endforeach()
+  else()
+    if(STARPU_DIR)
+      foreach(starpu_hdr ${STARPU_hdrs_to_find})
+        # reset per header, inside the loop: outside it `starpu_hdr` is not
+        # the loop variable, so the reset hit the wrong (or an empty) name
+        set(STARPU_${starpu_hdr}_INCLUDE_DIRS "STARPU_${starpu_hdr}_INCLUDE_DIRS-NOTFOUND")
+        find_path(STARPU_${starpu_hdr}_INCLUDE_DIRS
+          NAMES ${starpu_hdr}
+          HINTS ${STARPU_DIR}
+          PATH_SUFFIXES "include"
+          "include/starpu/1.0"
+          "include/starpu/1.1"
+          "include/starpu/1.2"
+          "include/starpu/1.3"
+          "include/starpu/1.4")
+      endforeach()
+    else()
+      foreach(starpu_hdr ${STARPU_hdrs_to_find})
+        set(STARPU_${starpu_hdr}_INCLUDE_DIRS "STARPU_${starpu_hdr}_INCLUDE_DIRS-NOTFOUND")
+        find_path(STARPU_${starpu_hdr}_INCLUDE_DIRS
+          NAMES ${starpu_hdr}
+          HINTS ${_inc_env}
+          PATH_SUFFIXES
+          "starpu/1.0"
+          "starpu/1.1"
+          "starpu/1.2"
+          "starpu/1.3"
+          "starpu/1.4")
+      endforeach()
+    endif()
+  endif()
+
+  # If found, add path to cmake variable
+  # ------------------------------------
+  set(STARPU_INCLUDE_DIRS "")
+  foreach(starpu_hdr ${STARPU_hdrs_to_find})
+
+    if (STARPU_${starpu_hdr}_INCLUDE_DIRS)
+      # set cmake variables using the pkg-config naming convention
+      list(APPEND STARPU_INCLUDE_DIRS "${STARPU_${starpu_hdr}_INCLUDE_DIRS}" )
+    else ()
+      if(NOT STARPU_FIND_QUIETLY)
+        message(STATUS "Looking for starpu -- ${starpu_hdr} not found")
+      endif()
+      # a missing optional-component header is tolerated; a required one
+      # leaves its -NOTFOUND marker in the include list
+      if(starpu_hdr STREQUAL "starpu_mpi.h")
+        if(NOT STARPU_FIND_REQUIRED_MPI)
+          if (NOT STARPU_FIND_QUIETLY)
+            message(STATUS "Looking for starpu -- ${starpu_hdr} not required")
+          endif()
+        else()
+          list(APPEND STARPU_INCLUDE_DIRS "${STARPU_${starpu_hdr}_INCLUDE_DIRS}" )
+        endif()
+      elseif( (starpu_hdr STREQUAL "starpu_cuda.h") OR (starpu_hdr STREQUAL "starpu_scheduler.h") )
+        if(NOT STARPU_FIND_REQUIRED_CUDA)
+          if (NOT STARPU_FIND_QUIETLY)
+            message(STATUS "Looking for starpu -- ${starpu_hdr} not required")
+          endif()
+        else()
+          list(APPEND STARPU_INCLUDE_DIRS "${STARPU_${starpu_hdr}_INCLUDE_DIRS}" )
+        endif()
+      endif()
+    endif ()
+    mark_as_advanced(STARPU_${starpu_hdr}_INCLUDE_DIRS)
+
+  endforeach(starpu_hdr ${STARPU_hdrs_to_find})
+
+  if (STARPU_INCLUDE_DIRS)
+    list(REMOVE_DUPLICATES STARPU_INCLUDE_DIRS)
+  endif ()
+
+  # the library names embed the version, so libraries are only searched once
+  # starpu_config.h has been located
+  if (STARPU_starpu_config.h_INCLUDE_DIRS)
+    # Looking for lib
+    # ---------------
+
+    set(STARPU_SHM_LIBRARIES "")
+    set(STARPU_MPI_LIBRARIES "")
+    set(STARPU_LIBRARY_DIRS "")
+
+    # Add system library paths to search lib
+    # --------------------------------------
+    unset(_lib_env)
+    set(ENV_STARPU_LIBDIR "$ENV{STARPU_LIBDIR}")
+    if(ENV_STARPU_LIBDIR)
+      list(APPEND _lib_env "${ENV_STARPU_LIBDIR}")
+    elseif(ENV_STARPU_DIR)
+      list(APPEND _lib_env "${ENV_STARPU_DIR}")
+      list(APPEND _lib_env "${ENV_STARPU_DIR}/lib")
+    else()
+      if(WIN32)
+        # %LIB% is ';'-separated; splitting on ':' would break "C:\..." paths
+        file(TO_CMAKE_PATH "$ENV{LIB}" _lib_env)
+      else()
+        if(APPLE)
+          string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}")
+        else()
+          string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}")
+        endif()
+        list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}")
+        list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}")
+      endif()
+    endif()
+    list(REMOVE_DUPLICATES _lib_env)
+
+    # Try to find the starpu libs in the given paths
+    # ----------------------------------------------
+
+    # create list of libs to find
+    set(STARPU_libs_to_find "starpu-${STARPU_VERSION_STRING}")
+    set(STARPU_SHM_libs_to_find "starpu-${STARPU_VERSION_STRING}")
+    if (STARPU_LOOK_FOR_MPI OR MPI_FOUND)
+      list(INSERT STARPU_libs_to_find 0 "starpumpi-${STARPU_VERSION_STRING}")
+      set(STARPU_MPI_libs_to_find "${STARPU_libs_to_find}")
+    endif()
+
+    # call cmake macro to find the lib path
+    if(STARPU_LIBDIR)
+      foreach(starpu_lib ${STARPU_libs_to_find})
+        set(STARPU_${starpu_lib}_LIBRARY "STARPU_${starpu_lib}_LIBRARY-NOTFOUND")
+        find_library(STARPU_${starpu_lib}_LIBRARY
+          NAMES ${starpu_lib}
+          HINTS ${STARPU_LIBDIR})
+      endforeach()
+    else()
+      if(STARPU_DIR)
+        foreach(starpu_lib ${STARPU_libs_to_find})
+          set(STARPU_${starpu_lib}_LIBRARY "STARPU_${starpu_lib}_LIBRARY-NOTFOUND")
+          find_library(STARPU_${starpu_lib}_LIBRARY
+            NAMES ${starpu_lib}
+            HINTS ${STARPU_DIR}
+            PATH_SUFFIXES lib lib32 lib64)
+        endforeach()
+      else()
+        foreach(starpu_lib ${STARPU_libs_to_find})
+          set(STARPU_${starpu_lib}_LIBRARY "STARPU_${starpu_lib}_LIBRARY-NOTFOUND")
+          find_library(STARPU_${starpu_lib}_LIBRARY
+            NAMES ${starpu_lib}
+            HINTS ${_lib_env})
+        endforeach()
+      endif()
+    endif()
+
+    # If found, add path to cmake variable
+    # ------------------------------------
+    foreach(starpu_lib ${STARPU_libs_to_find})
+
+      if (STARPU_${starpu_lib}_LIBRARY)
+
+        # quoted so that a library path containing spaces stays one argument
+        get_filename_component(${starpu_lib}_lib_path "${STARPU_${starpu_lib}_LIBRARY}" PATH)
+        # set cmake variables (respects naming convention)
+
+        foreach(starpu_shm_lib ${STARPU_SHM_libs_to_find})
+          if(starpu_shm_lib STREQUAL starpu_lib)
+            list(APPEND STARPU_SHM_LIBRARIES "${STARPU_${starpu_lib}_LIBRARY}")
+          endif()
+        endforeach()
+        if (STARPU_LOOK_FOR_MPI AND MPI_FOUND)
+          foreach(starpu_mpi_lib ${STARPU_MPI_libs_to_find})
+            if(starpu_mpi_lib STREQUAL starpu_lib)
+              list(APPEND STARPU_MPI_LIBRARIES "${STARPU_${starpu_lib}_LIBRARY}")
+            endif()
+          endforeach()
+        endif ()
+        list(APPEND STARPU_LIBRARY_DIRS "${${starpu_lib}_lib_path}")
+
+      else (STARPU_${starpu_lib}_LIBRARY)
+
+        if(NOT STARPU_FIND_QUIETLY)
+          message(STATUS "Looking for starpu -- lib ${starpu_lib} not found")
+        endif()
+        if(starpu_lib STREQUAL "starpumpi-${STARPU_VERSION_STRING}" AND
+            NOT STARPU_FIND_REQUIRED_MPI)
+          # if MPI optional, not a problem: no NOTFOUND in list of MPI LIBRARIES
+          if(NOT STARPU_FIND_QUIETLY)
+            message(STATUS "Looking for starpu -- lib ${starpu_lib} not required")
+          endif()
+        else()
+          # for any other lib, add NOTFOUND in the proper list of LIBRARIES
+          foreach(starpu_shm_lib ${STARPU_SHM_libs_to_find})
+            if(starpu_shm_lib STREQUAL starpu_lib)
+              set(STARPU_SHM_LIBRARIES "${STARPU_${starpu_lib}_LIBRARY}")
+            endif()
+          endforeach()
+          if (STARPU_LOOK_FOR_MPI AND MPI_FOUND)
+            foreach(starpu_mpi_lib ${STARPU_MPI_libs_to_find})
+              if(starpu_mpi_lib STREQUAL starpu_lib)
+                list(APPEND STARPU_MPI_LIBRARIES "${STARPU_${starpu_mpi_lib}_LIBRARY}")
+              endif()
+            endforeach()
+          endif ()
+        endif()
+
+      endif (STARPU_${starpu_lib}_LIBRARY)
+
+      mark_as_advanced(STARPU_${starpu_lib}_LIBRARY)
+
+    endforeach(starpu_lib ${STARPU_libs_to_find})
+
+    list(REMOVE_DUPLICATES STARPU_SHM_LIBRARIES)
+    list(REMOVE_DUPLICATES STARPU_MPI_LIBRARIES)
+    if (STARPU_LIBRARY_DIRS)
+      list(REMOVE_DUPLICATES STARPU_LIBRARY_DIRS)
+    endif ()
+
+    if (STARPU_SHM_LIBRARIES AND STARPU_LIBRARY_DIRS AND STARPU_INCLUDE_DIRS)
+      set(STARPU_SHM_FOUND TRUE)
+      if(STARPU_MPI_LIBRARIES)
+        set(STARPU_MPI_FOUND TRUE)
+      endif()
+    endif()
+
+  else(STARPU_starpu_config.h_INCLUDE_DIRS)
+    if(NOT STARPU_FIND_QUIETLY)
+      message(STATUS "The version of StarPU is not known so that we do not search libraries")
+    endif()
+  endif(STARPU_starpu_config.h_INCLUDE_DIRS)
+
+  # collapse a list holding any unresolved entry into a single NOTFOUND marker
+  foreach(lib ${STARPU_SHM_LIBRARIES})
+    if (NOT lib)
+      set(STARPU_SHM_LIBRARIES "STARPU_SHM_LIBRARIES-NOTFOUND")
+    endif()
+  endforeach()
+  foreach(lib ${STARPU_MPI_LIBRARIES})
+    if (NOT lib)
+      set(STARPU_MPI_LIBRARIES "STARPU_MPI_LIBRARIES-NOTFOUND")
+    endif()
+  endforeach()
+
if(STARPU_LOOK_FOR_MPI) + set(STARPU_LIBRARIES "${STARPU_MPI_LIBRARIES}") + else() + set(STARPU_LIBRARIES "${STARPU_SHM_LIBRARIES}") + endif() + + # check a function to validate the find + if(STARPU_LIBRARIES) + + set(REQUIRED_FLAGS) + set(REQUIRED_LDFLAGS) + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # STARPU + if (STARPU_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${STARPU_INCLUDE_DIRS}") + endif() + set(CMAKE_REQUIRED_FLAGS) + foreach(libdir ${STARPU_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + set(REQUIRED_LIBS "${STARPU_LIBRARIES}") + # HWLOC + if (HWLOC_FOUND AND STARPU_LOOK_FOR_HWLOC) + if (HWLOC_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${HWLOC_INCLUDE_DIRS}") + endif() + if (HWLOC_LIBRARY_DIRS) + list(APPEND REQUIRED_LIBDIRS "${HWLOC_LIBRARY_DIRS}") + endif() + foreach(lib ${HWLOC_LIBRARIES}) + if (EXISTS ${lib} OR ${lib} MATCHES "^-") + list(APPEND REQUIRED_LIBS "${lib}") + else() + list(APPEND REQUIRED_LIBS "-l${lib}") + endif() + endforeach() + endif() + # MPI + if (MPI_FOUND AND STARPU_LOOK_FOR_MPI) + if (MPI_C_INCLUDE_PATH) + list(APPEND REQUIRED_INCDIRS "${MPI_C_INCLUDE_PATH}") + endif() + if (MPI_C_LINK_FLAGS) + if (${MPI_C_LINK_FLAGS} MATCHES " -") + string(REGEX REPLACE " -" "-" MPI_C_LINK_FLAGS ${MPI_C_LINK_FLAGS}) + endif() + list(APPEND REQUIRED_LDFLAGS "${MPI_C_LINK_FLAGS}") + endif() + list(APPEND REQUIRED_LIBS "${MPI_C_LIBRARIES}") + endif() + # MAGMA + if (MAGMA_FOUND AND STARPU_LOOK_FOR_MAGMA) + if (MAGMA_INCLUDE_DIRS_DEP) + list(APPEND REQUIRED_INCDIRS "${MAGMA_INCLUDE_DIRS_DEP}") + elseif(MAGMA_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${MAGMA_INCLUDE_DIRS}") + endif() + if (MAGMA_LIBRARY_DIRS_DEP) + list(APPEND REQUIRED_LIBDIRS "${MAGMA_LIBRARY_DIRS_DEP}") + elseif(MAGMA_LIBRARY_DIRS) + list(APPEND REQUIRED_LIBDIRS "${MAGMA_LIBRARY_DIRS}") + endif() + if (MAGMA_LIBRARIES_DEP) + list(APPEND REQUIRED_LIBS "${MAGMA_LIBRARIES_DEP}") + 
elseif(MAGMA_LIBRARIES) + foreach(lib ${MAGMA_LIBRARIES}) + if (EXISTS ${lib} OR ${lib} MATCHES "^-") + list(APPEND REQUIRED_LIBS "${lib}") + else() + list(APPEND REQUIRED_LIBS "-l${lib}") + endif() + endforeach() + endif() + if (MAGMA_LINKER_FLAGS) + list(APPEND REQUIRED_LDFLAGS "${MAGMA_LINKER_FLAGS}") + endif() + endif() + # CUDA + if (CUDA_FOUND AND STARPU_LOOK_FOR_CUDA) + if (CUDA_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${CUDA_INCLUDE_DIRS}") + endif() + if (CUDA_LIBRARY_DIRS) + list(APPEND REQUIRED_LIBDIRS "${CUDA_LIBRARY_DIRS}") + endif() + list(APPEND REQUIRED_LIBS "${CUDA_CUBLAS_LIBRARIES};${CUDA_LIBRARIES}") + endif() + # FXT + if (FXT_FOUND AND STARPU_LOOK_FOR_FXT) + if (FXT_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${FXT_INCLUDE_DIRS}") + endif() + if (FXT_LIBRARY_DIRS) + list(APPEND REQUIRED_LIBDIRS "${FXT_LIBRARY_DIRS}") + endif() + foreach(lib ${FXT_LIBRARIES}) + if (EXISTS ${lib} OR ${lib} MATCHES "^-") + list(APPEND REQUIRED_LIBS "${lib}") + else() + list(APPEND REQUIRED_LIBS "-l${lib}") + endif() + endforeach() + endif() + # SIMGRID + if (SIMGRID_FOUND AND STARPU_LOOK_FOR_SIMGRID) + if (SIMGRID_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${SIMGRID_INCLUDE_DIRS}") + endif() + if (SIMGRID_LIBRARY_DIRS) + list(APPEND REQUIRED_LIBDIRS "${SIMGRID_LIBRARY_DIRS}") + endif() + foreach(lib ${SIMGRID_LIBRARIES}) + if (EXISTS ${lib} OR ${lib} MATCHES "^-") + list(APPEND REQUIRED_LIBS "${lib}") + else() + list(APPEND REQUIRED_LIBS "-l${lib}") + endif() + endforeach() + list(APPEND REQUIRED_FLAGS "-include starpu_simgrid_wrap.h") + endif() + # BLAS + if (BLAS_FOUND AND STARPU_LOOK_FOR_BLAS) + if (BLAS_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${BLAS_INCLUDE_DIRS}") + endif() + if (BLAS_LIBRARY_DIRS) + list(APPEND REQUIRED_LIBDIRS "${BLAS_LIBRARY_DIRS}") + endif() + list(APPEND REQUIRED_LIBS "${BLAS_LIBRARIES}") + if (BLAS_LINKER_FLAGS) + list(APPEND REQUIRED_LDFLAGS "${BLAS_LINKER_FLAGS}") + endif() + endif() + # Fortran + if 
(CMAKE_C_COMPILER_ID MATCHES "GNU") + find_library( + FORTRAN_gfortran_LIBRARY + NAMES gfortran + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_gfortran_LIBRARY) + if (FORTRAN_gfortran_LIBRARY AND CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND REQUIRED_LIBS "${FORTRAN_gfortran_LIBRARY}") + endif() + elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") + find_library( + FORTRAN_ifcore_LIBRARY + NAMES ifcore + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_ifcore_LIBRARY) + if (FORTRAN_ifcore_LIBRARY) + list(APPEND REQUIRED_LIBS "${FORTRAN_ifcore_LIBRARY}") + endif() + endif() + # EXTRA LIBS such that pthread, m, rt + list(APPEND REQUIRED_LIBS ${STARPU_EXTRA_LIBRARIES}) + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + list(APPEND CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(STARPU_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(starpu_init STARPU_WORKS) + mark_as_advanced(STARPU_WORKS) + + if(STARPU_WORKS) + # save link with dependencies + if (REQUIRED_FLAGS) + set(STARPU_LIBRARIES_DEP "${REQUIRED_FLAGS};${REQUIRED_LIBS}") + else() + set(STARPU_LIBRARIES_DEP "${REQUIRED_LIBS}") + endif() + set(STARPU_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") + set(STARPU_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") + set(STARPU_LINKER_FLAGS "${REQUIRED_LDFLAGS}") + list(REMOVE_DUPLICATES STARPU_LIBRARY_DIRS_DEP) + list(REMOVE_DUPLICATES STARPU_INCLUDE_DIRS_DEP) + list(REMOVE_DUPLICATES STARPU_LINKER_FLAGS) + else() + if(NOT STARPU_FIND_QUIETLY) + message(STATUS "Looking for starpu : test of starpu_init fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: 
${CMAKE_REQUIRED_LIBRARIES}")
+        message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}")
+        message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails")
+        message(STATUS "Maybe STARPU is linked with specific libraries. "
+          "Have you tried with COMPONENTS (HWLOC, CUDA, MPI, BLAS, MAGMA, FXT, SIMGRID)? "
+          "See the explanation in FindSTARPU.cmake.")
+      endif()
+    endif()
+    set(CMAKE_REQUIRED_INCLUDES)  # clear the try-compile settings used by the link test
+    set(CMAKE_REQUIRED_FLAGS)
+    set(CMAKE_REQUIRED_LIBRARIES)
+  endif(STARPU_LIBRARIES)
+
+endif( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT STARPU_FOUND) OR (STARPU_GIVEN_BY_USER) )
+
+if (STARPU_LIBRARIES)
+  list(GET STARPU_LIBRARIES 0 first_lib)
+  get_filename_component(first_lib_path "${first_lib}" PATH)  # default: directory of the first library
+  if (STARPU_LIBRARY_DIRS)
+    list(GET STARPU_LIBRARY_DIRS 0 first_lib_path)  # fallback when no entry contains "starpu" (as FindSTARSH does)
+    foreach(dir ${STARPU_LIBRARY_DIRS})
+      if ("${dir}" MATCHES "starpu")
+        set(first_lib_path "${dir}")  # the last matching directory wins
+      endif()
+    endforeach()
+  endif()
+  if ("${first_lib_path}" MATCHES "/lib(32|64)?$")  # quoted: an empty path must not break the if() syntax
+    string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}")
+    set(STARPU_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of STARPU library" FORCE)
+  else()
+    set(STARPU_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of STARPU library" FORCE)
+  endif()
+endif()
+mark_as_advanced(STARPU_DIR)
+mark_as_advanced(STARPU_DIR_FOUND)
+
+# check that STARPU has been found
+# --------------------------------
+include(FindPackageHandleStandardArgs)
+if(NOT STARPU_FIND_QUIETLY)
+  if(STARPU_SHM_FOUND)
+    message(STATUS "StarPU library has been found.")
+    if(STARPU_MPI_LIBRARIES)
+      message(STATUS "The mpi version of StarPU has been found so that we manage"
+        " two lists of libs, one sequential and one parallel (see"
+        " STARPU_SHM_LIBRARIES and STARPU_MPI_LIBRARIES).")
+    endif()
+    message(STATUS "StarPU shared memory libraries stored in STARPU_SHM_LIBRARIES")
+  endif()
+endif()
+if (PKG_CONFIG_EXECUTABLE AND STARPU_SHM_FOUND AND NOT STARPU_GIVEN_BY_USER)
+  find_package_handle_standard_args(STARPU DEFAULT_MSG  # this branch does not require the STARPU_WORKS link test
+    STARPU_SHM_LIBRARIES)
+else()
+  find_package_handle_standard_args(STARPU DEFAULT_MSG  # this branch also requires the starpu_init link test result
+    STARPU_SHM_LIBRARIES
+    STARPU_WORKS)
+endif()
+if(STARPU_LOOK_FOR_MPI)
+  if(STARPU_MPI_LIBRARIES AND NOT STARPU_FIND_QUIETLY)
+    message(STATUS "StarPU mpi libraries stored in STARPU_MPI_LIBRARIES")
+  endif()
+  if (PKG_CONFIG_EXECUTABLE AND STARPU_MPI_FOUND AND NOT STARPU_GIVEN_BY_USER)
+    find_package_handle_standard_args(STARPU DEFAULT_MSG  # second verdict, this time on the MPI library list
+      STARPU_MPI_LIBRARIES)
+  else()
+    find_package_handle_standard_args(STARPU DEFAULT_MSG
+      STARPU_MPI_LIBRARIES
+      STARPU_WORKS)
+  endif()
+endif()
diff --git a/cmake/FindSTARSH.cmake b/cmake/FindSTARSH.cmake
new file mode 100644
index 0000000..d9ffe6c
--- /dev/null
+++ b/cmake/FindSTARSH.cmake
@@ -0,0 +1,424 @@
+###
+#
+# @copyright (c) 2009-2014 The University of Tennessee and The University
+#                          of Tennessee Research Foundation.
+#                          All rights reserved.
+# @copyright (c) 2012-2014 Inria. All rights reserved.
+# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+# @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST).
+#                     All rights reserved.
+#
+#
+###
+#
+# - Find STARSH include dirs and libraries
+# Use this module by invoking find_package with the form:
+#  find_package(STARSH
+#               [REQUIRED]             # Fail with error if starsh is not found
+#               [COMPONENTS ...]       # dependencies
+#              )
+#
+#  STARSH depends on the following libraries:
+#   - LAPACK
+#   - LAPACKE
+#   - BLAS
+#   - CBLAS
+#
+#  COMPONENTS are optional libraries STARSH could be linked with,
+#  Use it to drive detection of a specific compilation chain
+#  COMPONENTS can be some of the following:
+#   - no components are available for now: maybe STARSH in the future?
+# +# Results are reported in variables: +# STARSH_FOUND - True if headers and requested libraries were found +# STARSH_LINKER_FLAGS - list of required linker flags (excluding -l and -L) +# STARSH_INCLUDE_DIRS - starsh include directories +# STARSH_LIBRARY_DIRS - Link directories for starsh libraries +# STARSH_LIBRARIES - starsh libraries +# STARSH_INCLUDE_DIRS_DEP - starsh + dependencies include directories +# STARSH_LIBRARY_DIRS_DEP - starsh + dependencies link directories +# STARSH_LIBRARIES_DEP - starsh libraries + dependencies +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DSTARSH_DIR=path/to/starsh): +# STARSH_DIR - Where to find the base directory of starsh +# STARSH_INCDIR - Where to find the header files +# STARSH_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: STARSH_DIR, STARSH_INCDIR, STARSH_LIBDIR +# +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# Copyright 2017 Eduardo Gonzalez +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) 
+
+
+if(NOT STARSH_FOUND)
+  set(STARSH_DIR "" CACHE PATH "Installation directory of STARSH library")  # cache entry: never overrides a user-supplied -DSTARSH_DIR
+  if (NOT STARSH_FIND_QUIETLY)
+    message(STATUS "A cache variable, namely STARSH_DIR, has been set to specify the install directory of STARSH")
+  endif()
+endif(NOT STARSH_FOUND)
+
+
+set(ENV_STARSH_DIR "$ENV{STARSH_DIR}")
+set(ENV_STARSH_INCDIR "$ENV{STARSH_INCDIR}")
+set(ENV_STARSH_LIBDIR "$ENV{STARSH_LIBDIR}")
+set(STARSH_GIVEN_BY_USER "FALSE")  # TRUE when a base dir, or both inc+lib dirs, come from cmake or environment variables
+if ( STARSH_DIR OR ( STARSH_INCDIR AND STARSH_LIBDIR) OR ENV_STARSH_DIR OR (ENV_STARSH_INCDIR AND ENV_STARSH_LIBDIR) )
+  set(STARSH_GIVEN_BY_USER "TRUE")
+endif()
+
+# Optionally use pkg-config to detect include/library dirs (if pkg-config is available)
+# -------------------------------------------------------------------------------------
+include(FindPkgConfig)
+find_package(PkgConfig QUIET)
+if(PKG_CONFIG_EXECUTABLE AND NOT STARSH_GIVEN_BY_USER)
+
+  pkg_search_module(STARSH starsh)
+  if (NOT STARSH_FIND_QUIETLY)
+    if (STARSH_FOUND AND STARSH_LIBRARIES)
+      message(STATUS "Looking for STARSH - found using PkgConfig")
+      #if(NOT STARSH_INCLUDE_DIRS)
+      #    message("${Magenta}STARSH_INCLUDE_DIRS is empty using PkgConfig."
+      #        "Perhaps the path to starsh headers is already present in your"
+      #        "C(PLUS)_INCLUDE_PATH environment variable.${ColourReset}")
+      #endif()
+    else()
+      message("${Magenta}Looking for STARSH - not found using PkgConfig. "
+        "Perhaps you should add the directory containing starsh.pc "
+        "to the PKG_CONFIG_PATH environment variable.${ColourReset}")
+    endif()
+  endif()
+
+  if (STARSH_FIND_VERSION_EXACT)
+    if( STARSH_FIND_VERSION AND STARSH_VERSION AND  # STARSH_VERSION is the version reported by pkg_search_module
+        NOT (STARSH_VERSION VERSION_EQUAL STARSH_FIND_VERSION) )
+      if(NOT STARSH_FIND_QUIETLY)
+        message(FATAL_ERROR
+          "STARSH version found is ${STARSH_VERSION} "
+          "when required is ${STARSH_FIND_VERSION}")
+      endif()
+    endif()
+  else()
+    # if the version found is older than the required then error
+    if( STARSH_FIND_VERSION AND STARSH_VERSION AND  # component-wise numeric comparison, not a string comparison
+        (STARSH_VERSION VERSION_LESS STARSH_FIND_VERSION) )
+      if(NOT STARSH_FIND_QUIETLY)
+        message(FATAL_ERROR
+          "STARSH version found is ${STARSH_VERSION} "
+          "when required is ${STARSH_FIND_VERSION} or newer")
+      endif()
+    endif()
+  endif()
+
+  # if pkg-config is used: these variables are empty
+  # the pkg_search_module call will set the following:
+  # STARSH_LDFLAGS: all required linker flags
+  # STARSH_CFLAGS:  all required cflags
+  set(STARSH_INCLUDE_DIRS_DEP "")
+  set(STARSH_LIBRARY_DIRS_DEP "")
+  set(STARSH_LIBRARIES_DEP "")
+  # replace it anyway: we should update it with dependencies given by pkg-config
+  set(STARSH_INCLUDE_DIRS_DEP "${STARSH_INCLUDE_DIRS}")
+  set(STARSH_LIBRARY_DIRS_DEP "${STARSH_LIBRARY_DIRS}")
+  set(STARSH_LIBRARIES_DEP "${STARSH_LIBRARIES}")
+
+endif(PKG_CONFIG_EXECUTABLE AND NOT STARSH_GIVEN_BY_USER)
+
+# if STARSH is not found using pkg-config
+if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT STARSH_FOUND) OR (STARSH_GIVEN_BY_USER) )
+
+  if (NOT STARSH_FIND_QUIETLY)
+    message(STATUS "Looking for STARSH - PkgConfig not used")
+  endif()
+
+  # STARSH depends on LAPACKE anyway, try to find it
+  if (NOT LAPACKE_FOUND)
+    if(STARSH_FIND_REQUIRED)
+      find_package(LAPACKE REQUIRED)
+    else()
+      find_package(LAPACKE)
+    endif()
+  endif()
+  # STARSH depends on CBLAS anyway, try to find it
+  if (NOT
CBLAS_FOUND)
+    if(STARSH_FIND_REQUIRED)
+      find_package(CBLAS REQUIRED)
+    else()
+      find_package(CBLAS)
+    endif()
+  endif()
+  # BLAS and LAPACK are searched by CBLAS and LAPACKE
+
+  # Looking for include
+  # -------------------
+
+  # Add system include paths to search include
+  # ------------------------------------------
+  unset(_inc_env)
+  set(ENV_STARSH_DIR "$ENV{STARSH_DIR}")
+  set(ENV_STARSH_INCDIR "$ENV{STARSH_INCDIR}")
+  if(ENV_STARSH_INCDIR)
+    list(APPEND _inc_env "${ENV_STARSH_INCDIR}")
+  elseif(ENV_STARSH_DIR)
+    list(APPEND _inc_env "${ENV_STARSH_DIR}")
+    list(APPEND _inc_env "${ENV_STARSH_DIR}/include")
+    list(APPEND _inc_env "${ENV_STARSH_DIR}/include/starsh")
+  else()
+    if(WIN32)
+      file(TO_CMAKE_PATH "$ENV{INCLUDE}" _inc_env)  # native search path -> cmake list; replacing ":" would split drive letters
+    else()
+      string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}")
+      list(APPEND _inc_env "${_path_env}")
+      string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}")
+      list(APPEND _inc_env "${_path_env}")
+      string(REPLACE ":" ";" _path_env "$ENV{CPATH}")
+      list(APPEND _inc_env "${_path_env}")
+      string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}")
+      list(APPEND _inc_env "${_path_env}")
+    endif()
+  endif()
+  list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}")
+  list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}")
+  list(REMOVE_DUPLICATES _inc_env)
+
+
+  # Try to find the starsh header in the given paths
+  # -------------------------------------------------
+  # call cmake macro to find the header path; hint priority: STARSH_INCDIR, STARSH_DIR, then _inc_env
+  if(STARSH_INCDIR)
+    set(STARSH_starsh.h_DIRS "STARSH_starsh.h_DIRS-NOTFOUND")
+    find_path(STARSH_starsh.h_DIRS
+      NAMES starsh.h
+      HINTS ${STARSH_INCDIR})
+  else()
+    if(STARSH_DIR)
+      set(STARSH_starsh.h_DIRS "STARSH_starsh.h_DIRS-NOTFOUND")
+      find_path(STARSH_starsh.h_DIRS
+        NAMES starsh.h
+        HINTS ${STARSH_DIR}
+        PATH_SUFFIXES "include" "include/starsh")
+    else()
+      set(STARSH_starsh.h_DIRS "STARSH_starsh.h_DIRS-NOTFOUND")
+      find_path(STARSH_starsh.h_DIRS
+        NAMES starsh.h
+        HINTS ${_inc_env})
+    endif()
+  endif()
+  mark_as_advanced(STARSH_starsh.h_DIRS)
+
+  # If found, add path to cmake variable
+  # ------------------------------------
+  if (STARSH_starsh.h_DIRS)
+    set(STARSH_INCLUDE_DIRS "${STARSH_starsh.h_DIRS}")
+  else ()
+    set(STARSH_INCLUDE_DIRS "STARSH_INCLUDE_DIRS-NOTFOUND")
+    if(NOT STARSH_FIND_QUIETLY)
+      message(STATUS "Looking for starsh -- starsh.h not found")
+    endif()
+  endif()
+
+
+  # Looking for lib
+  # ---------------
+
+  # Add system library paths to search lib
+  # --------------------------------------
+  unset(_lib_env)
+  set(ENV_STARSH_LIBDIR "$ENV{STARSH_LIBDIR}")
+  if(ENV_STARSH_LIBDIR)
+    list(APPEND _lib_env "${ENV_STARSH_LIBDIR}")
+  elseif(ENV_STARSH_DIR)
+    list(APPEND _lib_env "${ENV_STARSH_DIR}")
+    list(APPEND _lib_env "${ENV_STARSH_DIR}/lib")
+  else()
+    if(WIN32)
+      file(TO_CMAKE_PATH "$ENV{LIB}" _lib_env)  # same drive-letter issue as for INCLUDE above
+    else()
+      if(APPLE)
+        string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}")
+      else()
+        string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}")
+      endif()
+      list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}")
+      list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}")
+    endif()
+  endif()
+  list(REMOVE_DUPLICATES _lib_env)
+
+  # Try to find the starsh lib in the given paths
+  # ----------------------------------------------
+
+  # call cmake macro to find the lib path; hint priority: STARSH_LIBDIR, STARSH_DIR, then _lib_env
+  if(STARSH_LIBDIR)
+    set(STARSH_starsh_LIBRARY "STARSH_starsh_LIBRARY-NOTFOUND")
+    find_library(STARSH_starsh_LIBRARY
+      NAMES starsh
+      HINTS ${STARSH_LIBDIR})
+  else()
+    if(STARSH_DIR)
+      set(STARSH_starsh_LIBRARY "STARSH_starsh_LIBRARY-NOTFOUND")
+      find_library(STARSH_starsh_LIBRARY
+        NAMES starsh
+        HINTS ${STARSH_DIR}
+        PATH_SUFFIXES lib lib32 lib64)
+    else()
+      set(STARSH_starsh_LIBRARY "STARSH_starsh_LIBRARY-NOTFOUND")
+      find_library(STARSH_starsh_LIBRARY
+        NAMES starsh
+        HINTS ${_lib_env})
+    endif()
+  endif()
+  mark_as_advanced(STARSH_starsh_LIBRARY)
+
+  # If found, add path to cmake variable
+  # ------------------------------------
+  if
(STARSH_starsh_LIBRARY)
+    get_filename_component(starsh_lib_path "${STARSH_starsh_LIBRARY}" PATH)
+    # set cmake variables
+    set(STARSH_LIBRARIES "${STARSH_starsh_LIBRARY}")
+    set(STARSH_LIBRARY_DIRS "${starsh_lib_path}")
+  else ()
+    set(STARSH_LIBRARIES "STARSH_LIBRARIES-NOTFOUND")
+    set(STARSH_LIBRARY_DIRS "STARSH_LIBRARY_DIRS-NOTFOUND")
+    if(NOT STARSH_FIND_QUIETLY)
+      message(STATUS "Looking for starsh -- lib starsh not found")
+    endif()
+  endif ()
+
+  # check a function to validate the find
+  if (STARSH_LIBRARIES)
+
+    set(REQUIRED_LDFLAGS)
+    set(REQUIRED_INCDIRS)
+    set(REQUIRED_LIBDIRS)
+    set(REQUIRED_LIBS)
+
+    # STARSH
+    if (STARSH_INCLUDE_DIRS)
+      set(REQUIRED_INCDIRS "${STARSH_INCLUDE_DIRS}")
+    endif()
+    if (STARSH_LIBRARY_DIRS)
+      set(REQUIRED_LIBDIRS "${STARSH_LIBRARY_DIRS}")
+    endif()
+    set(REQUIRED_LIBS "${STARSH_LIBRARIES}")
+    # CBLAS (the *_DEP variants are preferred when they are set)
+    if (CBLAS_INCLUDE_DIRS_DEP)
+      list(APPEND REQUIRED_INCDIRS "${CBLAS_INCLUDE_DIRS_DEP}")
+    elseif (CBLAS_INCLUDE_DIRS)
+      list(APPEND REQUIRED_INCDIRS "${CBLAS_INCLUDE_DIRS}")
+    endif()
+    if(CBLAS_LIBRARY_DIRS_DEP)
+      list(APPEND REQUIRED_LIBDIRS "${CBLAS_LIBRARY_DIRS_DEP}")
+    elseif(CBLAS_LIBRARY_DIRS)
+      list(APPEND REQUIRED_LIBDIRS "${CBLAS_LIBRARY_DIRS}")
+    endif()
+    if (CBLAS_LIBRARIES_DEP)
+      list(APPEND REQUIRED_LIBS "${CBLAS_LIBRARIES_DEP}")
+    elseif(CBLAS_LIBRARIES)
+      list(APPEND REQUIRED_LIBS "${CBLAS_LIBRARIES}")
+    endif()
+    if (BLAS_LINKER_FLAGS)
+      list(APPEND REQUIRED_LDFLAGS "${BLAS_LINKER_FLAGS}")
+    endif()
+    # LAPACK
+    if (LAPACK_INCLUDE_DIRS)
+      list(APPEND REQUIRED_INCDIRS "${LAPACK_INCLUDE_DIRS}")
+    endif()
+    if(LAPACK_LIBRARY_DIRS)
+      list(APPEND REQUIRED_LIBDIRS "${LAPACK_LIBRARY_DIRS}")
+    endif()
+    list(APPEND REQUIRED_LIBS "${LAPACK_LIBRARIES}")
+    if (LAPACK_LINKER_FLAGS)
+      list(APPEND REQUIRED_LDFLAGS "${LAPACK_LINKER_FLAGS}")
+    endif()
+
+    # set required libraries for link (order: linker flags, -L dirs, libraries)
+    set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}")
+    set(CMAKE_REQUIRED_LIBRARIES)
+    list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}")
+    foreach(lib_dir ${REQUIRED_LIBDIRS})
+      list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}")
+    endforeach()
+    list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}")
+    string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}")
+
+    # test link
+    unset(STARSH_WORKS CACHE)  # drop the cached result so the check is re-run
+    include(CheckFunctionExists)
+    check_function_exists(starsh_dgetrf STARSH_WORKS)  # NOTE(review): confirm libstarsh exports starsh_dgetrf
+    mark_as_advanced(STARSH_WORKS)
+
+    if(STARSH_WORKS)
+      # save link with dependencies
+      set(STARSH_LIBRARIES_DEP "${REQUIRED_LIBS}")
+      set(STARSH_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}")
+      set(STARSH_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}")
+      set(STARSH_LINKER_FLAGS "${REQUIRED_LDFLAGS}")
+      list(REMOVE_DUPLICATES STARSH_LIBRARY_DIRS_DEP)
+      list(REMOVE_DUPLICATES STARSH_INCLUDE_DIRS_DEP)
+      list(REMOVE_DUPLICATES STARSH_LINKER_FLAGS)
+    else()
+      if(NOT STARSH_FIND_QUIETLY)
+        message(STATUS "Looking for starsh : test of starsh_dgetrf with
+ starsh, cblas, and lapack libraries fails")
+        message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}")
+        message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}")
+        message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails")
+      endif()
+    endif()
+    set(CMAKE_REQUIRED_INCLUDES)  # clear the try-compile settings used by the link test
+    set(CMAKE_REQUIRED_FLAGS)
+    set(CMAKE_REQUIRED_LIBRARIES)
+  endif(STARSH_LIBRARIES)
+
+endif( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT STARSH_FOUND) OR (STARSH_GIVEN_BY_USER) )
+
+if (STARSH_LIBRARIES)
+  if (STARSH_LIBRARY_DIRS)
+    set( first_lib_path "" )
+    foreach(dir ${STARSH_LIBRARY_DIRS})
+      if ("${dir}" MATCHES "starsh")
+        set(first_lib_path "${dir}")  # the last matching directory wins
+      endif()
+    endforeach()
+    if( NOT first_lib_path )
+      list(GET STARSH_LIBRARY_DIRS 0 first_lib_path)
+    endif()
+  else()
+    list(GET STARSH_LIBRARIES 0 first_lib)
+    get_filename_component(first_lib_path "${first_lib}" PATH)
+  endif()
+  if ("${first_lib_path}" MATCHES "/lib(32|64)?$")  # quoted: the path is empty when the library is a bare name
+    string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}")
+    set(STARSH_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of STARSH library" FORCE)
+  else()
+    set(STARSH_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of STARSH library" FORCE)
+  endif()
+endif()
+
+# check that STARSH has been found
+# -------------------------------
+include(FindPackageHandleStandardArgs)
+if (PKG_CONFIG_EXECUTABLE AND STARSH_FOUND AND NOT STARSH_GIVEN_BY_USER)  # same guard as FindSTARPU: user-given paths must pass the link test
+  find_package_handle_standard_args(STARSH DEFAULT_MSG
+    STARSH_LIBRARIES)
+else()
+  find_package_handle_standard_args(STARSH DEFAULT_MSG
+    STARSH_LIBRARIES
+    STARSH_WORKS)
+endif()
diff --git a/cmake/ImportCatch2.cmake b/cmake/ImportCatch2.cmake
new file mode 100644
index 0000000..932a64a
--- /dev/null
+++ b/cmake/ImportCatch2.cmake
@@ -0,0 +1,19 @@
+IF (NOT TARGET Catch2)
+    find_package(Catch2 QUIET)
+
+    # If Catch2 is not found, fetch and build it
+    if (NOT Catch2_FOUND)
+        message(STATUS "${Red}Couldn't find catch2 pre-installed, will begin fetching it v3.3.2${ColourReset}")
+        include(FetchContent)
+        set(FETCHCONTENT_QUIET OFF)
+        FetchContent_Declare(
+                Catch2
+                GIT_REPOSITORY https://github.com/catchorg/Catch2.git
+                GIT_TAG v3.3.2 # keep in sync with the version named in the status message above
+                GIT_SHALLOW TRUE
+        )
+        FetchContent_MakeAvailable(Catch2)
+    else ()
+        message(STATUS "${Green}Found catch2 pre-installed${ColourReset}")
+    endif ()
+endif ()
\ No newline at end of file
diff --git a/cmake/ImportLAPACKE.cmake b/cmake/ImportLAPACKE.cmake
new file mode 100644
index 0000000..f92eba1
--- /dev/null
+++ b/cmake/ImportLAPACKE.cmake
@@ -0,0 +1,43 @@
+    find_package(LAPACKE COMPONENTS LAPACKEXT)
+    if (LAPACK_FOUND AND LAPACK_LIBRARY_DIRS)
+        # the RPATH to be used when installing
+        list(APPEND CMAKE_INSTALL_RPATH "${LAPACK_LIBRARY_DIRS}")
+    else ()
+        message(FATAL_ERROR "A LAPACK library is required but has not been found")
+    endif ()
+    if (LAPACKE_FOUND)
+        include_directories(${LAPACKE_INCLUDE_DIRS})
+        if (LAPACKE_LIBRARY_DIRS)
+            # the RPATH to be used when
installing
+            list(APPEND CMAKE_INSTALL_RPATH "${LAPACKE_LIBRARY_DIRS}")
+        endif ()
+        if (LAPACKE_LINKER_FLAGS)
+            string(JOIN " " CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}" ${LAPACKE_LINKER_FLAGS})  # space-delimited string, not a ;-list
+        endif ()
+        if (LAPACKE_LIBRARIES)
+            if (LAPACKE_LIBRARIES_DEP)
+                list(INSERT HICMA_DEP 0 ${LAPACKE_LIBRARIES_DEP})  # prepend LAPACKE and its dependencies
+            else ()
+                list(INSERT HICMA_DEP 0 ${LAPACKE_LIBRARIES})
+            endif ()
+        endif ()
+    else ()
+        if (ECRC_VERBOSE_FIND_PACKAGE)
+            if (LAPACKE_STANDALONE OR NOT LAPACKE_WORKS)
+                if (NOT LAPACKE_lapacke.h_DIRS)
+                    Print_Find_Header_Status(lapacke lapacke.h)
+                endif ()
+                if (NOT LAPACKE_lapacke_LIBRARY)
+                    Print_Find_Library_Status(lapacke liblapacke)
+                endif ()
+            endif ()
+        else ()
+            message(WARNING "LAPACKE library has not been found and ECRC_VERBOSE_FIND_PACKAGE is set to OFF."
+                    " Try to activate ECRC_VERBOSE_FIND_PACKAGE option (-DECRC_VERBOSE_FIND_PACKAGE=ON) to get some hints for the detection")
+        endif ()
+        message(FATAL_ERROR "A LAPACKE library is required but has not been found")
+    endif ()
+
+    list(APPEND LIBS ${LAPACKE_LIBRARIES})
+    link_directories(${LAPACKE_LIBRARY_DIRS_DEP})  # directories, not the library list (mirrors ImportSTARPU)
+    include_directories(${LAPACKE_INCLUDE_DIRS})
diff --git a/cmake/ImportSTARPU.cmake b/cmake/ImportSTARPU.cmake
new file mode 100644
index 0000000..d27914a
--- /dev/null
+++ b/cmake/ImportSTARPU.cmake
@@ -0,0 +1,80 @@
+
+# Copyright (c) 2017-2023 King Abdullah University of Science and Technology,
+# Copyright (c) 2023 by Brightskies inc,
+# All rights reserved.
+# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST).
+
+# @file ImportSTARPU.cmake
+# @brief Find and include STARPU library as a dependency.
+# @version 1.0.0
+# @author Sameh Abdulah
+# @date 2023-03-13
+
+message("")
+message("---------------------------------------- StarPU")
+message(STATUS "Checking for StarPU")
+
+include(macros/BuildDependency)
+
+if (NOT TARGET STARPU)
+    # Try to find STARPU.
+    include(FindPkgConfig)
+    find_package(PkgConfig QUIET)
+    find_package(STARPU 1.4.1 QUIET COMPONENTS ${STARPU_COMPONENT_LIST})  # STARPU_COMPONENT_LIST is expected from the including scope
+
+    # If STARPU is found, print its location.
+    if (STARPU_FOUND)
+        message(" Found StarPU: ${STARPU_LIBRARIES}")
+        # If not found, build and install it under <source>/installdir/_deps/STARPU.
+    else ()
+        set(STARPU_DIR ${PROJECT_SOURCE_DIR}/installdir/_deps/STARPU/)
+        # Set the flags to be passed to the build command.
+        set(ISCMAKE OFF)
+        set(ISGIT ON)
+        set(AUTO_GEN ON)
+
+        if (USE_CUDA AND USE_MPI)  # the four branches differ only in the cuda and mpi switches
+            message("Downloading STARPU - MPI CUDA" )
+            set(FLAGS --prefix=${PROJECT_SOURCE_DIR}/installdir/_deps/STARPU/ \--enable-cuda \--disable-opencl \--enable-shared \--disable-build-doc \--disable-export-dynamic \--enable-mpi)
+        elseif(USE_CUDA)
+            message("Downloading STARPU - CUDA" )
+            set(FLAGS --prefix=${PROJECT_SOURCE_DIR}/installdir/_deps/STARPU/ \--enable-cuda \--disable-opencl \--enable-shared \--disable-build-doc \--disable-export-dynamic \--disable-mpi)
+        elseif(USE_MPI)
+            message("Downloading STARPU - MPI" )
+            set(FLAGS --prefix=${PROJECT_SOURCE_DIR}/installdir/_deps/STARPU/ \--disable-cuda \--disable-opencl \--enable-shared \--disable-build-doc \--disable-export-dynamic \--enable-mpi)
+        else()
+            message("Downloading STARPU - SERIAL" )
+            set(FLAGS --prefix=${PROJECT_SOURCE_DIR}/installdir/_deps/STARPU/ \--disable-cuda \--disable-opencl \--enable-shared \--disable-build-doc \--disable-export-dynamic \--disable-mpi)
+
+        endif()
+
+        BuildDependency(STARPU "https://gitlab.inria.fr/starpu/starpu.git" "starpu-1.4.1" ${FLAGS} ${ISCMAKE} ${ISGIT} ${AUTO_GEN})  # NOTE(review): FLAGS expands unquoted into several arguments - confirm against the macro signature in macros/BuildDependency
+
+        # Clear the flags.
+        set(FLAGS "")
+        # Find StarPU after installation.
+        unset(STARPU_DIR)
+        find_package(STARPU 1.4.1 QUIET COMPONENTS ${STARPU_COMPONENT_LIST})  # NOTE(review): QUIET and not REQUIRED - a failed re-detection passes silently here, whereas ImportSTARSH uses REQUIRED at this step
+    endif ()
+else ()
+    message(" STARPU already included")
+endif ()
+
+# Include STARPU headers.
+list(APPEND LIBS ${STARPU_LIBRARIES})
+link_directories(${STARPU_LIBRARY_DIRS_DEP})
+include_directories(${STARPU_INCLUDE_DIRS})
+include_directories(${STARPU_INCLUDE_DIRS}/runtime/starpu)
+include_directories(${STARPU_INCLUDE_DIRS_DEP})
+
+# Set linker flags (CMAKE_EXE_LINKER_FLAGS is one space-separated string, so join instead of list(APPEND)).
+if (STARPU_LINKER_FLAGS)
+    string(JOIN " " CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}" ${STARPU_LINKER_FLAGS})
+endif ()
+set(CMAKE_REQUIRED_INCLUDES "${STARPU_INCLUDE_DIRS_DEP}")
+foreach (libdir ${STARPU_LIBRARY_DIRS_DEP})
+    string(APPEND CMAKE_REQUIRED_FLAGS " -L${libdir}") # CMAKE_REQUIRED_FLAGS is a string; a ;-list does not work
+endforeach ()
+set(CMAKE_REQUIRED_LIBRARIES "${STARPU_LIBRARIES_DEP}")
+
+message(STATUS "starpu done")
diff --git a/cmake/ImportSTARSH.cmake b/cmake/ImportSTARSH.cmake
new file mode 100644
index 0000000..eee0553
--- /dev/null
+++ b/cmake/ImportSTARSH.cmake
@@ -0,0 +1,86 @@
+# Copyright (c) 2017-2023 King Abdullah University of Science and Technology,
+# Copyright (c) 2023 by Brightskies inc,
+# All rights reserved.
+# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST).
+
+# @file ImportSTARSH.cmake
+# @brief Find and include STARSH library as a dependency.
+# @version 1.0.0
+# @author Sameh Abdulah
+# @date 2023-03-13
+
+message("")
+message("---------------------------------------- Stars-H")
+message(STATUS "Checking for STARSH")
+include(macros/BuildDependency)
+
+if (NOT STARSH_FOUND)
+    # Try to find STARSH (STARSH_FOUND is a result variable, not a target, so it is tested as a variable).
+    include(FindPkgConfig)
+    find_package(PkgConfig QUIET)
+    find_package(STARSH QUIET)
+
+    # If STARSH is found, print its location.
+    if (STARSH_FOUND)
+        message(" Found STARSH: ${STARSH_LIBRARIES}")
+        # If not found, install it.
+    else()
+        message(" Can't find STARSH, Installing it instead ..")
+        set(FLAGS -DCMAKE_INSTALL_PREFIX=${PROJECT_SOURCE_DIR}/installdir/_deps/STARSH/ \-DSTARPU=OFF \-DMPI=${USE_MPI})
+        set(ISCMAKE ON)
+        set(ISGIT ON)
+        set(AUTO_GEN OFF)
+        BuildDependency(STARSH "https://github.com/ecrc/stars-h.git" "v0.3.1" ${FLAGS} ${ISCMAKE} ${ISGIT} ${AUTO_GEN})
+        set(FLAGS "")
+        unset(STARSH_DIR)
+        find_package(STARSH REQUIRED)
+    endif()
+else()
+    message(" STARSH already included")
+endif()
+
+# Include STARSH headers.
+include_directories(${STARSH_INCLUDE_DIRS_DEP})
+
+# Set linker flags (one space-separated string, hence string(JOIN)) and library directories.
+if (STARSH_LINKER_FLAGS)
+    string(JOIN " " CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}" ${STARSH_LINKER_FLAGS})
+endif ()
+if (STARSH_LIBRARY_DIRS)
+    list(APPEND CMAKE_INSTALL_RPATH "${STARSH_LIBRARY_DIRS}")
+endif ()
+
+# Check if GSL is a dependency of STARSH and add it if needed.
+if (STARSH_LIBRARIES)
+    find_library(_STARSH_LIB NAMES starsh PATHS ${STARSH_LIBRARY_DIRS})
+    if (_STARSH_LIB AND NOT "${STARSH_LIBRARIES_DEP}" MATCHES "gsl")
+        execute_process(COMMAND nm ${_STARSH_LIB} COMMAND grep gsl RESULT_VARIABLE GSL_IN_STARSH)
+        if ("${GSL_IN_STARSH}" STREQUAL "0") # quoted: the result may be empty or an error string rather than a number
+            message(STATUS "STARSH depends on gsl. Adding it to dependency list")
+            find_package(GSL REQUIRED)
+            if (GSL_FOUND)
+                if (STARSH_LIBRARIES_DEP)
+                    list(APPEND STARSH_LIBRARIES_DEP ${GSL_LIBRARIES})
+                else ()
+                    list(APPEND STARSH_LIBRARIES ${GSL_LIBRARIES})
+                endif ()
+            endif ()
+        endif ()
+    endif ()
+
+    # Add STARSH libraries to the project.
+    if (STARSH_LIBRARIES_DEP)
+        list(APPEND LIBS ${STARSH_LIBRARIES_DEP})
+        link_directories(${STARSH_LIBRARY_DIRS_DEP})
+        # Only *_LIBRARY_DIRS* values are search directories; library lists must not go to link_directories().
+    else ()
+        # STARSH_LIBRARIES itself is appended once, below, for both branches.
+        link_directories(${STARSH_LIBRARY_DIRS})
+    endif ()
+
+    list(APPEND LIBS ${STARSH_LIBRARIES} )
+    link_directories(${STARSH_LIBRARY_DIRS_DEP})
+    include_directories(${STARSH_INCLUDE_DIRS})
+endif()
+
+message(STATUS "StarsH Done")
\ No newline at end of file
diff --git a/cmake/MKLConfig.cmake b/cmake/MKLConfig.cmake
new file mode 100644
index 0000000..b31d55c
--- /dev/null
+++ b/cmake/MKLConfig.cmake
@@ -0,0 +1,850 @@
+#===============================================================================
+# Copyright 2021-2022 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials, and
+# your use of them is governed by the express license under which they were
+# provided to you (License). Unless the License provides otherwise, you may not
+# use, modify, copy, publish, distribute, disclose or transmit this software or
+# the related documents without Intel's prior written permission.
+#
+# This software and the related documents are provided as is, with no express
+# or implied warranties, other than those that are expressly stated in the
+# License.
+#===============================================================================
+
+#===================================================================
+# CMake Config file for Intel(R) oneAPI Math Kernel Library (oneMKL)
+#===================================================================
+
+#===============================================================================
+# Input parameters
+#=================
+#-------------
+# Main options
+#-------------
+# MKL_ROOT: oneMKL root directory (May be required for non-standard install locations. Optional otherwise.)
+#    Default: use location from MKLROOT environment variable or <Full path to this file>/../../../ if MKLROOT is not defined
+# MKL_ARCH
+#    Values:  ia32 intel64
+#    Default: intel64
+# MKL_LINK
+#    Values:  static, dynamic, sdl
+#    Default: dynamic
+#       Exceptions:- DPC++ doesn't support sdl
+# MKL_THREADING
+#    Values:  sequential,
+#             intel_thread (Intel OpenMP),
+#             gnu_thread (GNU OpenMP),
+#             pgi_thread (PGI OpenMP),
+#             tbb_thread
+#    Default: intel_thread
+#       Exceptions:- DPC++ defaults to tbb, PGI compiler on Windows defaults to pgi_thread
+# MKL_INTERFACE (for MKL_ARCH=intel64 only)
+#    Values:  lp64, ilp64
+#       GNU or INTEL interface will be selected based on Compiler.
+#    Default: ilp64
+# MKL_MPI
+#    Values:  intelmpi, mpich, openmpi, msmpi, mshpc
+#    Default: intelmpi
+#-----------------------------------
+# Special options (OFF by default)
+#-----------------------------------
+# ENABLE_BLAS95:      Enables BLAS Fortran95 API
+# ENABLE_LAPACK95:    Enables LAPACK Fortran95 API
+# ENABLE_BLACS:       Enables cluster BLAS library
+# ENABLE_CDFT:        Enables cluster DFT library
+# ENABLE_CPARDISO:    Enables cluster PARDISO functionality
+# ENABLE_SCALAPACK:   Enables cluster LAPACK library
+# ENABLE_OMP_OFFLOAD: Enables OpenMP Offload functionality
+#
+#==================
+# Output parameters
+#==================
+# MKL_ROOT
+#     oneMKL root directory.
+# MKL_INCLUDE
+#     Use of target_include_directories() is recommended.
+#     INTERFACE_INCLUDE_DIRECTORIES property is set on mkl_core and mkl_rt libraries.
+#     Alternatively, this variable can be used directly (not recommended as per Modern CMake)
+# MKL_ENV
+#     Provides all environment variables based on input parameters.
+#     Currently useful for mkl_rt linking and BLACS on Windows.
+#     Must be set as an ENVIRONMENT property.
+# Example:
+#     add_test(NAME mytest COMMAND myexe)
+#     if(MKL_ENV)
+#       set_tests_properties(mytest PROPERTIES ENVIRONMENT "${MKL_ENV}")
+#     endif()
+#
+# MKL::<library name>
+#     IMPORTED targets to link MKL libraries individually or when using a custom link-line.
+#     mkl_core and mkl_rt have INTERFACE_* properties set to them.
+#     Please refer to Intel(R) oneMKL Link Line Advisor for help with linking.
+#
+# Below INTERFACE targets provide full link-lines for direct use.
+# Example:
+#     target_link_options(<my_linkable_target> PUBLIC $<LINK_ONLY:MKL::MKL>)
+#
+# MKL::MKL
+#     Link line for C and Fortran API
+# MKL::MKL_DPCPP
+#     Link line for DPC++ API
+#
+# Note: For Device API, library linking is not required.
+#       Compile options can be added from the INTERFACE_COMPILE_OPTIONS property on MKL::MKL_DPCPP
+#       Include directories can be added from the INTERFACE_INCLUDE_DIRECTORIES property on MKL::MKL_DPCPP
+#
+# Note: Output parameters' and targets' availability can change
+# based on Input parameters and application project languages.
+#===============================================================================
+# mkl_message(MSG_MODE MSG_TEXT): message() wrapper; FATAL_ERROR is always printed, other modes only when MKL_FIND_QUIETLY is not set.
+function(mkl_message MSG_MODE MSG_TEXT)
+  if(MSG_MODE STREQUAL "FATAL_ERROR")
+    message(${MSG_MODE} ${MSG_TEXT})
+  else()
+    if(NOT MKL_FIND_QUIETLY)
+      message(${MSG_MODE} ${MSG_TEXT})
+    endif()
+  endif()
+endfunction()
+
+if(${CMAKE_VERSION} VERSION_LESS "3.13")
+  mkl_message(FATAL_ERROR "The minimum supported CMake version is 3.13.
You are running version ${CMAKE_VERSION}") +endif() + +include_guard() +include(FindPackageHandleStandardArgs) + +if(NOT MKL_LIBRARIES) + +# Set CMake policies for well-defined behavior across CMake versions +cmake_policy(SET CMP0011 NEW) +cmake_policy(SET CMP0057 NEW) + +# Project Languages +get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) +list(APPEND MKL_LANGS C CXX Fortran) +foreach(lang ${languages}) + if(${lang} IN_LIST MKL_LANGS) + list(APPEND CURR_LANGS ${lang}) + endif() +endforeach() +list(REMOVE_DUPLICATES CURR_LANGS) + +option(ENABLE_BLAS95 "Enables BLAS Fortran95 API" OFF) +option(ENABLE_LAPACK95 "Enables LAPACK Fortran95 API" OFF) +option(ENABLE_BLACS "Enables cluster BLAS library" OFF) +option(ENABLE_CDFT "Enables cluster DFT library" OFF) +option(ENABLE_CPARDISO "Enables cluster PARDISO functionality" OFF) +option(ENABLE_SCALAPACK "Enables cluster LAPACK library" OFF) +option(ENABLE_OMP_OFFLOAD "Enables OpenMP Offload functionality" OFF) + +# Use MPI if any of these are enabled +if(ENABLE_BLACS OR ENABLE_CDFT OR ENABLE_SCALAPACK OR ENABLE_CPARDISO) + set(USE_MPI ON) +endif() + +# Check Parameters +function(define_param TARGET_PARAM DEFAULT_PARAM SUPPORTED_LIST) + if(NOT DEFINED ${TARGET_PARAM} AND NOT DEFINED ${DEFAULT_PARAM}) + mkl_message(STATUS "${TARGET_PARAM}: Undefined") + elseif(NOT DEFINED ${TARGET_PARAM} AND DEFINED ${DEFAULT_PARAM}) + set(${TARGET_PARAM} "${${DEFAULT_PARAM}}" CACHE STRING "Choose ${TARGET_PARAM} options are: ${${SUPPORTED_LIST}}") + foreach(opt ${${DEFAULT_PARAM}}) + set(STR_LIST "${STR_LIST} ${opt}") + endforeach() + mkl_message(STATUS "${TARGET_PARAM}: None, set to `${STR_LIST}` by default") + elseif(${SUPPORTED_LIST}) + set(ITEM_FOUND 1) + foreach(opt ${${TARGET_PARAM}}) + if(NOT ${opt} IN_LIST ${SUPPORTED_LIST}) + set(ITEM_FOUND 0) + endif() + endforeach() + if(ITEM_FOUND EQUAL 0) + foreach(opt ${${SUPPORTED_LIST}}) + set(STR_LIST "${STR_LIST} ${opt}") + endforeach() + mkl_message(FATAL_ERROR "Invalid 
${TARGET_PARAM} `${${TARGET_PARAM}}`, options are: ${STR_LIST}") + else() + mkl_message(STATUS "${TARGET_PARAM}: ${${TARGET_PARAM}}") + endif() + else() + mkl_message(STATUS "${TARGET_PARAM}: ${${TARGET_PARAM}}") + endif() +endfunction() + +#================ +# Compiler checks +#================ + +if(CMAKE_C_COMPILER) + get_filename_component(C_COMPILER_NAME ${CMAKE_C_COMPILER} NAME) +endif() +if(CMAKE_CXX_COMPILER) + get_filename_component(CXX_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME) +endif() +if(CMAKE_Fortran_COMPILER) + get_filename_component(Fortran_COMPILER_NAME ${CMAKE_Fortran_COMPILER} NAME) +endif() + +# Determine Compiler Family +if(CXX_COMPILER_NAME STREQUAL "dpcpp" OR CXX_COMPILER_NAME STREQUAL "dpcpp.exe" + OR CXX_COMPILER_NAME STREQUAL "icpx" OR CXX_COMPILER_NAME STREQUAL "icx.exe") + set(DPCPP_COMPILER ON) +endif() +if(C_COMPILER_NAME MATCHES "^clang") + set(CLANG_COMPILER ON) +endif() +if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + set(PGI_COMPILER ON) +elseif(CMAKE_C_COMPILER_ID STREQUAL "Intel" OR CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" + OR CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM" OR CMAKE_Fortran_COMPILER_ID STREQUAL "IntelLLVM") + set(INTEL_COMPILER ON) +else() + if(CMAKE_C_COMPILER_ID STREQUAL "GNU") + set(GNU_C_COMPILER ON) + endif() + if(CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + set(GNU_Fortran_COMPILER ON) + endif() +endif() + +if(USE_MPI AND (C_COMPILER_NAME MATCHES "^mpi" OR Fortran_COMPILER_NAME MATCHES "^mpi")) + set(USE_MPI_SCRIPT ON) +endif() + +#================ + +#================ +# System-specific +#================ + +# Extensions +if(UNIX) + set(LIB_PREFIX "lib") + set(LIB_EXT ".a") + set(DLL_EXT ".so") + if(APPLE) + set(DLL_EXT ".dylib") + endif() + set(LINK_PREFIX "-l") + set(LINK_SUFFIX "") +else() + set(LIB_PREFIX "") + set(LIB_EXT ".lib") + set(DLL_EXT "_dll.lib") + set(LINK_PREFIX "") + set(LINK_SUFFIX ".lib") +endif() + +# Set target system architecture +set(DEFAULT_MKL_ARCH 
intel64) +if(DPCPP_COMPILER OR PGI_COMPILER OR ENABLE_OMP_OFFLOAD OR USE_MPI) + set(MKL_ARCH_LIST intel64) +else() + set(MKL_ARCH_LIST ia32 intel64) +endif() +define_param(MKL_ARCH DEFAULT_MKL_ARCH MKL_ARCH_LIST) + +#================ + +#========== +# Setup MKL +#========== + +# Set MKL_ROOT directory +if(NOT DEFINED MKL_ROOT) + if(DEFINED ENV{MKLROOT}) + set(MKL_ROOT $ENV{MKLROOT}) + else() + get_filename_component(MKL_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" REALPATH) + get_filename_component(MKL_ROOT "${MKL_CMAKE_PATH}/../../../" ABSOLUTE) + mkl_message(STATUS "MKL_ROOT ${MKL_ROOT}") + endif() +endif() +string(REPLACE "\\" "/" MKL_ROOT ${MKL_ROOT}) + +# Define MKL_LINK +set(DEFAULT_MKL_LINK dynamic) +if(DPCPP_COMPILER OR USE_MPI) + set(MKL_LINK_LIST static dynamic) +else() + set(MKL_LINK_LIST static dynamic sdl) +endif() +define_param(MKL_LINK DEFAULT_MKL_LINK MKL_LINK_LIST) + +# Define MKL_INTERFACE +if(MKL_ARCH STREQUAL "intel64") + set(IFACE_TYPE intel) + if(GNU_Fortran_COMPILER) + set(IFACE_TYPE gf) + endif() + if(DPCPP_COMPILER) + if(MKL_INTERFACE) + set(MKL_INTERFACE_FULL intel_${MKL_INTERFACE}) + endif() + set(DEFAULT_MKL_INTERFACE intel_ilp64) + set(MKL_INTERFACE_LIST intel_ilp64) + else() + if(MKL_INTERFACE) + set(MKL_INTERFACE_FULL ${IFACE_TYPE}_${MKL_INTERFACE}) + endif() + set(DEFAULT_MKL_INTERFACE ${IFACE_TYPE}_ilp64) + set(MKL_INTERFACE_LIST ${IFACE_TYPE}_ilp64 ${IFACE_TYPE}_lp64) + endif() + define_param(MKL_INTERFACE_FULL DEFAULT_MKL_INTERFACE MKL_INTERFACE_LIST) +else() + if(WIN32) + set(MKL_INTERFACE_FULL intel_c) + elseif(NOT APPLE) + if(GNU_Fortran_COMPILER) + set(MKL_INTERFACE_FULL gf) + else() + set(MKL_INTERFACE_FULL intel) + endif() + else() + mkl_message(FATAL_ERROR "OSX does not support MKL_ARCH ia32.") + endif() +endif() +if(MKL_INTERFACE_FULL MATCHES "ilp64") + set(MKL_INTERFACE "ilp64") +else() + set(MKL_INTERFACE "lp64") +endif() + +# Define MKL headers +find_path(MKL_H mkl.h + HINTS ${MKL_ROOT} + PATH_SUFFIXES include) +list(APPEND 
MKL_INCLUDE ${MKL_H}) + +# Add pre-built F95 Interface Modules +if(INTEL_COMPILER AND (ENABLE_BLAS95 OR ENABLE_LAPACK95)) + if(MKL_ARCH STREQUAL "intel64") + list(APPEND MKL_INCLUDE "${MKL_ROOT}/include/${MKL_ARCH}/${MKL_INTERFACE}") + else() + list(APPEND MKL_INCLUDE "${MKL_ROOT}/include/${MKL_ARCH}") + endif() +endif() + +# Define MKL_THREADING +# All APIs support sequential threading +set(MKL_THREADING_LIST "sequential" "intel_thread" "tbb_thread") +set(DEFAULT_MKL_THREADING intel_thread) +# DPC++ API supports TBB threading, but not OpenMP threading +if(DPCPP_COMPILER) + set(DEFAULT_MKL_THREADING tbb_thread) + list(REMOVE_ITEM MKL_THREADING_LIST intel_thread) +# C, Fortran API +elseif(PGI_COMPILER) + # PGI compiler supports PGI OpenMP threading, additionally + list(APPEND MKL_THREADING_LIST pgi_thread) + # PGI compiler does not support TBB threading + list(REMOVE_ITEM MKL_THREADING_LIST tbb_thread) + if(WIN32) + # PGI 19.10 and 20.1 on Windows, do not support Intel OpenMP threading + list(REMOVE_ITEM MKL_THREADING_LIST intel_thread) + set(DEFAULT_MKL_THREADING pgi_thread) + endif() +elseif(GNU_C_COMPILER OR GNU_Fortran_COMPILER OR CLANG_COMPILER) + list(APPEND MKL_THREADING_LIST gnu_thread) +else() + # Intel and Microsoft compilers + # Nothing to do, only for completeness +endif() +define_param(MKL_THREADING DEFAULT_MKL_THREADING MKL_THREADING_LIST) + +# Define MKL_MPI +set(DEFAULT_MKL_MPI intelmpi) +if(UNIX) + if(APPLE) + # Override defaults for OSX + set(DEFAULT_MKL_MPI mpich) + set(MKL_MPI_LIST mpich) + else() + set(MKL_MPI_LIST intelmpi openmpi mpich mpich2) + endif() +else() + # Windows + set(MKL_MPI_LIST intelmpi mshpc msmpi) +endif() +define_param(MKL_MPI DEFAULT_MKL_MPI MKL_MPI_LIST) +# MSMPI is now called MSHPC. MSMPI option exists for backward compatibility. 
+if(MKL_MPI STREQUAL "mshpc") + set(MKL_MPI msmpi) +endif() +find_package_handle_standard_args(MKL REQUIRED_VARS MKL_MPI) + +# Checkpoint - Verify if required options are defined +find_package_handle_standard_args(MKL REQUIRED_VARS MKL_ROOT MKL_ARCH MKL_INCLUDE MKL_LINK MKL_THREADING MKL_INTERFACE_FULL) + +# Provides a list of IMPORTED targets for the project +if(NOT DEFINED MKL_IMPORTED_TARGETS) + set(MKL_IMPORTED_TARGETS "") +endif() + +# Clear temporary variables +set(MKL_C_COPT "") +set(MKL_F_COPT "") +set(MKL_SDL_COPT "") +set(MKL_CXX_COPT "") +set(MKL_DPCPP_COPT "") +set(MKL_DPCPP_LOPT "") +set(MKL_OFFLOAD_COPT "") +set(MKL_OFFLOAD_LOPT "") + +set(MKL_SUPP_LINK "") # Other link options. Usually at the end of the link-line. +set(MKL_LINK_LINE) # For MPI only +set(MKL_ENV_PATH "") # Temporary variable to work with PATH +set(MKL_ENV "") # Exported environment variables + +# Modify PATH variable to make it CMake-friendly +set(OLD_PATH $ENV{PATH}) +string(REPLACE ";" "\;" OLD_PATH "${OLD_PATH}") + +# Compiler options +if(GNU_C_COMPILER OR GNU_Fortran_COMPILER) + if(MKL_ARCH STREQUAL "ia32") + list(APPEND MKL_C_COPT -m32) + list(APPEND MKL_F_COPT -m32) + else() + list(APPEND MKL_C_COPT -m64) + list(APPEND MKL_F_COPT -m64) + endif() +endif() + +# Additonal compiler & linker options +if(CXX_COMPILER_NAME STREQUAL "icpx" OR CXX_COMPILER_NAME STREQUAL "icx.exe") + list(APPEND MKL_DPCPP_COPT "-fsycl") + list(APPEND MKL_DPCPP_LOPT "-fsycl") +endif() +if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD) + if(MKL_LINK STREQUAL "static") + list(APPEND MKL_DPCPP_LOPT "-fsycl-device-code-split=per_kernel") + list(APPEND MKL_OFFLOAD_LOPT "-fsycl-device-code-split=per_kernel") + endif() +endif() + +# For OpenMP Offload +if(ENABLE_OMP_OFFLOAD) + if(WIN32) + if(OPENMP_VERSION VERSION_GREATER_EQUAL "5.1") + if("Fortran" IN_LIST CURR_LANGS) + list(APPEND MKL_OFFLOAD_COPT -Qiopenmp -Qopenmp-targets:spir64 -DONEMKL_USE_OPENMP_VERSION=202011) + else() + list(APPEND MKL_OFFLOAD_COPT -Qiopenmp 
-Qopenmp-targets:spir64 -Qopenmp-version:51 -DONEMKL_USE_OPENMP_VERSION=202011) + endif() + else() + list(APPEND MKL_OFFLOAD_COPT -Qiopenmp -Qopenmp-targets:spir64) + endif() + # -MD and -MDd are manually added here because offload functionality uses DPC++ runtime. + if(CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo") + list(APPEND MKL_OFFLOAD_COPT -MDd) + else() + list(APPEND MKL_OFFLOAD_COPT -MD) + endif() + list(APPEND MKL_OFFLOAD_LOPT -Qiopenmp -Qopenmp-targets:spir64 -fsycl) + set(SKIP_LIBPATH ON) + else() + if(OPENMP_VERSION VERSION_GREATER_EQUAL "5.1") + if("Fortran" IN_LIST CURR_LANGS) + list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64 -DONEMKL_USE_OPENMP_VERSION=202011) + else() + list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64 -fopenmp-version=51 -DONEMKL_USE_OPENMP_VERSION=202011) + endif() + else () + list(APPEND MKL_OFFLOAD_COPT -fiopenmp -fopenmp-targets=spir64) + endif() + list(APPEND MKL_OFFLOAD_LOPT -fiopenmp -fopenmp-targets=spir64 -fsycl) + if(APPLE) + list(APPEND MKL_SUPP_LINK -lc++) + else() + list(APPEND MKL_SUPP_LINK -lstdc++) + endif() + endif() +endif() + +# For selected Interface +if(MKL_INTERFACE_FULL) + if(MKL_ARCH STREQUAL "ia32") + if(GNU_Fortran_COMPILER) + set(MKL_SDL_IFACE_ENV "GNU") + endif() + else() + if(GNU_Fortran_COMPILER) + set(MKL_SDL_IFACE_ENV "GNU,${MKL_INTERFACE}") + else() + set(MKL_SDL_IFACE_ENV "${MKL_INTERFACE}") + endif() + if(MKL_INTERFACE STREQUAL "ilp64") + if("Fortran" IN_LIST CURR_LANGS) + if(INTEL_COMPILER) + if(WIN32) + list(APPEND MKL_F_COPT "-4I8") + else() + list(APPEND MKL_F_COPT "-i8") + endif() + elseif(GNU_Fortran_COMPILER) + list(APPEND MKL_F_COPT "-fdefault-integer-8") + elseif(PGI_COMPILER) + list(APPEND MKL_F_COPT "-i8") + endif() + endif() + list(INSERT MKL_C_COPT 0 "-DMKL_ILP64") + list(INSERT MKL_SDL_COPT 0 "-DMKL_ILP64") + list(INSERT MKL_CXX_COPT 0 "-DMKL_ILP64") + list(INSERT MKL_OFFLOAD_COPT 0 "-DMKL_ILP64") + else() + # lp64 + endif() + endif() + if(MKL_SDL_IFACE_ENV) + 
string(TOUPPER ${MKL_SDL_IFACE_ENV} MKL_SDL_IFACE_ENV)
+  endif()
+endif() # MKL_INTERFACE_FULL
+
+# All MKL Libraries
+if(WIN32 AND CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo")
+  set(MKL_SYCL mkl_sycld)
+else()
+  set(MKL_SYCL mkl_sycl)
+endif()
+set(MKL_IFACE_LIB mkl_${MKL_INTERFACE_FULL})
+set(MKL_CORE mkl_core)
+if(WIN32 AND CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo" AND MKL_THREADING STREQUAL "tbb_thread")
+  set(MKL_THREAD mkl_tbb_threadd)
+else()
+  set(MKL_THREAD mkl_${MKL_THREADING})
+endif()
+set(MKL_SDL mkl_rt)
+if(MKL_ARCH STREQUAL "ia32")
+  set(MKL_BLAS95 mkl_blas95)
+  set(MKL_LAPACK95 mkl_lapack95)
+else()
+  set(MKL_BLAS95 mkl_blas95_${MKL_INTERFACE})
+  set(MKL_LAPACK95 mkl_lapack95_${MKL_INTERFACE})
+endif()
+# BLACS
+set(MKL_BLACS mkl_blacs_${MKL_MPI}_${MKL_INTERFACE})
+if(UNIX AND NOT APPLE AND MKL_MPI MATCHES "mpich")
+  # MPICH is compatible with INTELMPI Wrappers on Linux
+  set(MKL_BLACS mkl_blacs_intelmpi_${MKL_INTERFACE})
+endif()
+if(WIN32)
+  if(MKL_MPI STREQUAL "msmpi")
+    if("Fortran" IN_LIST CURR_LANGS)
+      list(APPEND MKL_SUPP_LINK "msmpifec.lib")
+    endif()
+    # MSMPI and MSHPC are supported with the same BLACS library
+    set(MKL_BLACS mkl_blacs_msmpi_${MKL_INTERFACE})
+    if(NOT MKL_LINK STREQUAL "static")
+      set(MKL_BLACS mkl_blacs_${MKL_INTERFACE})
+      set(MKL_BLACS_ENV MSMPI)
+    endif()
+  elseif(MKL_MPI STREQUAL "intelmpi" AND NOT MKL_LINK STREQUAL "static")
+    set(MKL_BLACS mkl_blacs_${MKL_INTERFACE})
+    set(MKL_BLACS_ENV INTELMPI)
+  endif()
+endif()
+# CDFT & SCALAPACK
+set(MKL_CDFT mkl_cdft_core)
+set(MKL_SCALAPACK mkl_scalapack_${MKL_INTERFACE})
+
+
+if (UNIX)
+  if(NOT APPLE)
+    if(MKL_LINK STREQUAL "static")
+      set(START_GROUP "-Wl,--start-group")
+      set(END_GROUP "-Wl,--end-group")
+      if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD)
+        set(EXPORT_DYNAMIC "-Wl,-export-dynamic")
+      endif()
+    elseif(MKL_LINK STREQUAL "dynamic")
+      set(MKL_RPATH "-Wl,-rpath=$<TARGET_FILE_DIR:MKL::${MKL_CORE}>")
+      if((GNU_Fortran_COMPILER OR PGI_COMPILER) AND "Fortran" IN_LIST CURR_LANGS)
+        set(NO_AS_NEEDED
-Wl,--no-as-needed)
+      endif()
+    else()
+      set(MKL_RPATH "-Wl,-rpath=$<TARGET_FILE_DIR:MKL::${MKL_SDL}>")
+    endif()
+  endif()
+endif()
+
+# Create a list of requested libraries, based on input options (MKL_LIBRARIES)
+# Create full link-line in MKL_LINK_LINE
+list(APPEND MKL_LINK_LINE $<IF:$<BOOL:${ENABLE_OMP_OFFLOAD}>,${MKL_OFFLOAD_LOPT},>
+    $<IF:$<BOOL:${DPCPP_COMPILER}>,${MKL_DPCPP_LOPT},> ${EXPORT_DYNAMIC} ${NO_AS_NEEDED} ${MKL_RPATH})
+if(ENABLE_BLAS95)
+  list(APPEND MKL_LIBRARIES ${MKL_BLAS95})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_BLAS95})
+endif()
+if(ENABLE_LAPACK95)
+  list(APPEND MKL_LIBRARIES ${MKL_LAPACK95})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_LAPACK95})
+endif()
+if(ENABLE_SCALAPACK)
+  list(APPEND MKL_LIBRARIES ${MKL_SCALAPACK})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_SCALAPACK})
+endif()
+if(DPCPP_COMPILER OR (ENABLE_OMP_OFFLOAD AND NOT MKL_LINK STREQUAL "sdl"))
+  list(APPEND MKL_LIBRARIES ${MKL_SYCL})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_SYCL})
+endif()
+list(APPEND MKL_LINK_LINE ${START_GROUP})
+if(ENABLE_CDFT)
+  list(APPEND MKL_LIBRARIES ${MKL_CDFT})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_CDFT})
+endif()
+if(MKL_LINK STREQUAL "sdl")
+  list(APPEND MKL_LIBRARIES ${MKL_SDL})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_SDL})
+else()
+  list(APPEND MKL_LIBRARIES ${MKL_IFACE_LIB} ${MKL_THREAD} ${MKL_CORE})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_IFACE_LIB} MKL::${MKL_THREAD} MKL::${MKL_CORE})
+endif()
+if(USE_MPI)
+  list(APPEND MKL_LIBRARIES ${MKL_BLACS})
+  list(APPEND MKL_LINK_LINE MKL::${MKL_BLACS})
+endif()
+list(APPEND MKL_LINK_LINE ${END_GROUP})
+
+# Find all requested libraries
+foreach(lib ${MKL_LIBRARIES})
+  unset(${lib}_file CACHE)
+  if(MKL_LINK STREQUAL "static" AND NOT ${lib} STREQUAL ${MKL_SDL})
+    find_library(${lib}_file ${LIB_PREFIX}${lib}${LIB_EXT}
+                PATHS ${MKL_ROOT}
+                PATH_SUFFIXES "lib" "lib/${MKL_ARCH}")
+    add_library(MKL::${lib} STATIC IMPORTED)
+  else()
+    find_library(${lib}_file NAMES ${LIB_PREFIX}${lib}${DLL_EXT} ${lib}
+                PATHS ${MKL_ROOT}
+                PATH_SUFFIXES "lib" "lib/${MKL_ARCH}")
+    add_library(MKL::${lib} SHARED IMPORTED)
+
endif()
+  find_package_handle_standard_args(MKL REQUIRED_VARS ${lib}_file)
+  # CMP0111, implemented in CMake 3.20+ requires a shared library target on Windows
+  # to be defined with IMPLIB and LOCATION property.
+  # It also requires a static library target to be defined with LOCATION property.
+  # Setting the policy to OLD usage, using cmake_policy() does not work as of 3.20.0, hence the if-else below.
+  if(WIN32 AND NOT MKL_LINK STREQUAL "static")
+    set_target_properties(MKL::${lib} PROPERTIES IMPORTED_IMPLIB "${${lib}_file}")
+    # Find corresponding DLL
+    set(MKL_DLL_GLOB ${lib}.*.dll)
+    file(GLOB MKL_DLL_FILE "${MKL_ROOT}/redist/${MKL_ARCH}/${MKL_DLL_GLOB}"
+        "${MKL_ROOT}/../redist/${MKL_ARCH}/${MKL_DLL_GLOB}"
+        "${MKL_ROOT}/../redist/${MKL_ARCH}/mkl/${MKL_DLL_GLOB}")
+    if(NOT ${lib} STREQUAL ${MKL_IFACE_LIB} AND NOT ${lib} STREQUAL ${MKL_BLAS95} AND NOT ${lib} STREQUAL ${MKL_LAPACK95}) # Windows IFACE libs are static only
+      list(LENGTH MKL_DLL_FILE MKL_DLL_FILE_LEN)
+      if(MKL_DLL_FILE_LEN)
+        # in case multiple versions of the same dll are found, select the highest version
+        list(SORT MKL_DLL_FILE)
+        list(REVERSE MKL_DLL_FILE)
+        list(GET MKL_DLL_FILE 0 MKL_DLL_FILE)
+
+        mkl_message(STATUS "Found DLL: ${MKL_DLL_FILE}")
+        set_target_properties(MKL::${lib} PROPERTIES IMPORTED_LOCATION "${MKL_DLL_FILE}")
+      else()
+        mkl_message(FATAL_ERROR "${MKL_DLL_GLOB} not found")
+      endif()
+    endif()
+  else()
+    set_target_properties(MKL::${lib} PROPERTIES IMPORTED_LOCATION "${${lib}_file}")
+  endif()
+  list(APPEND MKL_IMPORTED_TARGETS MKL::${lib})
+endforeach()
+
+# Threading selection
+if(MKL_THREADING)
+  if(MKL_THREADING STREQUAL "tbb_thread")
+    find_package(TBB REQUIRED CONFIG COMPONENTS tbb)
+    set(MKL_THREAD_LIB $<TARGET_LINKER_FILE:TBB::tbb>)
+    set(MKL_SDL_THREAD_ENV "TBB")
+    get_property(TBB_LIB TARGET TBB::tbb PROPERTY IMPORTED_LOCATION_RELEASE)
+    get_filename_component(TBB_LIB_DIR ${TBB_LIB} DIRECTORY)
+    if(UNIX)
+      if(CMAKE_SKIP_BUILD_RPATH)
+        set(TBB_LINK "-L${TBB_LIB_DIR} -ltbb")
+      else()
+
set(TBB_LINK "-Wl,-rpath,${TBB_LIB_DIR} -L${TBB_LIB_DIR} -ltbb") + endif() + list(APPEND MKL_SUPP_LINK ${TBB_LINK}) + if(APPLE) + list(APPEND MKL_SUPP_LINK -lc++) + else() + list(APPEND MKL_SUPP_LINK -lstdc++) + endif() + endif() + if(WIN32 OR APPLE) + set(MKL_ENV_PATH ${TBB_LIB_DIR}) + endif() + elseif(MKL_THREADING MATCHES "_thread") + if(MKL_THREADING STREQUAL "pgi_thread") + list(APPEND MKL_SUPP_LINK -mp -pgf90libs) + set(MKL_SDL_THREAD_ENV "PGI") + elseif(MKL_THREADING STREQUAL "gnu_thread") + list(APPEND MKL_SUPP_LINK -lgomp) + set(MKL_SDL_THREAD_ENV "GNU") + else() + # intel_thread + if(UNIX) + set(MKL_OMP_LIB iomp5) + set(LIB_EXT ".so") + if(APPLE) + set(LIB_EXT ".dylib") + endif() + else() + set(MKL_OMP_LIB libiomp5md) + endif() + set(MKL_SDL_THREAD_ENV "INTEL") + set(OMP_LIBNAME ${LIB_PREFIX}${MKL_OMP_LIB}${LIB_EXT}) + + find_library(OMP_LIBRARY ${OMP_LIBNAME} + HINTS $ENV{LIB} $ENV{LIBRARY_PATH} $ENV{MKLROOT} ${MKL_ROOT} ${CMPLR_ROOT} + PATH_SUFFIXES "lib" "lib/${MKL_ARCH}" + "lib/${MKL_ARCH}_lin" "lib/${MKL_ARCH}_win" + "linux/compiler/lib/${MKL_ARCH}" + "linux/compiler/lib/${MKL_ARCH}_lin" + "windows/compiler/lib/${MKL_ARCH}" + "windows/compiler/lib/${MKL_ARCH}_win" + "../compiler/lib/${MKL_ARCH}_lin" "../compiler/lib/${MKL_ARCH}_win" + "../compiler/lib/${MKL_ARCH}" "../compiler/lib" + "../../compiler/latest/linux/compiler/lib/${MKL_ARCH}" + "../../compiler/latest/linux/compiler/lib/${MKL_ARCH}_lin" + "../../compiler/latest/windows/compiler/lib/${MKL_ARCH}" + "../../compiler/latest/windows/compiler/lib/${MKL_ARCH}_win" + "../../compiler/latest/mac/compiler/lib") + if(WIN32) + set(OMP_DLLNAME ${LIB_PREFIX}${MKL_OMP_LIB}.dll) + find_path(OMP_DLL_DIR ${OMP_DLLNAME} + HINTS $ENV{LIB} $ENV{LIBRARY_PATH} $ENV{MKLROOT} ${MKL_ROOT} ${CMPLR_ROOT} + PATH_SUFFIXES "redist/${MKL_ARCH}" + "redist/${MKL_ARCH}_win" "redist/${MKL_ARCH}_win/compiler" + "../redist/${MKL_ARCH}/compiler" "../compiler/lib" + "../../compiler/latest/windows/redist/${MKL_ARCH}_win" + 
"../../compiler/latest/windows/redist/${MKL_ARCH}_win/compiler" + "../../compiler/latest/windows/compiler/redist/${MKL_ARCH}_win" + "../../compiler/latest/windows/compiler/redist/${MKL_ARCH}_win/compiler") + find_package_handle_standard_args(MKL REQUIRED_VARS OMP_DLL_DIR) + set(MKL_ENV_PATH "${OMP_DLL_DIR}") + endif() + + if(WIN32 AND SKIP_LIBPATH) + # Only for Intel OpenMP Offload + set(OMP_LINK "libiomp5md.lib") + else() + set(OMP_LINK "${OMP_LIBRARY}") + if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + # Disable PGI OpenMP runtime for correct work of Intel OpenMP runtime + list(APPEND MKL_SUPP_LINK -nomp) + endif() + endif() + find_package_handle_standard_args(MKL REQUIRED_VARS OMP_LIBRARY OMP_LINK) + set(MKL_THREAD_LIB ${OMP_LINK}) + endif() + else() + # Sequential threading + set(MKL_SDL_THREAD_ENV "SEQUENTIAL") + endif() +endif() # MKL_THREADING + +if (UNIX) + list(APPEND MKL_SUPP_LINK -lm -ldl -lpthread) +endif() + +if(DPCPP_COMPILER OR ENABLE_OMP_OFFLOAD) + if(WIN32) + # Detect sycl library version + if(NOT DEFINED SYCL_LIB_VER_CACHE) + set(SYCL_LIB_VER "") + find_library(SYCL_LIB_DIR ${LIB_PREFIX}sycl${LIB_EXT} + HINTS $ENV{LIB} $ENV{CMPLR_ROOT} + PATH_SUFFIXES "windows/lib") + if(NOT SYCL_LIB_DIR) + foreach(ver RANGE 6 99) + find_library(SYCL_LIB_DIR ${LIB_PREFIX}sycl${ver}${LIB_EXT} + HINTS $ENV{LIB} $ENV{CMPLR_ROOT} + PATH_SUFFIXES "windows/lib") + if(SYCL_LIB_DIR) + set(SYCL_LIB_VER ${ver}) + break() + endif() + endforeach() + endif() + set(SYCL_LIB_VER_CACHE ${SYCL_LIB_VER} CACHE STRING "") + endif() + + if(CMAKE_BUILD_TYPE MATCHES "Debug|DebInfo") + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${SYCL_LIB_VER_CACHE}d${LINK_SUFFIX}) + else() + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${SYCL_LIB_VER_CACHE}${LINK_SUFFIX}) + endif() + else() + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}sycl${LINK_SUFFIX}) + endif() + list(APPEND MKL_SUPP_LINK ${LINK_PREFIX}OpenCL${LINK_SUFFIX}) +endif() + +# Setup link types based on 
input options
+set(LINK_TYPES "")
+
+if(DPCPP_COMPILER)
+  add_library(MKL::MKL_DPCPP INTERFACE IMPORTED GLOBAL)
+  target_compile_options(MKL::MKL_DPCPP INTERFACE ${MKL_DPCPP_COPT})
+  target_link_libraries(MKL::MKL_DPCPP INTERFACE ${MKL_LINK_LINE} ${MKL_THREAD_LIB} ${MKL_SUPP_LINK})
+  list(APPEND LINK_TYPES MKL::MKL_DPCPP)
+endif()
+# Single target for all C, Fortran link-lines
+add_library(MKL::MKL INTERFACE IMPORTED GLOBAL)
+target_compile_options(MKL::MKL INTERFACE
+    $<$<STREQUAL:$<TARGET_PROPERTY:LINKER_LANGUAGE>,C>:${MKL_C_COPT}>
+    $<$<STREQUAL:$<TARGET_PROPERTY:LINKER_LANGUAGE>,Fortran>:${MKL_F_COPT}>
+    $<$<STREQUAL:$<TARGET_PROPERTY:LINKER_LANGUAGE>,CXX>:${MKL_CXX_COPT}>
+    $<IF:$<BOOL:${ENABLE_OMP_OFFLOAD}>,${MKL_OFFLOAD_COPT},>)
+target_link_libraries(MKL::MKL INTERFACE ${MKL_LINK_LINE} ${MKL_THREAD_LIB} ${MKL_SUPP_LINK})
+list(APPEND LINK_TYPES MKL::MKL)
+
+foreach(link ${LINK_TYPES})
+  # Set properties on all INTERFACE targets
+  target_include_directories(${link} BEFORE INTERFACE "${MKL_INCLUDE}")
+  list(APPEND MKL_IMPORTED_TARGETS ${link})
+endforeach(link) # LINK_TYPES
+
+if(MKL_LINK STREQUAL "sdl")
+  list(APPEND MKL_ENV "MKL_INTERFACE_LAYER=${MKL_SDL_IFACE_ENV}" "MKL_THREADING_LAYER=${MKL_SDL_THREAD_ENV}")
+endif()
+if(WIN32 AND NOT MKL_LINK STREQUAL "static")
+  list(APPEND MKL_ENV "MKL_BLACS_MPI=${MKL_BLACS_ENV}")
+endif()
+
+# Add MKL dynamic libraries if RPATH is not defined on Unix
+if(UNIX AND CMAKE_SKIP_BUILD_RPATH)
+  if(MKL_LINK STREQUAL "sdl")
+    set(MKL_LIB_DIR $<TARGET_FILE_DIR:MKL::${MKL_SDL}>)
+  else()
+    set(MKL_LIB_DIR $<TARGET_FILE_DIR:MKL::${MKL_CORE}>)
+  endif()
+  if(APPLE)
+    list(APPEND MKL_ENV "DYLD_LIBRARY_PATH=${MKL_LIB_DIR}\;$ENV{DYLD_LIBRARY_PATH}")
+  else()
+    list(APPEND MKL_ENV "LD_LIBRARY_PATH=${MKL_LIB_DIR}\;$ENV{LD_LIBRARY_PATH}")
+  endif()
+endif()
+
+# Add MKL dynamic libraries to PATH on Windows
+if(WIN32 AND NOT MKL_LINK STREQUAL "static")
+  get_filename_component(MKL_DLL_DIR ${MKL_DLL_FILE} DIRECTORY)
+  set(MKL_ENV_PATH "${MKL_DLL_DIR}\;${MKL_ENV_PATH}")
+endif()
+
+if(MKL_ENV_PATH)
+  list(APPEND MKL_ENV "PATH=${MKL_ENV_PATH}\;${OLD_PATH}")
+  if(APPLE)
+    list(APPEND MKL_ENV "DYLD_LIBRARY_PATH=${MKL_ENV_PATH}\:${OLD_PATH}")
+  endif()
+endif()
+
+unset(MKL_DLL_FILE)
+
+endif() # MKL_LIBRARIES
diff --git a/cmake/macros/BuildDependency.cmake b/cmake/macros/BuildDependency.cmake
new file mode 100644
index 0000000..8340375
--- /dev/null
+++ b/cmake/macros/BuildDependency.cmake
@@ -0,0 +1,110 @@
+# Copyright (c) 2017-2023 King Abdullah University of Science and Technology,
+# Copyright (c) 2023 by Brightskies inc,
+# All rights reserved.
+# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST).
+
+# @file BuildDependency.cmake
+# @brief Fetches, builds, and installs a dependency.
+# @version 1.0.0
+# @author Sameh Abdulah
+# @date 2023-03-12
+
+# @param raw_name The name of the dependency.
+# @param url The URL from which to fetch the dependency.
+# @param tag The version or tag of the dependency to fetch.
+# The body also reads FLAGS (cmake/configure arguments), ISCMAKE (CMake project vs. ./configure),
+# ISGIT (git repository vs. archive URL) and AUTO_GEN (run ./autogen.sh first) from the caller's scope,
+# because a macro shares its caller's variables. Extra positional arguments from existing callers are ignored.
+
+# This macro fetches the dependency using CMake's FetchContent module, and then builds and installs it.
+# It also sets several environment variables (LD_LIBRARY_PATH, LIBRARY_PATH, CPATH, PKG_CONFIG_PATH,
+# and ${capital_name}_DIR) and includes and links to the installation directory of the dependency.
+
+# After building and installing the dependency, the macro installs the lib, include, and share directories in the current directory.
+macro(BuildDependency raw_name url tag)
+    # Set the name of the dependency.
+    string(TOLOWER ${raw_name} name)
+    string(TOUPPER ${raw_name} capital_name)
+
+    # Fetch the dependency, depending on whether it's a git repo or not.
+ message(STATUS "Fetching ${name} ${tag} from ${url}") + include(FetchContent) + set(FETCHCONTENT_BASE_DIR ${PROJECT_SOURCE_DIR}/installdir/_deps/${capital_name}/) + if (ISGIT) + FetchContent_Declare(${name} GIT_REPOSITORY "${url}" GIT_TAG "${tag}") + else() + FetchContent_Declare(${name} URL "${url}") + endif () + FetchContent_Populate(${name}) + + # Set up build paths and create directory for build artifacts. + set(${name}_srcpath ${PROJECT_SOURCE_DIR}/installdir/_deps/${capital_name}/${name}-src) + set(${name}_binpath ${${name}_srcpath}/bin) + set(${name}_installpath ${PROJECT_SOURCE_DIR}/installdir/_deps/${capital_name}) + file(MAKE_DIRECTORY ${${name}_binpath}) + + # Configure subproject. + if (ISCMAKE) + execute_process(COMMAND ${CMAKE_COMMAND} ${FLAGS} + ${${name}_srcpath} + WORKING_DIRECTORY + ${${name}_binpath}) + else() + if (AUTO_GEN) + execute_process(COMMAND ./autogen.sh + WORKING_DIRECTORY ${${name}_srcpath} + COMMAND_ERROR_IS_FATAL ANY) + endif() + execute_process(COMMAND ./configure ${FLAGS} + WORKING_DIRECTORY ${${name}_srcpath} + COMMAND_ERROR_IS_FATAL ANY) + endif () + + # Build and install subproject. + include(ProcessorCount) + ProcessorCount(N) + if (ISCMAKE) + execute_process(COMMAND make -j ${N} + WORKING_DIRECTORY ${${name}_binpath} + COMMAND_ERROR_IS_FATAL ANY) + execute_process(COMMAND make install -j ${N} + WORKING_DIRECTORY ${${name}_binpath} + COMMAND_ERROR_IS_FATAL ANY) + else() + execute_process(COMMAND make -j ${N} + WORKING_DIRECTORY ${${name}_srcpath} + COMMAND_ERROR_IS_FATAL ANY) + execute_process(COMMAND make install -j ${N} + WORKING_DIRECTORY ${${name}_srcpath} + COMMAND_ERROR_IS_FATAL ANY) + endif () + + # Set environment variables and include/link to the installation directory of the dependency. 
+ set(ENV{LD_LIBRARY_PATH} "${${name}_installpath}/lib:${${name}_installpath}/lib64:$ENV{LD_LIBRARY_PATH}") + set(ENV{LIBRARY_PATH} "${${name}_installpath}/lib:${${name}_installpath}/lib64:$ENV{LIBRARY_PATH}") + set(ENV{CPATH} "${${name}_installpath}/include:$ENV{CPATH}") + set(ENV{PKG_CONFIG_PATH} "${${name}_installpath}/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}") + set(${capital_name}_DIR "${${name}_installpath}") + include_directories(${${name}_installpath}/include) + link_directories(${${name}_installpath}/lib) + + # Install the dependency's lib, include, and share directories in the current directory. + install( + DIRECTORY + "${${name}_installpath}/lib" + DESTINATION + . + ) + install( + DIRECTORY + "${${name}_installpath}/include" + DESTINATION + . + ) + install( + DIRECTORY + "${${name}_installpath}/share" + DESTINATION + . + ) +endmacro() \ No newline at end of file diff --git a/cmake/macros/BuildSTARPU.cmake b/cmake/macros/BuildSTARPU.cmake new file mode 100644 index 0000000..c739dcb --- /dev/null +++ b/cmake/macros/BuildSTARPU.cmake @@ -0,0 +1,50 @@ +macro(BuildStarPU raw_name url tag) + string(TOLOWER ${raw_name} name) + string(TOUPPER ${raw_name} capital_name) + message(STATUS "Fetching ${name} ${tag} from ${url}") + include(FetchContent) + FetchContent_Declare(${name} GIT_REPOSITORY "${url}" GIT_TAG "${tag}") + FetchContent_Populate(${name}) + set(${name}_srcpath ${CMAKE_BINARY_DIR}/_deps/${name}-src) + set(${name}_binpath ${CMAKE_BINARY_DIR}/_deps/${name}-bin) + set(${name}_installpath ${CMAKE_BINARY_DIR}/_deps/${name}-install) + file(MAKE_DIRECTORY ${${name}_binpath}) + file(MAKE_DIRECTORY ${${name}_installpath}) + # Configure subproject into + include(ProcessorCount) + ProcessorCount(N) + execute_process(COMMAND ./autogen.sh + WORKING_DIRECTORY ${${name}_srcpath} + COMMAND_ERROR_IS_FATAL ANY) + execute_process(COMMAND ./configure --prefix=${${name}_installpath} --disable-cuda --disable-starpufft --disable-opencl --disable-starpu-top --disable-starpufft 
--disable-build-doc --disable-starpufft-examples --disable-fortran --disable-glpk --with-perf-model-dir=${${name}_srcpath} --disable-fstack-protector-all --disable-gcc-extensions + WORKING_DIRECTORY ${${name}_srcpath} + COMMAND_ERROR_IS_FATAL ANY) + execute_process(COMMAND make install -j ${N} + WORKING_DIRECTORY ${${name}_srcpath} + COMMAND_ERROR_IS_FATAL ANY) + set(ENV{LD_LIBRARY_PATH} "${${name}_installpath}/lib:${${name}_installpath}/lib64:$ENV{LD_LIBRARY_PATH}") + set(ENV{LIBRARY_PATH} "${${name}_installpath}/lib:${${name}_installpath}/lib64:$ENV{LIBRARY_PATH}") + set(ENV{CPATH} "${${name}_installpath}/include:$ENV{CPATH}") + set(ENV{PKG_CONFIG_PATH} "${${name}_installpath}/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}") + set(${capital_name}_DIR "${${name}_installpath}") + include_directories(${${name}_installpath}/include) + link_directories(${${name}_installpath}/lib) + install( + DIRECTORY + "${${name}_installpath}/lib" + DESTINATION + ./ + ) + install( + DIRECTORY + "${${name}_installpath}/include" + DESTINATION + ./ + ) + install( + DIRECTORY + "${${name}_installpath}/share" + DESTINATION + ./ + ) +endmacro() \ No newline at end of file diff --git a/cmake/macros/FindBLASEXT.cmake b/cmake/macros/FindBLASEXT.cmake new file mode 100644 index 0000000..fb4b0fe --- /dev/null +++ b/cmake/macros/FindBLASEXT.cmake @@ -0,0 +1,382 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST). +# All rights reserved. 
+# +### +# +# - Find BLAS EXTENDED for ECRC projects: find include dirs and libraries +# +# This module allows to find BLAS libraries by calling the official FindBLAS module +# and handles the creation of different library lists whether the user wishes to link +# with a sequential BLAS or a multihreaded (BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES). +# BLAS is detected with a FindBLAS call then if the BLAS vendor is Intel10_64lp, ACML +# or IBMESSLMT then the module attempts to find the corresponding multithreaded libraries. +# +# The following variables have been added to manage links with sequential or multithreaded +# versions: +# BLAS_INCLUDE_DIRS - BLAS include directories +# BLAS_LIBRARY_DIRS - Link directories for BLAS libraries +# BLAS_SEQ_LIBRARIES - BLAS component libraries to be linked (sequential) +# BLAS_PAR_LIBRARIES - BLAS component libraries to be linked (multithreaded) + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2016 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file ECRC-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Ecrc, substitute the full +# License text for the above reference.) 
+ +# macro to factorize this call +macro(find_package_blas) + if(BLASEXT_FIND_REQUIRED) + if(BLASEXT_FIND_QUIETLY) + find_package(BLAS REQUIRED QUIET) + else() + find_package(BLAS REQUIRED) + endif() + else() + if(BLASEXT_FIND_QUIETLY) + find_package(BLAS QUIET) + else() + find_package(BLAS) + endif() + endif() +endmacro() + +# add a cache variable to let the user specify the BLAS vendor +set(BLA_VENDOR "" CACHE STRING "list of possible BLAS vendor: + Open, Eigen, Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT, + Intel10_32 (intel mkl v10 32 bit), + Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), + Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model), + Intel( older versions of mkl 32 and 64 bit), + ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") + +if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "In FindBLASEXT") + message(STATUS "If you want to force the use of one specific library, " + "\n please specify the BLAS vendor by setting -DBLA_VENDOR=blas_vendor_name" + "\n at cmake configure.") + message(STATUS "List of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, " + "\n DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT, Intel10_32 (intel mkl v10 32 bit)," + "\n Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model)," + "\n Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," + "\n Intel( older versions of mkl 32 and 64 bit)," + "\n ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") +endif() + +if (NOT BLAS_FOUND) + # First try to detect two cases: + # 1: only SEQ libs are handled + # 2: both SEQ and PAR libs are handled + find_package_blas() +endif () + +# detect the cases where SEQ and PAR libs are handled +if(BLA_VENDOR STREQUAL "All" AND + (BLAS_mkl_core_LIBRARY OR BLAS_mkl_core_dll_LIBRARY) + ) + set(BLA_VENDOR "Intel") + if(BLAS_mkl_intel_LIBRARY) + set(BLA_VENDOR "Intel10_32") + endif() + if(BLAS_mkl_intel_lp64_LIBRARY) + set(BLA_VENDOR "Intel10_64lp") + endif() + if(NOT 
BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the MKL." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +elseif(BLA_VENDOR STREQUAL "All" AND BLAS_acml_LIBRARY) + set(BLA_VENDOR "ACML") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the ACML." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +elseif(BLA_VENDOR STREQUAL "All" AND BLAS_essl_LIBRARY) + set(BLA_VENDOR "IBMESSL") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the ESSL." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +endif() + +# Intel case +if(BLA_VENDOR MATCHES "Intel*") + + ### + # look for include path if the BLAS vendor is Intel + ### + + # gather system include paths + unset(_inc_env) + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + set(ENV_MKLROOT "$ENV{MKLROOT}") + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include") + endif() + list(REMOVE_DUPLICATES _inc_env) + + # find mkl.h inside known 
include paths + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + if(BLAS_INCDIR) + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${BLAS_INCDIR}) + else() + if(BLAS_DIR) + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${BLAS_DIR} + PATH_SUFFIXES include) + else() + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${_inc_env}) + endif() + endif() + mark_as_advanced(BLAS_mkl.h_INCLUDE_DIRS) + ## Print status if not found + ## ------------------------- + #if (NOT BLAS_mkl.h_INCLUDE_DIRS AND ECRC_VERBOSE) + # Print_Find_Header_Status(blas mkl.h) + #endif () + set(BLAS_INCLUDE_DIRS "") + if(BLAS_mkl.h_INCLUDE_DIRS) + list(APPEND BLAS_INCLUDE_DIRS "${BLAS_mkl.h_INCLUDE_DIRS}" ) + endif() + + ### + # look for libs + ### + # if Intel 10 64 bit -> look for sequential and multithreaded versions + if(BLA_VENDOR MATCHES "Intel10_64lp*") + + ## look for the sequential version + set(BLA_VENDOR "Intel10_64lp_seq") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "Look for the sequential version Intel10_64lp_seq") + endif() + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "Intel10_64lp") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "Look for the multithreaded version Intel10_64lp") + endif() + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + + else() + + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + endif() + + # ACML case +elseif(BLA_VENDOR MATCHES "ACML*") + + ## 
look for the sequential version + set(BLA_VENDOR "ACML") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "ACML_MP") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + + # IBMESSL case +elseif(BLA_VENDOR MATCHES "IBMESSL*") + + ## look for the sequential version + set(BLA_VENDOR "IBMESSL") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "IBMESSLMT") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + +else() + + if(BLAS_FOUND) + # define the SEQ libs as the BLAS_LIBRARIES + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + +endif() + + +if(BLAS_SEQ_LIBRARIES) + set(BLAS_LIBRARIES "${BLAS_SEQ_LIBRARIES}") +endif() + +# extract libs paths +# remark: because it is not given by find_package(BLAS) +set(BLAS_LIBRARY_DIRS "") +string(REPLACE " " ";" BLAS_LIBRARIES "${BLAS_LIBRARIES}") +foreach(blas_lib ${BLAS_LIBRARIES}) + if (EXISTS "${blas_lib}") + get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) + list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) + else() + string(REPLACE "-L" "" blas_lib "${blas_lib}") + if (EXISTS "${blas_lib}") + list(APPEND BLAS_LIBRARY_DIRS "${blas_lib}" ) + else() + get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) + if (EXISTS "${a_blas_lib_dir}") + list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) + endif() + endif() + endif() 
+endforeach() +if (BLAS_LIBRARY_DIRS) + list(REMOVE_DUPLICATES BLAS_LIBRARY_DIRS) +endif () + +# check that BLAS has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +if(BLA_VENDOR MATCHES "Intel*") + if(BLA_VENDOR MATCHES "Intel10_64lp*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is Intel MKL:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS + BLAS_INCLUDE_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() + else() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS + BLAS_INCLUDE_DIRS) + endif() +elseif(BLA_VENDOR MATCHES "ACML*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is ACML:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() +elseif(BLA_VENDOR MATCHES "IBMESSL*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is ESSL:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n 
(see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() +else() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) +endif() diff --git a/config.sh b/config.sh new file mode 100755 index 0000000..ba0cb4d --- /dev/null +++ b/config.sh @@ -0,0 +1,165 @@ +#!/bin/bash + +# Set variables and default values +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +INSTALL_PREFIX=$PWD/installdir/hicmapp +PROJECT_SOURCE_DIR=$(dirname "$0") +BUILDING_TESTS="OFF" +BUILDING_EXAMPLES="OFF" +VERBOSE=OFF +BUILD_TYPE="RELEASE" +USE_CUDA="OFF" +USE_MPI="OFF" +HICMAPP_SCHED="Default" + + +# Parse command line options +while getopts ":tevhi:dscmoxr:" opt; do + case $opt in + i) ##### Define installation path ##### + echo -e "${YELLOW}Installation path set to $OPTARG.${NC}" + INSTALL_PREFIX=$OPTARG + ;; + t) ##### Building tests enabled ##### + echo -e "${GREEN}Building tests enabled.${NC}" + BUILDING_TESTS="ON" + ;; + e) ##### Building examples enabled ##### + echo -e "${GREEN}Building examples enabled.${NC}" + BUILDING_EXAMPLES="ON" + ;; + c)##### Using cuda enabled ##### + echo -e "${GREEN}Cuda enabled ${NC}" + USE_CUDA=ON + ;; + s) ##### Using sycl enabled ##### + echo -e "${YELLOW}Sycl enabled ${NC}" + USE_SYCL=ON + ;; + m)##### Using MPI enabled ##### + echo -e "${GREEN}MPI enabled ${NC}" + USE_MPI=ON + ;; + o)##### Using OMP enabled ##### + echo -e "${GREEN}OMP enabled ${NC}" + USE_OMP=ON + ;; 
+ v) ##### printing full output of make ##### + echo -e "${YELLOW}printing make with details.${NC}" + VERBOSE=ON + ;; + d)##### Using debug mode to build ##### + echo -e "${RED}Debug mode enabled ${NC}" + BUILD_TYPE="DEBUG" + ;; + x) ##### Using Timer for debugging enabled ##### + echo -e "${BLUE}Timer for Debugging enabled ${NC}" + HICMAPP_USE_TIMER=ON + ;; + r) ##### Using Starpu runtime enabled ##### + echo -e "${YELLOW}Selected $OPTARG as runtime ${NC}" + HICMAPP_SCHED=$OPTARG + ;; + \?) ##### using default settings ##### + BUILDING_TESTS="OFF" + BUILDING_EXAMPLES="OFF" + VERBOSE=OFF + BUILD_TYPE="RELEASE" + USE_CUDA="OFF" + USE_MPI="OFF" + USE_SYCL="OFF" + USE_OMP="OFF" + HICMAPP_SCHED="Default" + INSTALL_PREFIX=$PWD/installdir/hicmapp + + echo -e "${RED}Building tests disabled.${NC}" + echo -e "${RED}Building examples disabled.${NC}" + echo -e "${BLUE}Installation path set to $INSTALL_PREFIX.${NC}" + ;; + :) ##### Error in an option ##### + echo "Option $OPTARG requires parameter(s)" + exit 0 + ;; + h) ##### Prints the help ##### + echo "Usage of $(basename "$0"):" + echo "" + printf "%20s %s\n" "-t :" "to enable building tests." + echo "" + printf "%20s %s\n" "-e :" "to enable building examples." + echo "" + printf "%20s %s\n" "-i [path] :" "specify installation path." 
+ printf "%20s %s\n" "" "default = /hicmapp/installdir/hicmapp" + echo "" + exit 1 + ;; + esac +done + +echo -e "${BLUE}Installation path set to $INSTALL_PREFIX.${NC}" + +if [ -z "$BUILDING_TESTS" ]; then + BUILDING_TESTS="OFF" + echo -e "${RED}Building tests disabled.${NC}" +fi + +if [ -z "$BUILDING_EXAMPLES" ]; then + BUILDING_EXAMPLES="OFF" + echo -e "${RED}Building examples disabled.${NC}" +fi + +if [ -z "$BUILD_TYPE" ]; then + BUILD_TYPE="RELEASE" + echo -e "${GREEN}Building in release mode${NC}" +fi +if [ -z "$USE_CUDA" ]; then + USE_CUDA="OFF" + echo -e "${RED}Using CUDA disabled${NC}" +fi + +if [ -z "$USE_MPI" ]; then + USE_MPI="OFF" + echo -e "${RED}Using MPI disabled${NC}" +fi + +if [ -z "$USE_OMP" ]; then + USE_OMP="OFF" + echo -e "${RED}Using OMP disabled${NC}" +fi + +if [ -z "$USE_SYCL" ]; then + USE_SYCL="OFF" + echo -e "${RED}Using SYCL disabled${NC}" +fi + +if [ -z "$HICMAPP_USE_TIMER" ]; then + HICMAPP_USE_TIMER="OFF" + echo -e "${RED}Using Timer for debugging disabled${NC}" +fi + + +echo "" +echo -e "${YELLOW}Use -h to print the usages of hicmapp flags.${NC}" +echo "" +rm -rf bin/ +mkdir -p bin/installdir + +cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DHICMAPP_INSTALL_PREFIX="$INSTALL_PREFIX" \ + -DHICMAPP_BUILD_TESTS="$BUILDING_TESTS" \ + -DHICMAPP_BUILD_EXAMPLES="$BUILDING_EXAMPLES" \ + -DCMAKE_VERBOSE_MAKEFILE:BOOL=$VERBOSE \ + -DHICMAPP_SCHED=$HICMAPP_SCHED \ + -DUSE_OMP="$USE_OMP" \ + -DUSE_CUDA="$USE_CUDA" \ + -DUSE_SYCL="$USE_SYCL" \ + -DHICMAPP_USE_MPI="$USE_MPI" \ + -H"${PROJECT_SOURCE_DIR}" \ + -B"${PROJECT_SOURCE_DIR}/bin" \ + -G "Unix Makefiles" \ + -DHICMAPP_USE_TIMER=$HICMAPP_USE_TIMER diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt new file mode 100644 index 0000000..74ecc46 --- /dev/null +++ b/docs/CMakeLists.txt @@ -0,0 +1,4 @@ +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/config.in" + "${CMAKE_CURRENT_BINARY_DIR}/config") +add_custom_target(docs COMMAND + "${DOXYGEN_EXECUTABLE}" 
 "${CMAKE_CURRENT_BINARY_DIR}/config")
diff --git a/docs/config.in b/docs/config.in
new file mode 100644
index 0000000..7a6fa2f
--- /dev/null
+++ b/docs/config.in
@@ -0,0 +1,19 @@
+PROJECT_NAME = "HICMAPP"
+BUILTIN_STL_SUPPORT = YES
+EXTRACT_ALL = YES
+INPUT = "@CMAKE_CURRENT_SOURCE_DIR@/index.md" \
+        "@CMAKE_CURRENT_SOURCE_DIR@/../src" \
+        "@CMAKE_CURRENT_SOURCE_DIR@/../include" \
+        "@CMAKE_CURRENT_SOURCE_DIR@/../tests" \
+        "@CMAKE_CURRENT_SOURCE_DIR@/../examples"
+
+
+OUTPUT_DIRECTORY = "@CMAKE_CURRENT_SOURCE_DIR@/"
+RECURSIVE = YES
+HIDE_UNDOC_RELATIONS = NO
+HAVE_DOT = YES
+UML_LOOK = YES
+CALL_GRAPH = YES
+CALLER_GRAPH = YES
+MARKDOWN_SUPPORT=YES
+USE_MDFILE_AS_MAINPAGE = index.md
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..c398034
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,54 @@
+# What is HiCMA++
+HiCMA++ is a C++ implementation of the KAUST ECRC HiCMA library.
+The original C version of HiCMA is available at https://github.com/ecrc/hicma
+
+# Features
+* GEMM
+* Cholesky Factorization
+* Shared and Distributed Memory Models
+* Dynamic Runtime System
+
+# Installation
+
+Installation requires `CMake` version 3.21.2 or later. To build HiCMA++, follow these instructions:
+
+1. Get HiCMA++ from the git repository
+```
+git clone git@github.com:ecrc/hicmapp
+```
+
+2. Go into the HiCMA++ folder
+```
+cd hicmapp
+```
+
+3. Run the configuration script
+```
+./config.sh
+```
+
+4. You can also pass options to the configuration script, e.g. `-t` to build the tests, `-e` to build the examples, and `-c` or `-s` to enable CUDA or SYCL support.
+```
+./config.sh -t -e (-c for CUDA) (-s for SYCL)
+```
+
+5. Build HiCMA++
+```
+./clean_build.sh
+```
+
+6. Build local documentation (optional)
+```
+cd bin && make docs
+```
+
+7. Install HiCMA++
+```
+make install
+```
+8. Add the following line to your `.bashrc` file to use HiCMA++ as a library.
+```
+    export PKG_CONFIG_PATH=/path/to/install:$PKG_CONFIG_PATH
+```
+
+Now you can use the `pkg-config` executable to collect compiler and linker flags for HiCMA++.
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 0000000..965cb53
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,34 @@
+# Example programs for HiCMA++.
+#
+# Each example is built from a single source file and linked against ${PROJECT_NAME},
+# plus MPI::MPI_CXX when HICMAPP_USE_MPI is enabled.
+
+add_executable(simple_example ${CMAKE_CURRENT_SOURCE_DIR}/simple_example.cpp)
+add_executable(hicmapp_par_fixed_rank ${CMAKE_CURRENT_SOURCE_DIR}/par_fixed_rank_main.cpp)
+add_executable(example main.cpp)
+add_executable(compressed_data_generation ${CMAKE_CURRENT_SOURCE_DIR}/compressed_data_generation.cpp)
+add_executable(dgemm_example ${CMAKE_CURRENT_SOURCE_DIR}/dgemm_example.cpp)
+add_executable(data_generation ${CMAKE_CURRENT_SOURCE_DIR}/data_generation.cpp)
+add_executable(potrf_example ${CMAKE_CURRENT_SOURCE_DIR}/potrf_example.cpp)
+add_executable(mpi_generation_gemm_example ${CMAKE_CURRENT_SOURCE_DIR}/mpi_generation_gemm_example.cpp)
+add_executable(mpi_example ${CMAKE_CURRENT_SOURCE_DIR}/mpi_example.cpp)
+
+set(_example_libs ${PROJECT_NAME})
+if (HICMAPP_USE_MPI)
+    list(APPEND _example_libs MPI::MPI_CXX)
+endif ()
+
+# Examples that are linked in every configuration.
+set(_linked_examples simple_example hicmapp_par_fixed_rank example
+        data_generation dgemm_example compressed_data_generation)
+
+# The remaining three are left unlinked when MPI and CUDA are both enabled.
+if (NOT (HICMAPP_USE_MPI AND USE_CUDA))
+    list(APPEND _linked_examples
+            mpi_example mpi_generation_gemm_example potrf_example)
+endif ()
+
+# Apply the shared link line to every selected example.
+foreach (_example IN LISTS _linked_examples)
+    target_link_libraries(${_example} ${_example_libs})
+endforeach ()
\ No newline at end of file
diff --git a/examples/compressed_data_generation.cpp b/examples/compressed_data_generation.cpp
new file mode 100644
index 0000000..de0d577
--- /dev/null
+++ b/examples/compressed_data_generation.cpp
@@ -0,0 +1,122 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include "hicmapp/problem-manager/StarshManager.hpp"
+#include
+#include
+#include
+
+
+using namespace hicmapp::primitives;
+
+int main(int argc, char *argv[]) {
+    int global_elements_in_rows = 1000;
+    int global_elements_in_cols = 1000;
+
+    size_t num_of_sub_matrices = 1;
+    size_t tile_rows = 250;
+    int max_rank = 250;
+    size_t tile_cols = max_rank * 2;
+    size_t block_size = tile_rows * tile_cols;
+    int diagonal = 4;
+    global_elements_in_cols = diagonal * max_rank * 2;
+    auto sub_matrix_rows = global_elements_in_rows;
+    auto sub_matrix_cols = global_elements_in_cols;
+
+    size_t num_of_processes_in_rows = 1;
+    size_t num_of_processes_in_cols = 1;
+    double accuracy;
+    hicmapp::runtime::HicmaCommunicator communicator;
+#ifdef HICMAPP_USE_MPI
+    MPI_Comm comm = MPI_COMM_WORLD;
+    int id;
+    MPI_Init(&argc, &argv);
+    int size;
+    MPI_Comm_size(comm, &size);
+    std::cout << " TOTAL num of processes = " << size << "\n";
+    MPI_Comm_rank(comm, &id);
+    std::cout << " Process Id : " << id << " \n";
+    communicator.SetMPICommunicator(MPI_COMM_WORLD);
+#endif
+
+    hicmapp::runtime::HicmaContext context(communicator);
+
+    TwoDimCyclicDecomposer decomposer(num_of_processes_in_rows, num_of_processes_in_cols);
+
+    Matrix matrixDense(nullptr, global_elements_in_rows,
+                       global_elements_in_rows, tile_rows, tile_rows,
+                       hicmapp::common::StorageLayout::HicmaCM, decomposer, context);
+
+    Matrix matrixAUV(nullptr, global_elements_in_rows,
+                     global_elements_in_cols, tile_rows, tile_cols,
+                     hicmapp::common::StorageLayout::HicmaCM, decomposer, context, max_rank);
+
+
double *ArkArray = new double[diagonal * diagonal]; + + memset(ArkArray, 0, diagonal * diagonal * sizeof(double)); + LAPACKE_dlaset(LAPACK_COL_MAJOR, 'L', diagonal, diagonal, 0.0, tile_rows, + ArkArray, diagonal); + Matrix matrixARK(ArkArray, diagonal, + diagonal, 1, 1, + hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + ProblemManager problem_manager(hicmapp::common::ProblemType::PROBLEM_TYPE_SS); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_N, + global_elements_in_rows); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_NDIM, + 2); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_BETA, + 0.1); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_NU, + 0.5); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_NOISE, + 1.e-2); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_WAVE_K, + 20); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_DIAG, + global_elements_in_rows); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_SYM, + 'S'); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_BLOCK_SIZE, + (int)tile_rows); + + hicmapp::operations::StarsHManager::SetStarsHFormat(problem_manager); + + hicmapp::api::Hicmapp::Init(); + + hicmapp::api::Hicmapp::GenerateDenseMatrix(hicmapp::common::Uplo::HicmaLower, matrixDense, false); + + accuracy = 1e-7; + hicmapp::api::Hicmapp::GenerateCompressedMatrix(hicmapp::common::Uplo::HicmaLower, matrixAUV, + accuracy, false); + + hicmapp::api::Hicmapp::UncompressMatrix(hicmapp::common::Uplo::HicmaLower, matrixAUV, matrixARK, + matrixDense); + + double *array = new double[diagonal * diagonal]; + + 
hicmapp::utils::MatrixHelpers::MatrixToArray(matrixARK, array); + + hicmapp::utils::MatrixHelpers::PrintArray(array, diagonal, diagonal, + hicmapp::common::StorageLayout::HicmaCM); + + double *adense = new double[global_elements_in_rows * global_elements_in_rows]; + + hicmapp::utils::MatrixHelpers::MatrixToArray(matrixDense, adense); + + hicmapp::utils::MatrixHelpers::PrintArray(adense, global_elements_in_rows, global_elements_in_rows, + hicmapp::common::StorageLayout::HicmaCM); + + hicmapp::api::Hicmapp::Finalize(); + + delete[]array; + delete[]ArkArray; + delete[]adense; + +#ifdef HICMAPP_USE_MPI + MPI_Finalize(); +#endif + +} \ No newline at end of file diff --git a/examples/data_generation.cpp b/examples/data_generation.cpp new file mode 100644 index 0000000..26e95bd --- /dev/null +++ b/examples/data_generation.cpp @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include +#include "hicmapp/problem-manager/StarshManager.hpp" +#include +#include + + +const int global_elements_in_rows = 10; +const int global_elements_in_cols = 10; + +using namespace hicmapp::primitives; + +int main(int argc, char *argv[]) { + + size_t tile_rows = 2; + size_t tile_cols = 2; + hicmapp::runtime::HicmaCommunicator communicator; +#ifdef HICMAPP_USE_MPI + MPI_Comm comm = MPI_COMM_WORLD; + int id; + MPI_Init(&argc, &argv); + int size; + MPI_Comm_size(comm, &size); + std::cout << " TOTAL num of processes = " << size << "\n"; + MPI_Comm_rank(comm, &id); + std::cout << " Process Id : " << id << " \n"; + communicator.SetMPICommunicator(comm); +#endif + + + hicmapp::runtime::HicmaContext context(communicator); + size_t num_of_processes_in_rows = 1; + size_t num_of_processes_in_cols = 1; + TwoDimCyclicDecomposer decomposer(num_of_processes_in_rows, num_of_processes_in_cols); + + Matrix matrix(nullptr, global_elements_in_rows, + global_elements_in_cols, tile_rows, tile_cols, + hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + + ProblemManager 
problem_manager(hicmapp::common::ProblemType::PROBLEM_TYPE_SS); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_N, + global_elements_in_rows); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_NDIM, + 2); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_BETA, + 0.1); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_NU, + 0.5); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_NOISE, + 1.e-2); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_WAVE_K, + 20); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_DIAG, + global_elements_in_rows); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_SYM, + 'S'); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_BLOCK_SIZE, + (int)tile_rows); + + hicmapp::operations::StarsHManager::SetStarsHFormat(problem_manager); + hicmapp::api::Hicmapp::Init(); + + hicmapp::api::Hicmapp::GenerateDenseMatrix(hicmapp::common::Uplo::HicmaLower, matrix, false); + + double *array = new double[global_elements_in_rows * global_elements_in_cols]; + + hicmapp::utils::MatrixHelpers::MatrixToArray(matrix, array); + + hicmapp::utils::MatrixHelpers::PrintArray(array, global_elements_in_rows, global_elements_in_rows, + hicmapp::common::StorageLayout::HicmaCM); + + hicmapp::api::Hicmapp::Finalize(); + delete[]array; + +#ifdef HICMAPP_USE_MPI + MPI_Finalize(); +#endif + +} \ No newline at end of file diff --git a/examples/dgemm_example.cpp b/examples/dgemm_example.cpp new file mode 100644 index 0000000..d666927 --- /dev/null +++ b/examples/dgemm_example.cpp @@ -0,0 +1,194 @@ +#include +#include +#include +#include +#include +#include +#include 
"hicmapp/problem-manager/StarshManager.hpp" +#include +#include +#include "hicmapp/utils/MatrixHelpers.hpp" + +using namespace hicmapp::primitives; + +int main(int argc, char *argv[]) { + double accuracy = 1e-9; + double fixed_rank_decay = 2; + + int global_elements_in_rows = 4; + int global_elements_in_cols = 4; + size_t tile_rows = 4; + size_t tile_cols = 4; + +#ifdef HICMAPP_USE_MPI + int id; + MPI_Init(&argc, &argv); + int size; + MPI_Comm_size(MPI_COMM_WORLD, &size); + std::cout << " TOTAL num of processes = " << size << "\n"; + MPI_Comm_rank(MPI_COMM_WORLD, &id); + std::cout << " Process Id : " << id << " \n"; +#endif + hicmapp::runtime::HicmaContext context; + /** Empty matrices generation.. */ + size_t num_of_processes_in_rows = 1; + size_t num_of_processes_in_cols = 1; + TwoDimCyclicDecomposer decomposer(num_of_processes_in_rows, num_of_processes_in_cols); + + Matrix matrixA(nullptr, global_elements_in_rows, + global_elements_in_cols, tile_rows, tile_cols, + hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + Matrix matrixB(nullptr, global_elements_in_rows, + global_elements_in_cols, tile_rows, tile_cols, + hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + Matrix matrixC(nullptr, global_elements_in_rows, + global_elements_in_cols, tile_rows, tile_cols, + hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + + int max_rank = 4; + + Matrix matrixAUV(nullptr, global_elements_in_rows, + global_elements_in_cols, tile_rows, tile_cols, + hicmapp::common::StorageLayout::HicmaCM, decomposer, context, max_rank); + + Matrix matrixBUV(nullptr, global_elements_in_rows, + global_elements_in_cols, tile_rows, tile_cols, + hicmapp::common::StorageLayout::HicmaCM, decomposer, context, max_rank); + Matrix matrixCUV(nullptr, global_elements_in_rows, + global_elements_in_cols, tile_rows, tile_cols, + hicmapp::common::StorageLayout::HicmaCM, decomposer, context, max_rank); + + int rk_tile_num_of_rows = 1; + int rk_tile_num_of_cols = 1; + 
int rk_num_of_rows = matrixAUV.GetNumOfGlobalTilesInRows();//4; + int rk_num_of_cols = matrixAUV.GetNumOfGlobalTilesInCols();//4; + + Matrix matrixArk(nullptr, rk_num_of_rows, rk_num_of_cols, + rk_tile_num_of_rows, rk_tile_num_of_cols, + hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + Matrix matrixBrk(nullptr, rk_num_of_rows, rk_num_of_cols, + rk_tile_num_of_rows, rk_tile_num_of_cols, + hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + Matrix matrixCrk(nullptr, rk_num_of_rows, rk_num_of_cols, + rk_tile_num_of_rows, rk_tile_num_of_cols, + hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + + + /** Generate Random StarsH problem */ + ProblemManager problem_manager(hicmapp::common::ProblemType::PROBLEM_TYPE_RND); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_NDIM, 2); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_NOISE, 0.0); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_DECAY, fixed_rank_decay); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_N, global_elements_in_rows); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_BLOCK_SIZE, (int)tile_rows); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_MT, rk_num_of_rows); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_NT, rk_num_of_rows); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_SYM, 'S'); + + hicmapp::operations::StarsHManager::SetStarsHFormat(problem_manager); + hicmapp::api::Hicmapp::Init(); + + /** Generate Dense Matrices*/ + hicmapp::api::Hicmapp::GenerateDenseMatrix(hicmapp::common::Uplo::HicmaUpperLower, matrixA, false); + hicmapp::api::Hicmapp::GenerateDenseMatrix(hicmapp::common::Uplo::HicmaUpperLower, matrixB, false); + hicmapp::api::Hicmapp::GenerateDenseMatrix(hicmapp::common::Uplo::HicmaUpperLower, matrixC, false); + + 
+ /** Generate Compressed Matrices*/ + hicmapp::api::Hicmapp::GenerateCompressedMatrix(hicmapp::common::Uplo::HicmaUpperLower, matrixAUV, + accuracy, false); + + + hicmapp::api::Hicmapp::GenerateCompressedMatrix(hicmapp::common::Uplo::HicmaUpperLower, matrixBUV, + accuracy, false); + hicmapp::api::Hicmapp::GenerateCompressedMatrix(hicmapp::common::Uplo::HicmaUpperLower, matrixCUV, + accuracy, false); + + { + std::cout << "========================= PRINTING AUV INPUT ============================== \n"; + + auto *AUVOutput = new double[global_elements_in_rows * global_elements_in_rows]; + double *tile_u_data = matrixAUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileSubMatrix(0); + auto rows_u = matrixAUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetNumOfRows(); + auto cols_u = matrixAUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileRank(); + double *tile_v_data = matrixAUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileSubMatrix(1); + auto rows_v = matrixAUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileRank(); + auto cols_v = matrixAUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetNumOfCols(); + + cblas_dgemm( + CblasColMajor, + CblasNoTrans, CblasNoTrans, + (int) rows_u, (int) cols_v, (int) cols_u, + 1, (double *) tile_u_data, (int) rows_u, + (double *) tile_v_data, (int) rows_v, + 0, (double *) AUVOutput, (int) rows_u); + + hicmapp::utils::MatrixHelpers::PrintArray(AUVOutput, global_elements_in_rows, global_elements_in_rows, + hicmapp::common::StorageLayout::HicmaCM); + delete[]AUVOutput; + } + { + std::cout << "========================= PRINTING BUV INPUT ============================== \n"; + auto *BUVOutput = new double[global_elements_in_rows * global_elements_in_rows]; + + double *tile_u_data = matrixBUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileSubMatrix(0); + auto rows_u = matrixBUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetNumOfRows(); + auto cols_u = matrixBUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileRank(); + double *tile_v_data = 
matrixBUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileSubMatrix(1); + auto rows_v = matrixBUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileRank(); + auto cols_v = matrixBUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetNumOfCols(); + + cblas_dgemm( + CblasColMajor, + CblasNoTrans, CblasNoTrans, + (int) rows_u, (int) cols_v, (int) rows_v, + 1, (double *) tile_u_data, (int) rows_u, + (double *) tile_v_data, (int) rows_v, + 0, (double *) BUVOutput, (int) rows_u); + + hicmapp::utils::MatrixHelpers::PrintArray(BUVOutput, global_elements_in_rows, global_elements_in_rows, + hicmapp::common::StorageLayout::HicmaCM); + + delete[]BUVOutput; + } + + double alpha = 1; + double beta = 0; + CompressionParameters aSVDArguments = {accuracy}; + { + + hicmapp::api::Hicmapp::Gemm(matrixAUV, blas::Op::NoTrans, matrixBUV, blas::Op::NoTrans, + matrixCUV, alpha, beta, context, false, aSVDArguments); + + double *tile_u_data = matrixCUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileSubMatrix(0); + auto rows_u = matrixCUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetNumOfRows(); + auto cols_u = matrixCUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileRank(); + double *tile_v_data = matrixCUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileSubMatrix(1); + auto rows_v = matrixCUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetTileRank(); + auto cols_v = matrixCUV.GetSubMatrix(0).GetTilePointer(0, 0)->GetNumOfCols(); + + auto *cOutput = new double[global_elements_in_rows * global_elements_in_rows]; + cblas_dgemm( + CblasColMajor, + CblasNoTrans, CblasNoTrans, + (int) rows_u, (int) cols_v, (int) rows_v, + alpha, (double *) tile_u_data, (int) rows_u, + (double *) tile_v_data, (int) rows_v, + beta, (double *) cOutput, (int) rows_u); + + + std::cout << "========================= PRINTING CUV OUTPUT ============================== \n"; + hicmapp::utils::MatrixHelpers::PrintArray(cOutput, global_elements_in_rows, global_elements_in_rows, + hicmapp::common::StorageLayout::HicmaCM); + + delete[]cOutput; + } + + 
hicmapp::api::Hicmapp::Finalize(); + + +#ifdef HICMAPP_USE_MPI + MPI_Finalize(); +#endif + +} \ No newline at end of file diff --git a/examples/main.cpp b/examples/main.cpp new file mode 100644 index 0000000..587c87d --- /dev/null +++ b/examples/main.cpp @@ -0,0 +1,66 @@ +#include +#include "hicmapp/primitives/matrix.hpp" +#include "hicmapp/primitives/decomposer/concrete/slowest_dimension_decomposer.hpp" +#include "hicmapp/primitives/decomposer/concrete/two_dimension_cyclic_decomposer.hpp" + +const int global_elements_in_rows = 4; +const int global_elements_in_cols = 4; + +using namespace hicmapp::primitives; + +int main(int argc, char *argv[]) { + + hcorepp::kernels::RunContext context; + + // 2d array allocation + auto data = new float[global_elements_in_rows * global_elements_in_cols]; + + for (int j = 0; j < global_elements_in_cols; j++) { + for (int i = 0; i < global_elements_in_rows; i++) { + int idx = j * global_elements_in_rows + i; + data[idx] = idx; + } + } + + for (int j = 0; j < global_elements_in_cols; j++) { + for (int i = 0; i < global_elements_in_rows; i++) { + int idx = j * global_elements_in_rows + i; + std::cout << " Input [" << i << "][" << j << "] = " << data[idx] << " \t"; + } + std::cout << "\n"; + } + + + int rank = 1; + CompressionParameters parameters = {1e-3}; + auto *compressed_tile = new CompressedTile(global_elements_in_rows, global_elements_in_cols, (float *) data, + global_elements_in_rows, parameters, blas::Layout::ColMajor, + context); + + auto COutput = new float[global_elements_in_rows * global_elements_in_cols]; + + auto *new_tile = new CompressedTile(global_elements_in_rows, global_elements_in_cols, nullptr, + global_elements_in_rows, parameters, blas::Layout::ColMajor, context); + new_tile = compressed_tile; + + blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::NoTrans, global_elements_in_rows, + global_elements_in_cols, + new_tile->GetTileRank(), 1.0f, + new_tile->GetUMatrix(), + new_tile->GetNumOfRows(), + 
new_tile->GetVMatrix(), + new_tile->GetTileRank(), 0.0f, + COutput, global_elements_in_rows); + + + for (int j = 0; j < global_elements_in_cols; j++) { + for (int i = 0; i < global_elements_in_rows; i++) { + int idx = j * global_elements_in_rows + i; + std::cout << " Output [" << i << "][" << j << "] = " << std::round(COutput[idx]) << " \t"; + } + std::cout << "\n"; + } + + + delete compressed_tile; +} \ No newline at end of file diff --git a/examples/mpi_example.cpp b/examples/mpi_example.cpp new file mode 100644 index 0000000..5b21d44 --- /dev/null +++ b/examples/mpi_example.cpp @@ -0,0 +1,462 @@ +#include +#include +#include +#include +#include +#include +#include "hcorepp/kernels/kernels.hpp" +#include +#include +#include + +using namespace std::chrono; +using namespace hcorepp::operators; +using namespace hcorepp::helpers; +using namespace hcorepp::kernels; + +template +void GetTileData(T *aDataPtr, const Tile *aTile, hicmapp::runtime::HicmaContext &aContext) { + if (aTile->isDense()) { + auto m = aTile->GetNumOfRows(); + auto n = aTile->GetNumOfCols(); + auto *data = aTile->GetTileSubMatrix(0); + hcorepp::memory::Memcpy(aDataPtr, data, m * n, + aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + return; + } + + auto *comp_tile = static_cast *>(aTile); + auto m = aTile->GetNumOfRows(); + auto n = aTile->GetNumOfCols(); + auto rank = aTile->GetTileRank(); + size_t num_elements = rank * m; + T *cu = new T[num_elements]; + hcorepp::memory::Memcpy(cu, comp_tile->GetUMatrix(), num_elements, + aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + num_elements = n * rank; + T *cv = new T[num_elements]; + hcorepp::memory::Memcpy(cv, comp_tile->GetVMatrix(), num_elements, + aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + aContext.SyncMainContext(); + + blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::NoTrans, + m, n, rank, 1.0, cu, + comp_tile->GetULeadingDim(), cv, 
+ comp_tile->GetVLeadingDim(), 0.0, aDataPtr, m); + delete[] cu; + delete[] cv; +} + +double *ToDense(CompressedTile *aTile, hicmapp::runtime::HicmaContext &aContext) { + auto tile_size = aTile->GetNumOfRows() * aTile->GetTileRank() + aTile->GetTileRank() * aTile->GetNumOfCols(); + auto *dense_ptr = new double[aTile->GetNumOfRows() * aTile->GetNumOfCols()]; + blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::NoTrans, + aTile->GetNumOfRows(), aTile->GetNumOfCols(), aTile->GetTileRank(), 1.0, aTile->GetUMatrix(), + aTile->GetULeadingDim(), aTile->GetVMatrix(), + aTile->GetVLeadingDim(), 0.0, dense_ptr, aTile->GetNumOfRows()); + + return dense_ptr; + +} + +template +size_t GetSubMatrixData(T **aRet, SubMatrix *aSubMatrix, hicmapp::runtime::HicmaContext &aContext) { + auto tiles_st_idx_row = aSubMatrix->GetTilesGlobalStIdxInRows(); + auto tiles_st_idx_col = aSubMatrix->GetTilesGlobalStIdxInCols(); + auto tiles_row = aSubMatrix->GetNumOfTilesinRows(); + auto tiles_col = aSubMatrix->GetNumOfTilesinCols(); + + + size_t data_offset = 0; + size_t submatrix_size = 0; + std::vector *> &tiles = aSubMatrix->GetTiles(); + for (auto *submatrix_tile: tiles) { + submatrix_size += (submatrix_tile->GetNumOfRows() * + submatrix_tile->GetNumOfCols()); + } + + auto *submatrix_data = new T[submatrix_size]; + memset(submatrix_data, 0, submatrix_size * sizeof(T)); + for (auto *submatrix_tile: tiles) { + auto tile_size = submatrix_tile->GetNumOfRows() * + submatrix_tile->GetNumOfCols(); + GetTileData(&submatrix_data[data_offset], submatrix_tile, aContext); + data_offset += tile_size; + aContext.SyncAll(); + } + + *aRet = submatrix_data; + return submatrix_size; +} + + +int main(int argc, char *argv[]) { + // single tile dimensions. + int tile_size = 512; + double fixed_rank_decay = 2; + // parameters needed for matrix multiplication driver to operate correctly. 
+ double alpha = 1; + double beta = 1; + blas::Op trans_a = blas::Op::NoTrans; + blas::Op trans_b = blas::Op::NoTrans; + // parameters for matrix generation. + int64_t mode = 0; + blas::real_type cond = 1; + // Target accuracy. + std::vector accuracy_list = {1e-1, 1e-4, 1e-6}; + // Assuming square matrix, default tile matrix is 2 x 2 tiles. + int matrix_tiles = 2; + int per_tile_generation = 0; + int num_of_threads = 2; + int num_of_cpus = 1; + int n_processes_rows = 1; + int n_processes_cols = 2; + + // Parse optional arguments from command line. + if (argc > 1) { + matrix_tiles = atoi(argv[1]); + if (argc > 2) { + accuracy_list.clear(); + std::string acc_str = argv[2]; + std::stringstream ss(acc_str); + for (double i; ss >> i;) { + accuracy_list.push_back(i); + if (ss.peek() == ',') + ss.ignore(); + } + if (argc > 3) { + tile_size = atoi(argv[3]); + if (argc > 4) { + per_tile_generation = atoi(argv[4]); + if (argc > 5) { + num_of_cpus = atoi(argv[5]); + } + } + } + } + } + // Check for verbosity + bool print_header = true; + { + const char *val = std::getenv("HICMAPP_VERBOSE"); + if (val != nullptr) { // invalid to assign nullptr to std::string + std::string value = val; + if (value == "ON") { + print_header = true; + } + } + } + + { + hicmapp::runtime::HicmaContext context; + + int a_mt = matrix_tiles; + int a_nt = matrix_tiles; + int b_mt = a_nt; + int b_nt = matrix_tiles; + int c_mt = a_mt; + int c_nt = b_nt; + int row_tile_size = tile_size; + int column_tile_size = tile_size; + size_t ref_flops; + size_t dense_flops = 0; + + int64_t iseed[4] = {0, 0, 0, 1}; + + int size = 1; + int id = 0; + +#ifdef HICMAPP_USE_MPI + int required = 0; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &required); + MPI_Comm_size(MPI_COMM_WORLD, &size); + MPI_Comm_rank(MPI_COMM_WORLD, &id); + std::cout << " Process Id : " << id << " \n"; +#endif + + hicmapp::api::Hicmapp::Init(size, 0, -1); + + // Create full matrices with automatic generation. 
+ hcorepp::helpers::Timer timer; + generators::Generator *generator; + if (per_tile_generation > 0) { + generator = new generators::TileLatmsGenerator(iseed, mode, cond, tile_size); + } else { + generator = new generators::LatmsGenerator(iseed, mode, cond); + } + RawMatrix full_a(a_mt * row_tile_size, a_nt * column_tile_size, *generator); + RawMatrix full_b(b_mt * row_tile_size, b_nt * column_tile_size, *generator); + RawMatrix full_c(c_mt * row_tile_size, c_nt * column_tile_size); + + delete generator; + auto initial_c = full_c.Clone(); + timer.Snapshot("generation"); + // Solve reference solution + { + auto a_device = hcorepp::memory::AllocateArray(full_a.GetM() * full_a.GetN(), + context.GetMainContext()); + auto b_device = hcorepp::memory::AllocateArray(full_b.GetM() * full_b.GetN(), + context.GetMainContext()); + auto c_device = hcorepp::memory::AllocateArray(full_c.GetM() * full_c.GetN(), + context.GetMainContext()); + hcorepp::memory::Memcpy(a_device, full_a.GetData(), + full_a.GetM() * full_a.GetN(), context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + hcorepp::memory::Memcpy(b_device, full_b.GetData(), full_b.GetM() * full_b.GetN(), + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + hcorepp::memory::Memcpy(c_device, full_c.GetData(), full_c.GetM() * full_c.GetN(), + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + context.SyncMainContext(); + timer.StartSnapshot(); + hcorepp::kernels::HCoreKernels::Gemm(blas::Layout::ColMajor, trans_a, trans_b, full_c.GetM(), + full_c.GetN(), full_a.GetN(), alpha, a_device, + full_a.GetM(), b_device, + full_b.GetM(), beta, c_device, full_c.GetM(), + context.GetMainContext()); + context.SyncMainContext(); + timer.Snapshot("ref_gemm"); + ref_flops = 2 * full_c.GetM() * full_c.GetN() * full_a.GetN(); + hcorepp::memory::Memcpy(full_c.GetData(), c_device, full_c.GetM() * full_c.GetN(), + context.GetMainContext(), + 
hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + hcorepp::memory::DestroyArray(a_device, context.GetMainContext()); + hcorepp::memory::DestroyArray(b_device, context.GetMainContext()); + hcorepp::memory::DestroyArray(c_device, context.GetMainContext()); + } + // Get memory footprint in KB + size_t ref_memory_footprint = (full_a.GetMemoryFootprint() + full_b.GetMemoryFootprint() + + full_c.GetMemoryFootprint()) / 1024; + // Norm for error calculations + blas::real_type a_norm = full_a.Norm(); + blas::real_type b_norm = full_b.Norm(); + blas::real_type c_init_norm = initial_c.Norm(); + + size_t dense_memory_footprint = 0; + double dense_error = 0; + double dense_error_normalized = 0; +// auto decomposer = TwoDimCyclicDecomposer(size, 1); + auto decomposer = SlowestDimDecomposer(size, hicmapp::common::StorageLayout::HicmaCM); + + // Dense Warmup + { + Matrix a_dense(full_a.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + Matrix b_dense(full_b.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + Matrix c_dense(initial_c.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + context.SyncMainContext(); + + hicmapp::api::Hicmapp::Gemm(a_dense, blas::Op::NoTrans, b_dense, blas::Op::NoTrans, c_dense, + alpha, beta, context, false); + } + // Dense Flow + { + timer.StartSnapshot(); + // Create dense tile matrix + Matrix a_dense(full_a.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + Matrix b_dense(full_b.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, 
hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + Matrix c_dense(initial_c.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + context.SyncAll(); + timer.Snapshot("dense_creation"); + // Do matrix multiplication. + timer.StartSnapshot(); + dense_flops = hicmapp::api::Hicmapp::Gemm(a_dense, blas::Op::NoTrans, b_dense, blas::Op::NoTrans, + c_dense, alpha, beta, context, true); + context.SyncAll(); + timer.Snapshot("dense_gemm"); + +// std::ofstream fs = std::ofstream("DenseHicmaMatrix.txt." + std::to_string(id)); +// c_dense.Print(fs); +// fs.close(); + // Retrieve results back from tile format for verification. + timer.StartSnapshot(); + auto full_dense_c = c_dense.ToRawMatrix(context); +// if(id == 0) { +// fs = std::ofstream("DenseRawMatrix.txt"); +// full_dense_c.Print(fs); +// fs.close(); +// fs = std::ofstream("ReferenceMatrix.txt"); +// full_c.Print(fs); +// fs.close(); +// } + + context.SyncAll(); + + full_dense_c.ReferenceDifference(full_c); + + + dense_error = full_dense_c.Norm(); + + dense_error_normalized = dense_error / ((a_norm + b_norm + c_init_norm) * + + std::numeric_limits::epsilon() * + + std::min(initial_c.GetN(), initial_c.GetM())); + + timer.Snapshot("dense_error_calc"); + +// Error checking. 
+ + if (dense_error_normalized >= 10 && id == 0) { + + std::cout << "Example didn't pass, dense HCore++ error > 10 " << std::endl; + + } + +// Get memory footprint in KB + + dense_memory_footprint = (a_dense.GetMemoryFootprint() + b_dense.GetMemoryFootprint() + + + c_dense.GetMemoryFootprint()) / 1024; + // Get memory footprint in KB + dense_memory_footprint = (a_dense.GetMemoryFootprint() + b_dense.GetMemoryFootprint() + + c_dense.GetMemoryFootprint()) / 1024; + } + + // Compressed flow + bool first_print = true; + int idx = 0; + for (auto &accuracy: accuracy_list) { + CompressionParameters svd_parameters(accuracy); + std::vector> comp_ranks; + // Compressed Warmup + { + Matrix a_comp(full_a.GetData(), a_mt * row_tile_size, + a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context, + svd_parameters); + Matrix b_comp(full_b.GetData(), b_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context, + svd_parameters); + + Matrix c_comp(initial_c.GetData(), a_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context, + svd_parameters); + context.SyncMainContext(); + hicmapp::api::Hicmapp::Gemm(a_comp, blas::Op::NoTrans, b_comp, blas::Op::NoTrans, c_comp, + alpha, + beta, context, true, svd_parameters); + } + //Reset all compression timers + timer.ResetSnapshot("comp_creation"); + timer.ResetSnapshot("comp_gemm"); + timer.ResetSnapshot("comp_error_calc"); + timer.StartSnapshot(); + // Create compressed tiles matrix + Matrix a_comp(full_a.GetData(), a_mt * row_tile_size, + a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context, + svd_parameters); + Matrix b_comp(full_b.GetData(), b_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, 
hicmapp::common::StorageLayout::HicmaCM, decomposer, context, + svd_parameters); + { + Matrix c_comp(initial_c.GetData(), a_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, + context, + svd_parameters); + context.SyncMainContext(); + auto cc_nt = c_comp.GetNumOfGlobalTilesInCols(); + auto cc_mt = c_comp.GetNumOfGlobalTilesInRows(); + comp_ranks.resize(cc_mt); + for (int j = 0; j < cc_mt; j++) { + comp_ranks[j].resize(cc_nt); + } + for (int i = 0; i < cc_nt; i++) { + for (int j = 0; j < cc_mt; j++) { + auto rank = 128; + comp_ranks[j][i] = rank; + } + } + + timer.Snapshot("comp_creation"); + // Do matrix multiplication. + timer.StartSnapshot(); + auto comp_flops = hicmapp::api::Hicmapp::Gemm(a_comp, blas::Op::NoTrans, b_comp, + blas::Op::NoTrans, c_comp, alpha, beta, + context, + true, svd_parameters); + + context.SyncMainContext(); + timer.Snapshot("comp_gemm"); + +// std::ofstream fs = std::ofstream("CompressedHicmaMatrix.txt." + std::to_string(id)); +// c_comp.Print(fs); +// fs.close(); + // Retrieve results back from tile format for verification. + timer.StartSnapshot(); + auto full_approximate_c = c_comp.ToRawMatrix(context); +// if(id == 0 && idx == 2) { +// fs = std::ofstream("CompressedRawMatrix.txt"); +// full_approximate_c.Print(fs); +// fs.close(); +// fs = std::ofstream("ReferenceMatrix.txt"); +// full_c.Print(fs); +// fs.close(); +// } +// Retrieve results back from tile format for verification. + timer.StartSnapshot(); + // Calculate compressed tile matrix reference error + full_approximate_c.ReferenceDifference(full_c); + double comp_error = full_approximate_c.Norm(); + double comp_error_normalized = comp_error / ((a_norm + b_norm + c_init_norm) * accuracy * + std::min(initial_c.GetN(), initial_c.GetM())); + timer.Snapshot("comp_error_calc"); + // Error checking. 
+ if (comp_error_normalized >= 10 && id == 0) { + std::cout << "Example didn't pass, compressed HCore++ error > 10 " << std::endl; + } + // Get memory footprint in KB + size_t compressed_memory_footprint = (a_comp.GetMemoryFootprint() + b_comp.GetMemoryFootprint() + + c_comp.GetMemoryFootprint()) / 1024; + idx++; + if (id == 0) { + // Print results + if (first_print) { + if (print_header) { + printf("tile_count, tile_size, matrix_size, type, error, error_normalized, memory(KB), creation(ms), gemm_time(ms), flops\n"); + print_header = false; + } + printf("%d, %d, %d, ref, 0, 0, %zu, %f, %f, %zu\n", + matrix_tiles, tile_size, matrix_tiles * tile_size, + ref_memory_footprint, timer.GetSnapshot("generation"), + timer.GetSnapshot("ref_gemm"), ref_flops); + printf("%d, %d, %d, dense, %e, %e, %zu, %f, %f, %zu\n", + matrix_tiles, tile_size, matrix_tiles * tile_size, dense_error, + dense_error_normalized, + dense_memory_footprint, timer.GetSnapshot("dense_creation"), + timer.GetSnapshot("dense_gemm"), dense_flops); + first_print = false; + } + printf("%d, %d, %d, %2.1e, %e, %e, %zu, %f, %f, %zu\n", + matrix_tiles, tile_size, matrix_tiles * tile_size, accuracy, comp_error, + comp_error_normalized, + compressed_memory_footprint, timer.GetSnapshot("comp_creation"), + timer.GetSnapshot("comp_gemm"), comp_flops); + } + context.SyncMainContext(); + } + } + + hicmapp::api::Hicmapp::Finalize(); + } + +#ifdef HICMAPP_USE_MPI + MPI_Finalize(); +#endif + return 0; +} \ No newline at end of file diff --git a/examples/mpi_generation_gemm_example.cpp b/examples/mpi_generation_gemm_example.cpp new file mode 100644 index 0000000..95c034b --- /dev/null +++ b/examples/mpi_generation_gemm_example.cpp @@ -0,0 +1,416 @@ +#include +#include +#include +#include +#include +#include "hcorepp/kernels/kernels.hpp" +#include "hicmapp/primitives/ProblemManager.hpp" +#include "hicmapp/problem-manager/StarshManager.hpp" +#include +#include +#include +#ifdef HICMAPP_USE_MPI +#include +#endif + +using 
namespace std::chrono; +using namespace hcorepp::operators; +using namespace hcorepp::helpers; +using namespace hcorepp::kernels; + +template +void GetTileData(T *aDataPtr, const Tile *aTile, hicmapp::runtime::HicmaContext &aContext) { + if (aTile->isDense()) { + auto m = aTile->GetNumOfRows(); + auto n = aTile->GetNumOfCols(); + auto *data = aTile->GetTileSubMatrix(0); + hcorepp::memory::Memcpy(aDataPtr, data, m * n, + aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + return; + } + + auto *comp_tile = static_cast *>(aTile); + auto m = aTile->GetNumOfRows(); + auto n = aTile->GetNumOfCols(); + auto rank = aTile->GetTileRank(); + size_t num_elements = rank * m; + T *cu = new T[num_elements]; + hcorepp::memory::Memcpy(cu, comp_tile->GetUMatrix(), num_elements, + aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + num_elements = n * rank; + T *cv = new T[num_elements]; + hcorepp::memory::Memcpy(cv, comp_tile->GetVMatrix(), num_elements, + aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + aContext.SyncMainContext(); + + blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::NoTrans, + m, n, rank, 1.0, cu, + comp_tile->GetULeadingDim(), cv, + comp_tile->GetVLeadingDim(), 0.0, aDataPtr, m); + delete[] cu; + delete[] cv; +} + +double *ToDense(CompressedTile *aTile, hicmapp::runtime::HicmaContext &aContext) { + auto tile_size = aTile->GetNumOfRows() * aTile->GetTileRank() + aTile->GetTileRank() * aTile->GetNumOfCols(); + auto *dense_ptr = new double[aTile->GetNumOfRows() * aTile->GetNumOfCols()]; + blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::NoTrans, + aTile->GetNumOfRows(), aTile->GetNumOfCols(), aTile->GetTileRank(), 1.0, aTile->GetUMatrix(), + aTile->GetULeadingDim(), aTile->GetVMatrix(), + aTile->GetVLeadingDim(), 0.0, dense_ptr, aTile->GetNumOfRows()); + + return dense_ptr; + +} + +template +size_t GetSubMatrixData(T **aRet, SubMatrix *aSubMatrix, 
hicmapp::runtime::HicmaContext &aContext) { + auto tiles_st_idx_row = aSubMatrix->GetTilesGlobalStIdxInRows(); + auto tiles_st_idx_col = aSubMatrix->GetTilesGlobalStIdxInCols(); + auto tiles_row = aSubMatrix->GetNumOfTilesinRows(); + auto tiles_col = aSubMatrix->GetNumOfTilesinCols(); + + + size_t data_offset = 0; + size_t submatrix_size = 0; + std::vector *> &tiles = aSubMatrix->GetTiles(); + for (auto *submatrix_tile: tiles) { + submatrix_size += (submatrix_tile->GetNumOfRows() * + submatrix_tile->GetNumOfCols()); + } + + auto *submatrix_data = new T[submatrix_size]; + memset(submatrix_data, 0, submatrix_size * sizeof(T)); + for (auto *submatrix_tile: tiles) { + auto tile_size = submatrix_tile->GetNumOfRows() * + submatrix_tile->GetNumOfCols(); + GetTileData(&submatrix_data[data_offset], submatrix_tile, aContext); + data_offset += tile_size; + aContext.SyncAll(); + } + + *aRet = submatrix_data; + return submatrix_size; +} + + +int main(int argc, char *argv[]) { + // single tile dimensions. + int tile_size = 512; + // parameters needed for matrix multiplication driver to operate correctly. + double alpha = 1; + double beta = 1; + blas::Op trans_a = blas::Op::NoTrans; + blas::Op trans_b = blas::Op::NoTrans; + // parameters for matrix generation. + int64_t mode = 0; + blas::real_type cond = 1; + // Target accuracy. + double accuracy = 1e-6; + // Assuming square matrix, default tile matrix is 2 x 2 tiles. + int matrix_tiles = 2; + // Parse optional arguments from command line. 
+ if (argc > 1) { + matrix_tiles = atoi(argv[1]); + if (argc > 2) { + accuracy = atof(argv[2]); + if (argc > 3) { + tile_size = atoi(argv[3]); + } + } + } + + ProblemManager problem_manager(hicmapp::common::ProblemType::PROBLEM_TYPE_SS); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_N, + tile_size * matrix_tiles); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_NDIM, + 2); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_BETA, + 0.1); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_NU, + 0.5); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_NOISE, + 1.e-4); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_SYM, + 'S'); + problem_manager.SetProblemProperty(hicmapp::primitives::ProblemProperty::HICMA_PROB_PROPERTY_BLOCK_SIZE, + (int) tile_size); + + hicmapp::operations::StarsHManager::SetStarsHFormat(problem_manager); + hicmapp::runtime::HicmaContext context; + + int a_mt = matrix_tiles; + int a_nt = matrix_tiles; + int b_mt = a_nt; + int b_nt = matrix_tiles; + int c_mt = a_mt; + int c_nt = b_nt; + int row_tile_size = tile_size; + int column_tile_size = tile_size; + size_t ref_flops; + size_t dense_flops = 0; + + int size = 1; + int id = 0; + +#ifdef HICMAPP_USE_MPI + int required = 0; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &required); + MPI_Comm_size(MPI_COMM_WORLD, &size); + MPI_Comm_rank(MPI_COMM_WORLD, &id); + std::cout << " Process Id : " << id << " \n"; +#endif + + hicmapp::api::Hicmapp::Init(size, 0, -1); + + // Create full matrices with automatic generation. 
+ hcorepp::helpers::Timer timer; + Matrix gen_a(nullptr, a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context); + Matrix gen_b(nullptr, a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context); + Matrix gen_c(nullptr, a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context); + + timer.StartSnapshot(); + hicmapp::api::Hicmapp::GenerateDenseMatrix(hicmapp::common::Uplo::HicmaUpperLower, gen_a, false); + hicmapp::api::Hicmapp::GenerateDenseMatrix(hicmapp::common::Uplo::HicmaUpperLower, gen_b, false); + + auto full_a = gen_a.ToRawMatrix(context); + auto full_b = gen_b.ToRawMatrix(context); + auto full_c = gen_c.ToRawMatrix(context); + + auto initial_c = full_c.Clone(); + timer.Snapshot("generation"); + // Solve reference solution + { + auto a_device = hcorepp::memory::AllocateArray(full_a.GetM() * full_a.GetN(), + context.GetMainContext()); + auto b_device = hcorepp::memory::AllocateArray(full_b.GetM() * full_b.GetN(), + context.GetMainContext()); + auto c_device = hcorepp::memory::AllocateArray(full_c.GetM() * full_c.GetN(), + context.GetMainContext()); + hcorepp::memory::Memcpy(a_device, full_a.GetData(), + full_a.GetM() * full_a.GetN(), context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + hcorepp::memory::Memcpy(b_device, full_b.GetData(), full_b.GetM() * full_b.GetN(), + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + hcorepp::memory::Memcpy(c_device, full_c.GetData(), full_c.GetM() * full_c.GetN(), + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + context.SyncMainContext(); + timer.StartSnapshot(); + hcorepp::kernels::HCoreKernels::Gemm(blas::Layout::ColMajor, trans_a, trans_b, full_c.GetM(), + full_c.GetN(), full_a.GetN(), alpha, a_device, + 
full_a.GetM(), b_device, + full_b.GetM(), beta, c_device, full_c.GetM(), + context.GetMainContext()); + context.SyncMainContext(); + timer.Snapshot("ref_gemm"); + ref_flops = 2 * full_c.GetM() * full_c.GetN() * full_a.GetN(); + hcorepp::memory::Memcpy(full_c.GetData(), c_device, full_c.GetM() * full_c.GetN(), + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + hcorepp::memory::DestroyArray(a_device, context.GetMainContext()); + hcorepp::memory::DestroyArray(b_device, context.GetMainContext()); + hcorepp::memory::DestroyArray(c_device, context.GetMainContext()); + } + // Get memory footprint in KB + size_t ref_memory_footprint = (full_a.GetMemoryFootprint() + full_b.GetMemoryFootprint() + + full_c.GetMemoryFootprint()) / 1024; + // Norm for error calculations + blas::real_type a_norm = full_a.Norm(); + blas::real_type b_norm = full_b.Norm(); + blas::real_type c_init_norm = initial_c.Norm(); + + size_t dense_memory_footprint = 0; + double dense_error = 0; + double dense_error_normalized = 0; +// auto decomposer = TwoDimCyclicDecomposer(size, 1); + auto decomposer = SlowestDimDecomposer(size, hicmapp::common::StorageLayout::HicmaCM); + + // Dense Warmup + { + Matrix c_dense(nullptr, a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + context.SyncMainContext(); + + hicmapp::api::Hicmapp::Gemm(gen_a, blas::Op::NoTrans, gen_b, blas::Op::NoTrans, c_dense, + alpha, beta, context, false); + } + // Dense Flow + { + timer.StartSnapshot(); + // Create dense tile matrix + Matrix c_dense(nullptr, c_mt * row_tile_size, c_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context); + context.SyncAll(); + timer.Snapshot("dense_creation"); + // Do matrix multiplication. 
+ timer.StartSnapshot(); + dense_flops = hicmapp::api::Hicmapp::Gemm(gen_a, blas::Op::NoTrans, gen_b, blas::Op::NoTrans, + c_dense, alpha, beta, context, true); + context.SyncAll(); + timer.Snapshot("dense_gemm"); + + // Retrieve results back from tile format for verification. + timer.StartSnapshot(); + auto full_dense_c = c_dense.ToRawMatrix(context); + + // if(id == 0) { +// fs = std::ofstream("DenseRawMatrix.txt"); +// full_dense_c.Print(fs); +// fs.close(); +// fs = std::ofstream("ReferenceMatrix.txt"); +// full_c.Print(fs); +// fs.close(); +// } + + + context.SyncAll(); + + full_dense_c.ReferenceDifference(full_c); + + + dense_error = full_dense_c.Norm(); + + dense_error_normalized = dense_error / ((a_norm + b_norm + c_init_norm) * + + std::numeric_limits::epsilon() * + + std::min(initial_c.GetN(), initial_c.GetM())); + + timer.Snapshot("dense_error_calc"); + +// Error checking. + + if (dense_error_normalized >= 10 && id == 0) { + + std::cout << "Example didn't pass, dense HCore++ error > 10 " << std::endl; + + } + +// Get memory footprint in KB + + dense_memory_footprint = (gen_a.GetMemoryFootprint() + gen_b.GetMemoryFootprint() + + + c_dense.GetMemoryFootprint()) / 1024; + // Get memory footprint in KB + dense_memory_footprint = (gen_a.GetMemoryFootprint() + gen_b.GetMemoryFootprint() + + c_dense.GetMemoryFootprint()) / 1024; + } + + // Compressed flow + CompressionParameters svd_parameters(accuracy); + std::vector> comp_ranks; + + Matrix a_comp(nullptr, a_mt * row_tile_size, + a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context, + svd_parameters); + Matrix b_comp(nullptr, b_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context, + svd_parameters); + + hicmapp::api::Hicmapp::GenerateCompressedMatrix(hicmapp::common::Uplo::HicmaUpperLower, a_comp, + svd_parameters, false); + 
hicmapp::api::Hicmapp::GenerateCompressedMatrix(hicmapp::common::Uplo::HicmaUpperLower, b_comp, + svd_parameters, false); + // Compressed Warmup + { + + Matrix c_comp(nullptr, c_mt * row_tile_size, + c_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, context, + svd_parameters); + context.SyncMainContext(); + hicmapp::api::Hicmapp::Gemm(a_comp, blas::Op::NoTrans, b_comp, blas::Op::NoTrans, c_comp, + alpha, + beta, context, true, svd_parameters); + } + //Reset all compression timers + timer.ResetSnapshot("comp_creation"); + timer.ResetSnapshot("comp_gemm"); + timer.ResetSnapshot("comp_error_calc"); + timer.StartSnapshot(); + { + Matrix c_comp(nullptr, a_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, decomposer, + context, + svd_parameters); + context.SyncMainContext(); + + timer.Snapshot("comp_creation"); + // Do matrix multiplication. + timer.StartSnapshot(); + auto comp_flops = hicmapp::api::Hicmapp::Gemm(a_comp, blas::Op::NoTrans, b_comp, + blas::Op::NoTrans, c_comp, alpha, beta, + context, + true, svd_parameters); + + context.SyncMainContext(); + timer.Snapshot("comp_gemm"); + +// std::ofstream fs = std::ofstream("CompressedHicmaMatrix.txt." + std::to_string(id)); +// c_comp.Print(fs); +// fs.close(); + + // Retrieve results back from tile format for verification. + timer.StartSnapshot(); + auto full_approximate_c = c_comp.ToRawMatrix(context); +// if(id == 0 && idx == 2) { +// fs = std::ofstream("CompressedRawMatrix.txt"); +// full_approximate_c.Print(fs); +// fs.close(); +// fs = std::ofstream("ReferenceMatrix.txt"); +// full_c.Print(fs); +// fs.close(); +// } +// Retrieve results back from tile format for verification. 
+ + timer.StartSnapshot(); + // Calculate compressed tile matrix reference error + full_approximate_c.ReferenceDifference(full_c); + double comp_error = full_approximate_c.Norm(); + double comp_error_normalized = comp_error / ((a_norm + b_norm + c_init_norm) * accuracy * + std::min(initial_c.GetN(), initial_c.GetM())); + timer.Snapshot("comp_error_calc"); + // Error checking. + if (comp_error_normalized >= 10 && id == 0) { + std::cout << "Example didn't pass, compressed HCore++ error > 10 " << std::endl; + } + // Get memory footprint in KB + size_t compressed_memory_footprint = (a_comp.GetMemoryFootprint() + b_comp.GetMemoryFootprint() + + c_comp.GetMemoryFootprint()) / 1024; + if (id == 0) { + printf("tile_count, tile_size, matrix_size, type, error, error_normalized, memory(KB), creation(ms), gemm_time(ms), flops\n"); + printf("%d, %d, %d, ref, 0, 0, %zu, %f, %f, %zu\n", + matrix_tiles, tile_size, matrix_tiles * tile_size, + ref_memory_footprint, timer.GetSnapshot("generation"), + timer.GetSnapshot("ref_gemm"), ref_flops); + printf("%d, %d, %d, dense, %e, %e, %zu, %f, %f, %zu\n", + matrix_tiles, tile_size, matrix_tiles * tile_size, dense_error, + dense_error_normalized, + dense_memory_footprint, timer.GetSnapshot("dense_creation"), + timer.GetSnapshot("dense_gemm"), dense_flops); + printf("%d, %d, %d, %2.1e, %e, %e, %zu, %f, %f, %zu\n", + matrix_tiles, tile_size, matrix_tiles * tile_size, accuracy, comp_error, + comp_error_normalized, + compressed_memory_footprint, timer.GetSnapshot("comp_creation"), + timer.GetSnapshot("comp_gemm"), comp_flops); + } + context.SyncMainContext(); + } + hicmapp::api::Hicmapp::Finalize(); +#ifdef HICMAPP_USE_MPI + MPI_Finalize(); +#endif + return 0; +} \ No newline at end of file diff --git a/examples/par_fixed_rank_main.cpp b/examples/par_fixed_rank_main.cpp new file mode 100644 index 0000000..c04d33a --- /dev/null +++ b/examples/par_fixed_rank_main.cpp @@ -0,0 +1,375 @@ +#include +#include +#include +#include +#include +#include 
+#include +#include "hcorepp/kernels/kernels.hpp" + + +#define STREAMS 16 + + +using namespace std::chrono; +using namespace hcorepp::operators; +using namespace hcorepp::helpers; +using namespace hcorepp::kernels; + + +int main(int argc, char *argv[]) { + // single tile dimensions. + int tile_size = 512; + double fixed_rank_decay = 2; + // parameters needed for matrix multiplication driver to operate correctly. + double alpha = 1; + double beta = 1; + blas::Op trans_a = blas::Op::NoTrans; + blas::Op trans_b = blas::Op::NoTrans; + // parameters for matrix generation. + int64_t mode = 0; + blas::real_type cond = 1; + // Target accuracy. + std::vector accuracy_list = {1e-1, 1e-4, 1e-6}; + // Assuming square matrix, default tile matrix is 2 x 2 tiles. + int matrix_tiles = 2; + int per_tile_generation = 0; + int num_of_threads = -1; + int size = 1; + int id = 0; +#ifdef HICMAPP_USE_MPI + int required = 0; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &required); + + MPI_Comm_size(MPI_COMM_WORLD, &size); + std::cout << " TOTAL num of processes = " << size << "\n"; + MPI_Comm_rank(MPI_COMM_WORLD, &id); + std::cout << " Process Id : " << id << " \n"; +#endif + // Parse optional arguments from command line. 
+ if (argc > 1) { + matrix_tiles = atoi(argv[1]); + if (argc > 2) { + accuracy_list.clear(); + std::string acc_str = argv[2]; + std::stringstream ss(acc_str); + for (double i; ss >> i;) { + accuracy_list.push_back(i); + if (ss.peek() == ',') + ss.ignore(); + } + if (argc > 3) { + tile_size = atoi(argv[3]); + if (argc > 4) { + per_tile_generation = atoi(argv[4]); + if (argc > 5) { + num_of_threads = atoi(argv[5]); + } + } + } + } + } + // Check for verbosity + bool print_header = true; + { + const char *val = std::getenv("HICMAPP_VERBOSE"); + if (val != nullptr) { // invalid to assign nullptr to std::string + std::string value = val; + if (value == "ON") { + print_header = true; + } + } + } + + hicmapp::runtime::HicmaContext context; + + int a_mt = matrix_tiles; + int a_nt = matrix_tiles; + int b_mt = a_nt; + int b_nt = matrix_tiles; + int c_mt = a_mt; + int c_nt = b_nt; + int row_tile_size = tile_size; + int column_tile_size = tile_size; + size_t ref_flops; + size_t dense_flops; + + + int64_t iseed[4] = {0, 0, 0, 1}; + + hicmapp::api::Hicmapp::Init(size, 0, num_of_threads); + // Create full matrices with automatic generation. 
+ hcorepp::helpers::Timer timer; + generators::Generator *generator; + if (per_tile_generation > 0) { + generator = new generators::TileLatmsGenerator(iseed, mode, cond, tile_size); + } else { + generator = new generators::LatmsGenerator(iseed, mode, cond); + } + RawMatrix full_a(a_mt * row_tile_size, a_nt * column_tile_size, *generator); + RawMatrix full_b(b_mt * row_tile_size, b_nt * column_tile_size, *generator); + RawMatrix full_c(c_mt * row_tile_size, c_nt * column_tile_size); + + delete generator; + auto initial_c = full_c.Clone(); + timer.Snapshot("generation"); + { + auto *warm_a = hcorepp::memory::AllocateArray(full_a.GetM() * full_a.GetN(), + context.GetMainContext()); + auto *warm_b = hcorepp::memory::AllocateArray(full_b.GetM() * full_b.GetN(), + context.GetMainContext()); + auto *warm_c = hcorepp::memory::AllocateArray(full_c.GetM() * full_c.GetN(), + context.GetMainContext()); + hcorepp::memory::Memcpy(warm_a, full_a.GetData(), + full_a.GetM() * full_a.GetN(), context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + hcorepp::memory::Memcpy(warm_b, full_b.GetData(), full_b.GetM() * full_b.GetN(), + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + hcorepp::memory::Memcpy(warm_c, full_c.GetData(), full_c.GetM() * full_c.GetN(), + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + context.SyncMainContext(); + hcorepp::kernels::HCoreKernels::Gemm(blas::Layout::ColMajor, trans_a, trans_b, full_c.GetM(), + full_c.GetN(), full_a.GetN(), alpha, warm_a, + full_a.GetM(), warm_b, + full_b.GetM(), beta, warm_c, full_c.GetM(), + context.GetMainContext()); + hcorepp::memory::DestroyArray(warm_a, context.GetMainContext()); + hcorepp::memory::DestroyArray(warm_b, context.GetMainContext()); + hcorepp::memory::DestroyArray(warm_c, context.GetMainContext()); + } + // Solve reference solution + { + auto a_device = hcorepp::memory::AllocateArray(full_a.GetM() * full_a.GetN(), + 
context.GetMainContext()); + auto b_device = hcorepp::memory::AllocateArray(full_b.GetM() * full_b.GetN(), + context.GetMainContext()); + auto c_device = hcorepp::memory::AllocateArray(full_c.GetM() * full_c.GetN(), + context.GetMainContext()); + hcorepp::memory::Memcpy(a_device, full_a.GetData(), + full_a.GetM() * full_a.GetN(), context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + hcorepp::memory::Memcpy(b_device, full_b.GetData(), full_b.GetM() * full_b.GetN(), + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + hcorepp::memory::Memcpy(c_device, full_c.GetData(), full_c.GetM() * full_c.GetN(), + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::HOST_TO_DEVICE); + context.SyncMainContext(); + timer.StartSnapshot(); + hcorepp::kernels::HCoreKernels::Gemm(blas::Layout::ColMajor, trans_a, trans_b, full_c.GetM(), + full_c.GetN(), full_a.GetN(), alpha, a_device, + full_a.GetM(), b_device, + full_b.GetM(), beta, c_device, full_c.GetM(), + context.GetMainContext()); + context.SyncMainContext(); + timer.Snapshot("ref_gemm"); + ref_flops = 2 * full_c.GetM() * full_c.GetN() * full_a.GetN(); + hcorepp::memory::Memcpy(full_c.GetData(), c_device, full_c.GetM() * full_c.GetN(), + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + hcorepp::memory::DestroyArray(a_device, context.GetMainContext()); + hcorepp::memory::DestroyArray(b_device, context.GetMainContext()); + hcorepp::memory::DestroyArray(c_device, context.GetMainContext()); + } + // Get memory footprint in KB + size_t ref_memory_footprint = (full_a.GetMemoryFootprint() + full_b.GetMemoryFootprint() + + full_c.GetMemoryFootprint()) / 1024; + // Norm for error calculations + blas::real_type a_norm = full_a.Norm(); + blas::real_type b_norm = full_b.Norm(); + blas::real_type c_init_norm = initial_c.Norm(); + + size_t dense_memory_footprint; + double dense_error; + double dense_error_normalized; + // 
Dense Warmup + { + Matrix a_dense(full_a.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context); + Matrix b_dense(full_b.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context); + Matrix c_dense(initial_c.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context); + context.SyncMainContext(); + hicmapp::api::Hicmapp::Gemm(a_dense, blas::Op::NoTrans, b_dense, blas::Op::NoTrans, c_dense, alpha, + beta, context, true); + } + // Dense Flow + { + timer.StartSnapshot(); + // Create dense tile matrix + Matrix a_dense(full_a.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context); + Matrix b_dense(full_b.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context); + Matrix c_dense(initial_c.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context); + context.SyncAll(); + timer.Snapshot("dense_creation"); + // Do matrix multiplication. + timer.StartSnapshot(); + dense_flops = hicmapp::api::Hicmapp::Gemm(a_dense, blas::Op::NoTrans, b_dense, blas::Op::NoTrans, + c_dense, alpha, beta, context, true); + context.SyncAll(); + timer.Snapshot("dense_gemm"); + // Retrieve results back from tile format for verification. 
+ timer.StartSnapshot(); + auto full_dense_c = c_dense.ToRawMatrix(context); + context.SyncAll(); + full_dense_c.ReferenceDifference(full_c); + + dense_error = full_dense_c.Norm(); + + dense_error_normalized = dense_error / ((a_norm + b_norm + c_init_norm) * + std::numeric_limits::epsilon() * + std::min(initial_c.GetN(), initial_c.GetM())); + timer.Snapshot("dense_error_calc"); + // Error checking. + if (dense_error_normalized >= 10) { + std::cout << "Example didn't pass, dense HCore++ error > 10 " << std::endl; + } + // Get memory footprint in KB + dense_memory_footprint = (a_dense.GetMemoryFootprint() + b_dense.GetMemoryFootprint() + + c_dense.GetMemoryFootprint()) / 1024; + } + // Compressed flow + bool first_print = true; + + for (auto &accuracy: accuracy_list) { + CompressionParameters svd_parameters(accuracy); + std::vector> comp_ranks; + // Compressed Warmup + { + Matrix a_comp(full_a.GetData(), a_mt * row_tile_size, + a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context, svd_parameters); + Matrix b_comp(full_b.GetData(), b_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context, svd_parameters); + Matrix c_comp(initial_c.GetData(), a_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context, svd_parameters); + context.SyncMainContext(); + hicmapp::api::Hicmapp::Gemm(a_comp, blas::Op::NoTrans, b_comp, blas::Op::NoTrans, c_comp, alpha, beta , context, true, svd_parameters); + + auto cc_nt = c_comp.GetNumOfGlobalTilesInCols(); + auto cc_mt = c_comp.GetNumOfGlobalTilesInRows(); + comp_ranks.resize(cc_mt); + for (int j = 0; j < cc_mt; j++) { + comp_ranks[j].resize(cc_nt); + } + for (int i = 0; i < cc_nt; i++) { + for (int j = 0; j < cc_mt; j++) { + auto* c_tile = c_comp.GetSubMatrices()[0]->GetTiles()[j * cc_mt + i]; + auto rank = c_tile->GetTileRank(); + 
comp_ranks[j][i] = rank; + } + } + } + //Reset all compression timers + timer.ResetSnapshot("comp_creation"); + timer.ResetSnapshot("comp_gemm"); + timer.ResetSnapshot("comp_error_calc"); + timer.StartSnapshot(); + // Create compressed tiles matrix + Matrix a_comp(full_a.GetData(), a_mt * row_tile_size, + a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context, svd_parameters); + Matrix b_comp(full_b.GetData(), b_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context, svd_parameters); + { + Matrix c_comp(initial_c.GetData(), a_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context, svd_parameters); + context.SyncMainContext(); + timer.Snapshot("comp_creation"); + // Do matrix multiplication. + timer.StartSnapshot(); + auto comp_flops = hicmapp::api::Hicmapp::Gemm(a_comp, blas::Op::NoTrans, b_comp, blas::Op::NoTrans, c_comp, alpha, beta , context, true, svd_parameters); + context.SyncMainContext(); + timer.Snapshot("comp_gemm"); + // Retrieve results back from tile format for verification. + timer.StartSnapshot(); + auto full_approximate_c = c_comp.ToRawMatrix(context); + // Calculate compressed tile matrix reference error + full_approximate_c.ReferenceDifference(full_c); + double comp_error = full_approximate_c.Norm(); + double comp_error_normalized = comp_error / ((a_norm + b_norm + c_init_norm) * accuracy * + std::min(initial_c.GetN(), initial_c.GetM())); + timer.Snapshot("comp_error_calc"); + // Error checking. 
+ if (comp_error_normalized >= 10) { + std::cout << "Example didn't pass, compressed HCore++ error > 10 " << std::endl; + } + // Get memory footprint in KB + size_t compressed_memory_footprint = (a_comp.GetMemoryFootprint() + b_comp.GetMemoryFootprint() + + c_comp.GetMemoryFootprint()) / 1024; + // Print results + if (first_print) { + if (print_header) { + printf("tile_count, tile_size, matrix_size, type, error, error_normalized, memory(KB), creation(ms), gemm_time(ms), flops\n"); + print_header = false; + } + printf("%d, %d, %d, ref, 0, 0, %zu, %f, %f, %zu\n", + matrix_tiles, tile_size, matrix_tiles * tile_size, + ref_memory_footprint, timer.GetSnapshot("generation"), + timer.GetSnapshot("ref_gemm"), ref_flops); + printf("%d, %d, %d, dense, %e, %e, %zu, %f, %f, %zu\n", + matrix_tiles, tile_size, matrix_tiles * tile_size, dense_error, dense_error_normalized, + dense_memory_footprint, timer.GetSnapshot("dense_creation"), + timer.GetSnapshot("dense_gemm"), dense_flops); + first_print = false; + } + printf("%d, %d, %d, %2.1e, %e, %e, %zu, %f, %f, %zu\n", + matrix_tiles, tile_size, matrix_tiles * tile_size, accuracy, comp_error, comp_error_normalized, + compressed_memory_footprint, timer.GetSnapshot("comp_creation"), + timer.GetSnapshot("comp_gemm"), comp_flops); + } + + timer.ResetSnapshot("comp_creation"); + timer.ResetSnapshot("comp_gemm"); + timer.ResetSnapshot("comp_error_calc"); + { + Matrix c_comp(initial_c.GetData(), a_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context, svd_parameters); + context.SyncMainContext(); + timer.Snapshot("comp_creation"); + // Do matrix multiplication. + timer.StartSnapshot(); + auto comp_flops = hicmapp::api::Hicmapp::Gemm(a_comp, blas::Op::NoTrans, b_comp, blas::Op::NoTrans, c_comp, alpha, beta , context, true, svd_parameters, comp_ranks); + timer.Snapshot("comp_gemm"); + // Retrieve results back from tile format for verification. 
+ timer.StartSnapshot(); + auto full_approximate_c = c_comp.ToRawMatrix(context); + // Calculate compressed tile matrix reference error + full_approximate_c.ReferenceDifference(full_c); + double comp_error = full_approximate_c.Norm(); + double comp_error_normalized = comp_error / ((a_norm + b_norm + c_init_norm) * accuracy * + std::min(initial_c.GetN(), initial_c.GetM())); + timer.Snapshot("comp_error_calc"); + // Error checking. + if (comp_error_normalized >= 10) { + std::cout << "Example didn't pass, compressed HCore++ error > 10 " << std::endl; + } + // Get memory footprint in KB + size_t compressed_memory_footprint = (a_comp.GetMemoryFootprint() + b_comp.GetMemoryFootprint() + + c_comp.GetMemoryFootprint()) / 1024; + printf("%d, %d, %d, %2.1e-fixed-rank, %e, %e, %zu, %f, %f, %zu\n", + matrix_tiles, tile_size, matrix_tiles * tile_size, accuracy, comp_error, comp_error_normalized, + compressed_memory_footprint, timer.GetSnapshot("comp_creation"), + timer.GetSnapshot("comp_gemm"), comp_flops); + } + } + + hicmapp::api::Hicmapp::Finalize(); + +#ifdef HICMAPP_USE_MPI + MPI_Finalize(); +#endif + return 0; +} \ No newline at end of file diff --git a/examples/potrf_example.cpp b/examples/potrf_example.cpp new file mode 100644 index 0000000..0acf539 --- /dev/null +++ b/examples/potrf_example.cpp @@ -0,0 +1,190 @@ +#include "hicmapp/problem-manager/StarshManager.hpp" +#include +#include +#include +#include +#include "hcorepp/api/HCore.hpp" +#include "hcorepp/kernels/kernels.hpp" +#include "hcorepp/helpers/Timer.hpp" + +int main(int argc, char *argv[]) { + // single tile dimensions. + int tile_size = 512; + // Target accuracy. + double accuracy = 1e-6; + // Assuming square matrix, default tile matrix is 2 x 2 tiles. + int matrix_tiles = 2; + // The number of threads to run with. + int thread_number = 1; + // Parse optional arguments from command line. 
+ if (argc > 1) { + matrix_tiles = atoi(argv[1]); + if (argc > 2) { + accuracy = atof(argv[2]); + if (argc > 3) { + tile_size = atoi(argv[3]); + if (argc > 4) { + thread_number = atoi(argv[4]); + } + } + } + } + std::cout << "Running with Accuracy = " << accuracy; + std::cout << ", Tile Size = " << tile_size; + std::cout << ", Tiles Per Row = " << matrix_tiles + << " and Total Matrix = " << tile_size * matrix_tiles << std::endl; + int starsh_decay = 2; + size_t global_elements_in_rows = matrix_tiles * tile_size; + int size = 1; + int id = 0; + hicmapp::runtime::HicmaContext context; + +#ifdef HICMAPP_USE_MPI + int required = 0; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &required); + MPI_Comm_size(MPI_COMM_WORLD, &size); + MPI_Comm_rank(MPI_COMM_WORLD, &id); + std::cout << " Process Id : " << id << " \n"; +#endif + + hicmapp::api::Hicmapp::Init(thread_number, 0, -1); + + /** Generate Random StarsH problem */ + ProblemManager problem_manager(hicmapp::common::ProblemType::PROBLEM_TYPE_SS); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_NDIM, 2); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_BETA, 0.1); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_NU, 0.5); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_NOISE, 1e-2); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_SYM, 'S'); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_DECAY, starsh_decay); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_N, tile_size * matrix_tiles); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_BLOCK_SIZE, tile_size); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_MT, matrix_tiles); + problem_manager.SetProblemProperty(hicmapp::primitives::HICMA_PROB_PROPERTY_NT, matrix_tiles); + + 
hicmapp::operations::StarsHManager::SetStarsHFormat(problem_manager); + + int max_rank = tile_size / MAX_RANK_RATIO; + + CompressionParameters svd_parameters(accuracy); + + int a_mt = matrix_tiles; + int a_nt = matrix_tiles; + int b_mt = a_nt; + int b_nt = matrix_tiles; + int row_tile_size = tile_size; + int column_tile_size = tile_size; + + hcorepp::helpers::Timer timer; + + Matrix matrix_Diag(nullptr, a_mt * row_tile_size, + column_tile_size, row_tile_size, + column_tile_size, + hicmapp::common::StorageLayout::HicmaCM, context, + 0, true); + Matrix matrix_dense(nullptr, a_mt * row_tile_size, + a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, + context); + Matrix matrix_AUV(nullptr, a_mt * row_tile_size, + b_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, + context, svd_parameters); + + Matrix matrix_RK(nullptr, b_mt, b_nt, 1, 1, hicmapp::common::StorageLayout::HicmaCM, context); + + timer.StartSnapshot(); + hicmapp::api::Hicmapp::GenerateDiagonalTiles(hicmapp::common::Uplo::HicmaLower, matrix_AUV, matrix_RK, + matrix_Diag, 0, max_rank, accuracy, 0, matrix_dense, + context); + timer.Snapshot("Generation"); + + auto Adense = matrix_dense.ToRawMatrix(context); + + auto *swork = (double *) calloc(2 * global_elements_in_rows, sizeof(double)); + + auto raw_matrix = matrix_dense.ToRawMatrix(context); + Matrix matrix_dense2(raw_matrix.GetData(), a_mt * row_tile_size, a_nt * column_tile_size, row_tile_size, + column_tile_size, hicmapp::common::StorageLayout::HicmaCM, context); + + int fixed_rank = 0; + timer.StartSnapshot(); + hicmapp::api::Hicmapp::Cholesky(hicmapp::common::Uplo::HicmaLower, matrix_AUV, matrix_Diag, matrix_RK, + fixed_rank, max_rank, accuracy, context); + + timer.Snapshot("Cholesky"); + + { + /// checking accuracy... 
+ auto Adense2 = matrix_dense.ToRawMatrix(context); + for (size_t j = 0; j < Adense2.GetM(); j++) { + for (size_t i = 0; i < j; i++) { + Adense2.GetData()[j * Adense2.GetM() + i] = 0.0; + } + } + auto normA = Adense2.Normmest(swork); + hicmapp::api::Hicmapp::UncompressMatrix(hicmapp::common::Uplo::HicmaLower, matrix_AUV, matrix_RK, + matrix_dense2); + hicmapp::api::Hicmapp::DiagVecToMat(matrix_Diag, matrix_dense2, context); + auto AhicmaT = matrix_dense2.ToRawMatrix(context); + auto Ahicma = matrix_dense2.ToRawMatrix(context); + + double normAhicma = 0; + { + size_t i, j; + for (j = 0; j < Ahicma.GetM(); j++) { + for (i = 0; i < j; i++) { + Ahicma.GetData()[j * Ahicma.GetM() + i] = 0.0; + } + } + + hcorepp::helpers::RawMatrix orgAhicma_raw_matrix = Ahicma.Clone(); + normAhicma = orgAhicma_raw_matrix.Normmest(swork); + } + + { + size_t i, j; + for (j = 0; j < Adense.GetM(); j++) { + for (i = 0; i < j; i++) { + Adense.GetData()[j * Adense.GetM() + i] = 0.0; + } + } + } + + LAPACKE_dge_trans(LAPACK_COL_MAJOR, (lapack_int) Ahicma.GetM(), (lapack_int) Ahicma.GetM(), + (const double *) Ahicma.GetData(), (lapack_int) Ahicma.GetM(), + (double *) AhicmaT.GetData(), (lapack_int) AhicmaT.GetM()); + + blas::trmm(blas::Layout::ColMajor, blas::Side::Left, blas::Uplo::Lower, blas::Op::NoTrans, blas::Diag::NonUnit, + AhicmaT.GetM(), AhicmaT.GetN(), 1.0, Ahicma.GetData(), Ahicma.GetM(), AhicmaT.GetData(), + Ahicma.GetM()); + + { + size_t i, j; + for (j = 0; j < AhicmaT.GetM(); j++) { + for (i = 0; i < j; i++) { + AhicmaT.GetData()[j * AhicmaT.GetM() + i] = 0.0; + } + } + } + + size_t nelm = AhicmaT.GetM() * AhicmaT.GetN(); + + cblas_daxpy(nelm, -1.0, AhicmaT.GetData(), 1, Adense.GetData(), 1); + + auto normDenseAppDiff = Adense.Normmest(swork); + double accuracyDenseAppDiff = normDenseAppDiff / normA; + printf("\n\nnormA:%.2e normDenseAppdiff:%.2e Accuracy: %.2e\n", normA, normDenseAppDiff, accuracyDenseAppDiff); + std::cout << "Generation Time " << timer.GetSnapshot("Generation") 
<< " ms" << std::endl; + std::cout << "Cholesky Time " << timer.GetSnapshot("Cholesky") << " ms" << std::endl; + + } + + hicmapp::api::Hicmapp::Finalize(); + +#ifdef HICMAPP_USE_MPI + MPI_Finalize(); +#endif + return 0; + +} \ No newline at end of file diff --git a/examples/simple_example.cpp b/examples/simple_example.cpp new file mode 100644 index 0000000..6a981d7 --- /dev/null +++ b/examples/simple_example.cpp @@ -0,0 +1,97 @@ + +#include +#include +#include +#include + +const int global_elements_in_rows = 8; +const int global_elements_in_cols = 8; + +using namespace hicmapp::primitives; + +int main(int argc, char *argv[]) { + + // 2d array allocation + auto data = new float[global_elements_in_rows][global_elements_in_cols]; + + size_t index = 0; + for (int i = 0; i < global_elements_in_rows; i++) { + for (int j = 0; j < global_elements_in_cols; j++) { + data[i][j] = index; + index++; + } + } + + for (int i = 0; i < global_elements_in_rows; i++) { + for (int j = 0; j < global_elements_in_cols; j++) { + std::cout << " data [" << i << "][" << j << "] = " << data[i][j] << " \t"; + } + std::cout << "\n"; + } + + size_t num_of_sub_matrices = 2; + size_t tile_rows = 2; + size_t tile_cols = 2; +// SlowestDimDecomposer decomposer(num_of_sub_matrices, hicmapp::common::StorageLayout::HicmaRM); + +#ifdef HICMAPP_USE_MPI + int id; + MPI_Init(&argc, &argv); + int size; + MPI_Comm_size(MPI_COMM_WORLD, &size); + std::cout << " TOTAL num of processes = " << size << "\n"; + MPI_Comm_rank(MPI_COMM_WORLD, &id); + std::cout << " Process Id : " << id << " \n"; +#endif + + hicmapp::runtime::HicmaContext context; + + size_t num_of_processes_in_rows = 2; + size_t num_of_processes_in_cols = 3; + TwoDimCyclicDecomposer decomposer(num_of_processes_in_rows, num_of_processes_in_cols); + + Matrix matrix((float *) data, global_elements_in_rows, + global_elements_in_cols, tile_rows, tile_cols, + hicmapp::common::StorageLayout::HicmaRM, decomposer, context); + + auto sub_matrices = 
matrix.GetSubMatrices(); + + int sub_matrix_idx = 0; +#ifdef HICMAPP_USE_MPI +// for (int i = 0; i < size; i++) { + if (id == 5) { + std::cout << " PRINTING SUbmatrices for Process :: " << id << "\n"; +#endif + + for (auto sub_matrix: sub_matrices) { + std::cout << " ============== SUBMATRIX " << sub_matrix_idx << " ================ \n"; + auto num_of_tiles = sub_matrix->GetNumberofTiles(); + std::cout << " NUM of TIles in subMatrix : " << num_of_tiles << "\n"; + auto tile_idx = 0; + auto tiles = sub_matrix->GetTiles(); + for (auto tile: tiles) { + auto tile_rows = tile->GetNumOfRows(); + auto tile_cols = tile->GetNumOfCols(); + auto tile_data = tile->GetTileSubMatrix(0); + std::cout << " Printing daata in tile : " << tile_idx << "\n"; + for (int i = 0; i < tile_rows; i++) { + for (int j = 0; j < tile_cols; j++) { + std::cout << " data [" << i << "][" << j << "] = " << tile_data[i * tile_cols + j] << "\t"; + } + std::cout << "\n"; + } + tile_idx++; + } + sub_matrix_idx++; + } +#ifdef HICMAPP_USE_MPI + } +// } +#endif + delete[] data; + +#ifdef HICMAPP_USE_MPI + MPI_Finalize(); +#endif + +} \ No newline at end of file diff --git a/hicmappConfig.cmake.in b/hicmappConfig.cmake.in new file mode 100644 index 0000000..a05f738 --- /dev/null +++ b/hicmappConfig.cmake.in @@ -0,0 +1,62 @@ +# defined since 2.8.3 +if (CMAKE_VERSION VERSION_LESS 2.8.3) + get_filename_component(CMAKE_CURRENT_LIST_DIR ${CMAKE_CURRENT_LIST_FILE} PATH) +endif () + +# Compute the installation prefix relative to this file. 
+get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if (_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif () + +set(USE_CUDA "@USE_CUDA@") +set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/Modules/cmake) + +set(ENV{PKG_CONFIG_PATH} "${_IMPORT_PREFIX}/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}") +include_directories(${_IMPORT_PREFIX}/include) +link_directories(${_IMPORT_PREFIX}/lib) +set(BLA_PREFER_PKGCONFIG "ON") +find_package(OpenMP REQUIRED) +if (USE_CUDA) + message("-- HiCMA++ built CUDA Support") + find_package(CUDAToolkit REQUIRED) + find_package(BLAS REQUIRED) + find_package(blaspp REQUIRED) + unset(BLA_VENDOR) + find_package(LAPACK REQUIRED) +else () + message("-- HiCMA++ built x86 Support") + set(gpu_backend "none" CACHE STRING "GPU backend to use" FORCE) # the value must precede CACHE <type> <docstring>; otherwise it is parsed as an empty value + find_package(blaspp REQUIRED) + find_package(lapackpp REQUIRED) +endif () + +# Add component-configs. +include("${CMAKE_CURRENT_LIST_DIR}/hicmappCoreConfig.cmake") + +# Compute the installation prefix relative to this file. +get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +if (_IMPORT_PREFIX STREQUAL "/") + set(_IMPORT_PREFIX "") +endif () +set(hicmapp_LIBRARIES hicmapp) +set(hicmapp_LIBRARY_DIRS "${_IMPORT_PREFIX}/lib") +set(hicmapp_INCLUDE_DIRS "${_IMPORT_PREFIX}/include") + +find_package_handle_standard_args(hicmapp + NAME_MISMATCHED + REQUIRED_VARS hicmapp_INCLUDE_DIRS hicmapp_LIBRARY_DIRS hicmapp_LIBRARIES + VERSION_VAR hicmapp_VERSION + ) + +# Cleanup temporary variables.
+set(_IMPORT_PREFIX) +if (CMAKE_VERSION VERSION_LESS 2.8.3) + set(CMAKE_CURRENT_LIST_DIR) +endif () diff --git a/include/hicmapp/api/Hicmapp.hpp b/include/hicmapp/api/Hicmapp.hpp new file mode 100644 index 0000000..517f7e6 --- /dev/null +++ b/include/hicmapp/api/Hicmapp.hpp @@ -0,0 +1,55 @@ +#ifndef HICMAPP_API_HICMAPP_HPP +#define HICMAPP_API_HICMAPP_HPP + +#include +#include +#include + +using namespace hicmapp::primitives; + +namespace hicmapp::api { + template + class Hicmapp { + public: + static void + GenerateDenseMatrix(common::Uplo auplo, Matrix &apMatrix, bool aASync); + + static void + Init(int aCPUs = 1, int aGPUs = 0, int aThreadsPerWorker = -1); + + static void + Finalize(); + + static void + GenerateCompressedMatrix(common::Uplo auplo, Matrix &apMatrix, + const CompressionParameters &aSVDArguments, bool aASync); + + static size_t + Gemm(Matrix &apMatrixA, const blas::Op &aAOp, Matrix &apMatrixB, + const blas::Op &aBOp, Matrix &apMatrixC, T &aAlpha, T &aBeta, + runtime::HicmaContext &aContext, bool aAllocatePool = false, + const CompressionParameters &aSVDArguments = {1e-9}, + const std::vector> &aRanks = {}); + + static void + UncompressMatrix(common::Uplo auplo, Matrix &apMatrixUV, Matrix &apMatrixRK, Matrix &apMatrixD); + + static size_t + GenerateDiagonalTiles(common::Uplo auplo, Matrix &aMatrixUV, Matrix &aMatrixRK, + Matrix &apMatrixD, unsigned long long int seed, + int maxrank, double tol, int compress_diag, Matrix &apMatrixDense, + runtime::HicmaContext &aContext); + + static size_t + Cholesky(common::Uplo aUpperLower, Matrix &aMatrixAUV, Matrix &aMatrixAD, + Matrix &aMatrixARK, int aRank, int aMaxRank, double aAccuracy, + runtime::HicmaContext &aContext); + + static size_t + DiagVecToMat(Matrix &aMatrixDiag, Matrix &aMatrixDense, runtime::HicmaContext &aContext); + + private: + Hicmapp() = default; + }; + } +#endif //HICMAPP_API_HICMAPP_HPP \ No newline at end of file diff --git a/include/hicmapp/common/definitions.h 
b/include/hicmapp/common/definitions.h new file mode 100644 index 0000000..63b213d --- /dev/null +++ b/include/hicmapp/common/definitions.h @@ -0,0 +1,76 @@ +#ifndef HICMAPP_COMMON_DEFINITIONS_HPP +#define HICMAPP_COMMON_DEFINITIONS_HPP + +// Macro definition to instantiate the HiCMA template classes with supported types. +#define HICMAPP_INSTANTIATE_CLASS(TEMPLATE_CLASS) template class TEMPLATE_CLASS; \ + template class TEMPLATE_CLASS; + +namespace hicmapp::common { + /** + * @brief + * Enum denoting the storage layout of a matrix or tile. + */ + enum class StorageLayout { + HicmaCM = 101, + HicmaRM = 102, + HicmaCCRB = 103, + HicmaCRRB = 104, + HicmaRCRB = 105, + HicmaRRRB = 106 + }; + + /** + * @brief + * Enum denoting the Data Type used. + */ + enum class DataType { + HicmaByte = 0, + HicmaInteger = 1, + HicmaRealFloat = 2, + HicmaRealDouble = 3, + HicmaComplexFloat = 4, + HicmaComplexDouble = 5 + }; + + /** + * @brief + * Enum denoting the Problem Type used. + */ + enum class ProblemType { + PROBLEM_TYPE_RND = 1, + PROBLEM_TYPE_SS = 2, + PROBLEM_TYPE_RNDUSR = 3, + PROBLEM_TYPE_FILE = 4, + PROBLEM_TYPE_GEOSTAT = 5, + PROBLEM_TYPE_EDSIN = 6, + PROBLEM_TYPE_GEOSTAT_POINT = 7, + PROBLEM_TYPE_ST_3D_EXP = 8, + PROBLEM_TYPE_ST_3D_SQEXP = 9, + PROBLEM_TYPE_3D_RBF_VIRUS = 12, + PROBLEM_TYPE_3D_RBF_CUBE = 13, + PROBLEM_TYPE_AC_3D = 14, + PROBLEM_TYPE_ST_2D_EXP = 15, + PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS_BIVARIATE = 108, + PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS_BIVARIATE_POINT = 109, + PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS2_BIVARIATE = 110, + PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS2_BIVARIATE_POINT = 111 + }; + + /// @Todo : should be replaced with an enum in HCOREPP to avoid redundancy. 
+ /** + * @brief + * Enum denoting Uplo + */ + enum class Uplo { + HicmaUpper = 121, + HicmaLower = 122, + HicmaUpperLower = 123 + }; + + enum class RunTimeLibrary{ + DEFAULT, + STARPU + }; + }//namespace hicmapp + +#endif //HICMAPP_COMMON_DEFINITIONS_HPP \ No newline at end of file diff --git a/include/hicmapp/matrix-operations/interface/MatrixOperations.hpp b/include/hicmapp/matrix-operations/interface/MatrixOperations.hpp new file mode 100644 index 0000000..2b04dbf --- /dev/null +++ b/include/hicmapp/matrix-operations/interface/MatrixOperations.hpp @@ -0,0 +1,65 @@ +#ifndef HICMAPP_OPERATIONS_MATRIX_OPERATIONS_HPP +#define HICMAPP_OPERATIONS_MATRIX_OPERATIONS_HPP + +#include + +using namespace hicmapp::primitives; + +namespace hicmapp { + namespace operations { + + template + class MatrixOperations { + + public: + + static int + GenerateCompressedMatrix(common::Uplo auplo, Matrix &apAUV, + const CompressionParameters &aSVDArguments, bool aAsync); + + + static int + GenerateDenseMatrix(common::Uplo auplo, Matrix &aMatrix, bool aASync); + + static int + UncompressMatrix(common::Uplo auplo, Matrix &aMatrixUV, Matrix &aMatrixRK, Matrix &aMatrixD); + + static size_t + Gemm(Matrix &aMatrixA, const blas::Op &aAOp, Matrix &aMatrixB, + const blas::Op &aBOp, Matrix &aMatrixC, T &aAlpha, T &aBeta, + runtime::HicmaContext &aContext, const CompressionParameters &aSVDArguments, + const std::vector> &aRanks, bool aAllocatePool = false); + + static size_t + Cholesky(common::Uplo aUpperLower, Matrix &aMatrixAUV, Matrix &aMatrixADiagonal, + Matrix &aMatrixARK, int aRank, int aMaxRank, double aAccuracy, + runtime::HicmaContext &aContext); + + static size_t + DiagVecToMat(Matrix &aMatrixDiag, Matrix &aMatrixDense, runtime::HicmaContext &aContext); + + static size_t + GenerateDiagonalTiles(common::Uplo auplo, Matrix &aMatrixUV, Matrix &aMatrixRK, + Matrix &aMatrixD, unsigned long long int seed, + int maxrank, double tol, int compress_diag, Matrix &aMatrixDense, + 
runtime::HicmaContext &aContext); + + static std::vector + CalculateGemmPoolSize(Matrix &aMatrixA, const blas::Op &aAOp, Matrix &aMatrixB, + const blas::Op &aBOp, Matrix &aMatrixC, T &aAlpha, T &aBeta, + runtime::HicmaContext& aContext, + const CompressionParameters &aSVDArguments, + const std::vector> &aRanks); + + private: + /** + * @brief + * Prevent Class Instantiation for Operations Wrapper Class. + */ + MatrixOperations() = default; + + }; + + } +} +#endif //HICMAPP_OPERATIONS_MATRIX_OPERATIONS_HPP diff --git a/include/hicmapp/primitives/ProblemManager.hpp b/include/hicmapp/primitives/ProblemManager.hpp new file mode 100644 index 0000000..6c7808e --- /dev/null +++ b/include/hicmapp/primitives/ProblemManager.hpp @@ -0,0 +1,123 @@ +#ifndef HICMAPP_PROBLEMMANAGER_HPP +#define HICMAPP_PROBLEMMANAGER_HPP +extern "C" { +#include "starsh.h" +} + +#include +#include +#include +#include +#include +#include + +namespace hicmapp { + namespace primitives { + + enum ProblemProperty { + HICMA_PROB_PROPERTY_MESH_POINTS, + HICMA_PROB_PROPERTY_MORDERING, + HICMA_PROB_PROPERTY_MESH_FILE, + HICMA_PROB_PROPERTY_INTERPL_FILE, + HICMA_PROB_PROPERTY_NTRIAN, + HICMA_PROB_PROPERTY_NIPP, + HICMA_PROB_PROPERTY_DIAG, + HICMA_PROB_PROPERTY_WAVE_K, + HICMA_PROB_PROPERTY_DECAY, + HICMA_PROB_PROPERTY_NOISE, + HICMA_PROB_PROPERTY_THETA, + HICMA_PROB_PROPERTY_POINT, + HICMA_PROB_PROPERTY_REG, + HICMA_PROB_PROPERTY_ISREG, + HICMA_PROB_PROPERTY_RAD, + HICMA_PROB_PROPERTY_NUMOBJ, + HICMA_PROB_PROPERTY_DENST, + HICMA_PROB_PROPERTY_NU, + HICMA_PROB_PROPERTY_BETA, + HICMA_PROB_PROPERTY_NDIM, + HICMA_PROB_PROPERTY_N, + HICMA_PROB_PROPERTY_BLOCK_SIZE, + HICMA_PROB_PROPERTY_SYM, + HICMA_PROB_PROPERTY_MT, + HICMA_PROB_PROPERTY_NT + + }; + + class ProblemManager { + public: + explicit ProblemManager(hicmapp::common::ProblemType aProblemType); + + ~ProblemManager(); + + /// + void InitAC3DProblemMetadata(); + + /// + void InitEDSINProblemMetadata(); + + /// + void InitGeostatNonGaussianProblemMetadata(); 
+ + /// + void InitGeostatNonGaussianPointProblemMetadata(); + + /// + void InitGeostatParsimoniousBivariatePointProblemMetadata(); + + /// + void InitGeostatParsimoniousBivariateProblemMetadata(); + + /// + void InitGeostatPointProblemMetadata(); + + /// + void InitGeostatProblemMetadata(); + + /// + void InitRBFCube3DProblemMetadata(); + + /// + void InitRBFVirus3DProblemMetadata(); + + /// + void InitRNDProblemMetadata(); + + /// + void InitSSProblemMetadata(); + + /// + void InitST2DExpProblemMetadata(); + + /// + void InitST3DExpProblemMetadata(); + + /// + void InitST3DSQExpProblemMetadata(); + + common::ProblemType GetProblemType() const; + // template +// T SetProblemProperty(ProblemProperty aProperty, T aPropertyValue); + + template + T GetProblemProperty(ProblemProperty aProperty); + + void SetProblemProperty(ProblemProperty aProperty, int aPropertyValue); + void SetProblemProperty(ProblemProperty aProperty, double aPropertyValue); + void SetProblemProperty(ProblemProperty aProperty, double* aPropertyValue); + void SetProblemProperty(ProblemProperty aProperty, char aPropertyValue); + void SetProblemProperty(ProblemProperty aProperty, char* aPropertyValue); + + + private: + + enum hicmapp::common::ProblemType mProblemType; + std::set mProblemProperties; + std::unordered_map mCharProperties; + std::unordered_map mIntProblemProperties; + std::unordered_map mDoubleProblemProperties; + std::unordered_map mP2DoubleProblemProperties; + std::unordered_map mStringProblemProperties; + }; + } +} +#endif //HICMAPP_PROBLEMMANAGER_HPP diff --git a/include/hicmapp/primitives/decomposer/concrete/slowest_dimension_decomposer.hpp b/include/hicmapp/primitives/decomposer/concrete/slowest_dimension_decomposer.hpp new file mode 100644 index 0000000..3ce67e6 --- /dev/null +++ b/include/hicmapp/primitives/decomposer/concrete/slowest_dimension_decomposer.hpp @@ -0,0 +1,57 @@ + +#ifndef HICMAPP_PRIMITIVES_CONCRETE_MATRIX_SLOWEST_DIM_DECOMPOSER_HPP +#define 
HICMAPP_PRIMITIVES_CONCRETE_MATRIX_SLOWEST_DIM_DECOMPOSER_HPP + +#include +#include + +#include "hicmapp/primitives/decomposer/matrix_decomposer.hpp" +#include "hicmapp/common/definitions.h" +#include "hicmapp/runtime/interface/HicmaContext.hpp" + +namespace hicmapp { + namespace primitives { + + /** + * Slowest Dimension decomposer will be decomposing the matrix tiles equally across the number of given + * sub-matrices according to the slowest dimension. + */ + class SlowestDimDecomposer : public MatrixDecomposer { + + public: + /** + * @brief SlowestDimDecomposer constructor + * + * @param aNumOfSubMatrices + * Number of sub-matrices across global matrix + * @param aStorageLayout + * Global matrix layout, either RowMajor or ColMajor. + */ + SlowestDimDecomposer(size_t aNumOfSubMatrices, common::StorageLayout aStorageLayout); + + /** + * @brief Decompose the matrix into sub-matrices, + * by dividing the number of tiles by the number of sub-matrices. + * + * @param aGlobalMatrixTilesInRows + * Number of tiles across the global matrix rows. + * @param aGlobalMatrixTilesInCols + * Number of tiles across the global matrix cols. 
+ * @return + */ + std::vector + Decompose(size_t aGlobalMatrixTilesInRows, size_t aGlobalMatrixTilesInCols, + bool aDiagonalMatrix = false) override; + + DecomposerType + GetType() override { + return SLOWESTDIM; + }; + + private: + common::StorageLayout mStorageLayout; + size_t mNumOfSubMatrices; + }; + } +} +#endif //HICMAPP_PRIMITIVES_CONCRETE_MATRIX_SLOWEST_DIM_DECOMPOSER_HPP diff --git a/include/hicmapp/primitives/decomposer/concrete/two_dimension_cyclic_decomposer.hpp b/include/hicmapp/primitives/decomposer/concrete/two_dimension_cyclic_decomposer.hpp new file mode 100644 index 0000000..8fead88 --- /dev/null +++ b/include/hicmapp/primitives/decomposer/concrete/two_dimension_cyclic_decomposer.hpp @@ -0,0 +1,51 @@ + +#ifndef HICMAPP_PRIMITIVES_CONCRETE_MATRIX_TWO_DIM_CYCLIC_DECOMPOSER_HPP +#define HICMAPP_PRIMITIVES_CONCRETE_MATRIX_TWO_DIM_CYCLIC_DECOMPOSER_HPP + +#include +#include + +#include "hicmapp/primitives/decomposer/matrix_decomposer.hpp" +#include "hicmapp/runtime/interface/HicmaContext.hpp" + +namespace hicmapp::primitives { + + class TwoDimCyclicDecomposer : public MatrixDecomposer { + + public: + + /** + * @brief TwoDimCyclicDecomposer constructor + * + * @param aNumOfProcessesInRows + * Number of processes across global matrix rows. + * @param aNumOfProcessesInCols + * Number of processes across global matrix columns. + */ + TwoDimCyclicDecomposer(size_t aNumOfProcessesInRows, size_t aNumOfProcessesInCols); + + /** + * @brief Decomposes the Matrix into sub-matrices, where every sub-matrix is assigned to a process. + * When number of sub-matrices > number of processes the assignment is done in a round-robin schedule. + * The TwoDimCyclicDecomposer supports RowMajor only.(TODO: add ColumnMajor support.) + * + * @param aGlobalMatrixTilesInRows + * Number of tiles across global matrix rows. + * @param aGlobalMatrixTilesInCols + * Number of tiles across global matrix columns. 
+ */ + std::vector + Decompose(size_t aGlobalMatrixTilesInRows, size_t aGlobalMatrixTilesInCols, + bool aDiagonalMatrix = false) override; + + DecomposerType + GetType() override { + return CYCLIC2D; + }; + + private: + size_t mNumOfProcessesInRows; + size_t mNumOfProcessesInCols; + }; + } +#endif //HICMAPP_PRIMITIVES_CONCRETE_MATRIX_TWO_DIM_CYCLIC_DECOMPOSER_HPP diff --git a/include/hicmapp/primitives/decomposer/matrix_decomposer.hpp b/include/hicmapp/primitives/decomposer/matrix_decomposer.hpp new file mode 100644 index 0000000..cf6f8d2 --- /dev/null +++ b/include/hicmapp/primitives/decomposer/matrix_decomposer.hpp @@ -0,0 +1,139 @@ + +#ifndef HICMAPP_PRIMITIVES_DECOMPOSER_MATRIX_DECOMPOSER_HPP +#define HICMAPP_PRIMITIVES_DECOMPOSER_MATRIX_DECOMPOSER_HPP + +#include +#include +#include + +namespace hicmapp::primitives { + + enum DecomposerType { + SLOWESTDIM = 0, + CYCLIC2D = 1, + }; + + class MatrixSpecifications { + + public: + MatrixSpecifications(size_t aNumOfTilesInRow, size_t aNumOfTilesInCol, size_t aStartingIndexInRows, + size_t aStartingIndexInCols, size_t aNumberOfProcessInRow, + size_t aNumberOfProcessInCol, int aOwnerId) : mNumOfTilesInRow(aNumOfTilesInRow), + mNumOfTilesInCol(aNumOfTilesInCol), + mStartingIndexInRows( + aStartingIndexInRows), + mStartingIndexInCols( + aStartingIndexInCols), + mNumberOfProcessInRow( + aNumberOfProcessInRow), + mNumberOfProcessInCol( + aNumberOfProcessInCol), + mOwnerId(aOwnerId) { + + } + + MatrixSpecifications() = default; + + ~MatrixSpecifications() = default; + + [[nodiscard]] size_t GetNumOfTilesInRow() const { + return mNumOfTilesInRow; + } + + [[nodiscard]] size_t GetNumOfTilesInCol() const { + return mNumOfTilesInCol; + } + + [[nodiscard]] size_t GetStartingIndexInRows() const { + return mStartingIndexInRows; + } + + [[nodiscard]] size_t GetStartingIndexInCols() const { + return mStartingIndexInCols; + } + + [[nodiscard]] size_t GetNumberOfProcessInRow() const { + return mNumberOfProcessInRow; + } + + 
[[nodiscard]] size_t GetNumberOfProcessInCol() const { + return mNumberOfProcessInCol; + } + + void SetNumOfTilesInRow(size_t aNumOfTileRows) { + mNumOfTilesInRow = aNumOfTileRows; + } + + void SetNumOfTilesInCol(size_t aNumOfTileCols) { + mNumOfTilesInCol = aNumOfTileCols; + } + + void SetStartingIndexInRows(size_t aStartingIndexInRows) { + mStartingIndexInRows = aStartingIndexInRows; + } + + void SetStartingIndexInCols(size_t aStartingIndexInCols) { + mStartingIndexInCols = aStartingIndexInCols; + } + + void SetNumberOfProcessInRow(size_t aNumberOfProcessInRow) { + mNumberOfProcessInRow = aNumberOfProcessInRow; + } + + void SetNumberOfProcessInCol(size_t aNumberOfProcessInCol) { + mNumberOfProcessInCol = aNumberOfProcessInCol; + } + + void SetOwnerId(int aOwnerId) { + mOwnerId = aOwnerId; + } + + [[nodiscard]] int GetOwnerId() const { + return mOwnerId; + } + + void SetTotalSubMatrixNumOfElementsInRows(size_t aTotalSubMatrixNumOfElementsInRows) { + mTotalSubMatrixNumOfElementsInRows = aTotalSubMatrixNumOfElementsInRows; + } + + void SetTotalSubMatrixNumOfElementsInCols(size_t aTotalSubMatrixNumOfElementsInCols) { + mTotalSubMatrixNumOfElementsInCols = aTotalSubMatrixNumOfElementsInCols; + } + + [[nodiscard]] size_t GetTotalSubMatrixNumOfElementsInRows() const { + return mTotalSubMatrixNumOfElementsInRows; + } + + [[nodiscard]] size_t GetTotalSubMatrixNumOfElementsInCols() const { + return mTotalSubMatrixNumOfElementsInCols; + } + + private: + size_t mNumOfTilesInRow{}; + size_t mNumOfTilesInCol{}; + /// Tile index in rows. + size_t mStartingIndexInRows{}; + /// Tile index in columns. 
+ size_t mStartingIndexInCols{}; + size_t mNumberOfProcessInRow = 0; + size_t mNumberOfProcessInCol = 0; + int mOwnerId = 0; + size_t mTotalSubMatrixNumOfElementsInRows{}; + size_t mTotalSubMatrixNumOfElementsInCols{}; + }; + + class MatrixDecomposer { + public: + + MatrixDecomposer() = default; + virtual ~MatrixDecomposer() = default; // polymorphic base: keeps destruction through a MatrixDecomposer pointer well-defined + virtual std::vector + Decompose(size_t aGlobalMatrixTilesInRows, size_t aGlobalMatrixTilesinCols, + bool aDiagonalMatrix = false) = 0; + + virtual + DecomposerType + GetType() = 0; + }; + } +#endif //HICMAPP_PRIMITIVES_DECOMPOSER_MATRIX_DECOMPOSER_HPP diff --git a/include/hicmapp/primitives/matrix.hpp b/include/hicmapp/primitives/matrix.hpp new file mode 100644 index 0000000..a917e90 --- /dev/null +++ b/include/hicmapp/primitives/matrix.hpp @@ -0,0 +1,287 @@ + +#ifndef HICMAPP_PRIMITIVES_MATRIX_HPP +#define HICMAPP_PRIMITIVES_MATRIX_HPP + +#include +#include + +#include +#include +#include "submatrix.hpp" +#include +#include "hicmapp/primitives/decomposer/concrete/slowest_dimension_decomposer.hpp" +#include +#include + +namespace hicmapp::primitives { +/** + * Tile matrix descriptor + * + * Matrices are stored in a contiguous data chunk containing in order + * A11, A21, A12, A22 with : + * + * n1 n2 + * +----------+---+ + * | | | With m1 = lm - (lm%mb) + * | | | m2 = lm%mb + * m1 | A11 |A12| n1 = ln - (ln%nb) + * | | | n2 = ln%nb + * | | | + * +----------+---+ + * m2 | A21 |A22| + * +----------+---+ + * + */ + + template + class Matrix { + public: + + /** + * Matrix Class constructor. + * + * @param apMatrixData + * Pointer to Matrix data elements, data will be distributed across submatrices, each containing a 2d array of + * tiles and size of each tile. + * @param aTotalGlobalNumOfRows + * Total Num of Rows in Global Matrix. + * @param aTotalGlobalNumOfCols + * Total Num of Cols in Global Matrix. + * @param aTileNumOfRows + * Num of Rows in each tile, except the remainder tile if it exists.
+ * @param aTileNumOfCols + * Num of cols in each tile, except the remainder tile if it exists. + * @param aStorageLayout + * Storage Layout in memory. + * @param aMatrixDecomposer + * Customized matrix decomposer. + * @param aRank + * Matrix rank. + * @param aCommunicator + */ + Matrix(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, size_t aTileNumOfRows, + size_t aTileNumOfCols, common::StorageLayout aStorageLayout, MatrixDecomposer &aMatrixDecomposer, + hicmapp::runtime::HicmaContext &aContext, + size_t aRank = 0, bool aDiagonalMatrix = false); + + /** + * Matrix Class constructor, uses a default SlowestDimDecomposer. + * + * @param apMatrixData + * Pointer to Matrix data elements, data will be distributed across submatrices, each containing a 2d array of + * tiles and size of each tile. + * @param aTotalGlobalNumOfRows + * Total Num of Rows in Global Matrix. + * @param aTotalGlobalNumOfCols + * Total NUm of Cols in Global Matrix. + * @param aTileNumOfRows + * Num of Rows in each tile, except the remainder tile if it exists. + * @param aTileNumOfCols + * Num of cols in each tile, except the remainder tile if it exists. + * @param aStorageLayout + * Storage Layout in memory. + * @param aRank + * Matrix rank. + * @param aCommunicator + */ + Matrix(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, size_t aTileNumOfRows, + size_t aTileNumOfCols, common::StorageLayout aStorageLayout, runtime::HicmaContext &aContext, + size_t aRank = 0, bool aDiagonalMatrix = false); + + /** + * Matrix Class constructor, uses a default SlowestDimDecomposer. + * + * @param apMatrixData + * Pointer to Matrix data elements, data will be distributed across submatrices, each containing a 2d array of + * tiles and size of each tile. + * @param aTotalGlobalNumOfRows + * Total Num of Rows in Global Matrix. + * @param aTotalGlobalNumOfCols + * Total NUm of Cols in Global Matrix. 
+ * @param aTileNumOfRows + * Num of Rows in each tile, except the remainder tile if it exists. + * @param aTileNumOfCols + * Num of cols in each tile, except the remainder tile if it exists. + * @param aStorageLayout + * Storage Layout in memory. + * @param aRank + * Matrix rank. + * @param aCommunicator + */ + Matrix(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, size_t aTileNumOfRows, + size_t aTileNumOfCols, common::StorageLayout aStorageLayout, runtime::HicmaContext &aContext, + const CompressionParameters &aParams); + + /** + * Matrix Class constructor. + * + * @param apMatrixData + * Pointer to Matrix data elements, data will be distributed across submatrices, each containing a 2d array of + * tiles and size of each tile. + * @param aTotalGlobalNumOfRows + * Total Num of Rows in Global Matrix. + * @param aTotalGlobalNumOfCols + * Total NUm of Cols in Global Matrix. + * @param aTileNumOfRows + * Num of Rows in each tile, except the remainder tile if it exists. + * @param aTileNumOfCols + * Num of cols in each tile, except the remainder tile if it exists. + * @param aStorageLayout + * Storage Layout in memory. + * @param aMatrixDecomposer + * Customized matrix decomposer. + * @param aRank + * Matrix rank. + */ + Matrix(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, size_t aTileNumOfRows, + size_t aTileNumOfCols, common::StorageLayout aStorageLayout, MatrixDecomposer &aMatrixDecomposer, + hicmapp::runtime::HicmaContext &aContext, const CompressionParameters &aParams); + + /** + * Matrix destructor. + */ + ~Matrix(); + + /** + * Get Current Matrix Id. + * + * @return + * Matrix Id. + */ + [[nodiscard]] size_t + GetMatrixId() const; + + /** + * Get number of sub-matrices assigned to current process + * + * @return + * Number of Sub matrices. 
+ */ + [[nodiscard]] size_t + GetNumOfSubMatrices() const; + + /** + * Get total number of sub-matrices decomposed across all processes + * + * @return + * Total number of Sub matrices. + */ + [[nodiscard]] size_t + GetTotalNumOfSubMatrices() const; + + /** + * Get vector of pointers to Sub matrices. + * + * @return + * + */ + std::vector *> & + GetSubMatrices(); + + SubMatrix & + GetSubMatrix(size_t aSubMatrixIndex) const; + + [[nodiscard]] size_t + GetNumOfGlobalTilesInRows() const; + + [[nodiscard]] size_t + GetNumOfGlobalTilesInCols() const; + + [[nodiscard]] bool + ContainsTile(size_t aTileIdxInRows, size_t aTileIdxInCols) const; + + Tile * + GetTilePointer(size_t aTileIdxInRows, size_t aTileIdxInCols); + + [[nodiscard]] common::StorageLayout + GetStorageLayout() const; + + [[nodiscard]] int + GetSubMatrixOwnerId(size_t aTileIdxInRows, size_t aTileIdxInCols) const; + + [[nodiscard]] int + GetTileOwnerId(size_t aTileIdxInRows, size_t aTileIdxInCols) const; + + [[nodiscard]] runtime::HicmaContext & + GetContext() const; + + [[nodiscard]] bool + IsMatrixValid() const; + + [[nodiscard]] size_t GetNumOfRowsInTile() const; + + [[nodiscard]] size_t GetNumOfColsInTile() const; + + [[nodiscard]] size_t GetGlobalNumOfRowsInMatrix() const; + + [[nodiscard]] size_t GetGlobalNumOfColsInMatrix() const; + + size_t GetTileLeadingDim(size_t aTileIdx); + + hcorepp::helpers::RawMatrix + ToRawMatrix(runtime::HicmaContext &aContext); + + size_t + GetMemoryFootprint(); + + [[nodiscard]] const std::vector &GetMatrixSpecs() const { + return mSpecs; + } + + [[nodiscard]] size_t GetMatrixFixedRank() const { + return mFixedRank; + } + + [[nodiscard]] TileType GetMatrixTileType() const { + return mTileType; + } + + TileMetadata *GetTileMetadata(size_t aTileRowIdx, size_t aTileColIdx); + + void Print(std::ostream &aOutStream); + + private: + void + Initialize(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, + size_t aTileNumOfRows, size_t aTileNumOfCols, 
common::StorageLayout aStorageLayout, + MatrixDecomposer &aMatrixDecomposer, hicmapp::runtime::HicmaContext &aContext, size_t aRank = 0); + + void + Initialize(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, + size_t aTileNumOfRows, size_t aTileNumOfCols, common::StorageLayout aStorageLayout, + MatrixDecomposer &aMatrixDecomposer, hicmapp::runtime::HicmaContext &aContext, + const CompressionParameters &aParams = {1e-9}); + + private: + // 2d array of sub matrices representing the GLobal matrix. + std::vector *> mSubMatrices; + // number of rows in a tile. + size_t mGlobalNumOfRowsInTile; + // number of columns in a tile. + size_t mGlobalNumOfColsInTile; + // Total number of rows in global matrix. + size_t mGlobalNumOfRowsInMatrix; + // Total number of columns in global matrix. + size_t mGlobalNumOfColsInMatrix; + // Matrix Id + size_t mMatrixId; + // Global number of Tiles in Rows; + size_t mGlobalNumOfTilesInRows; + // Global number of Tiles in Cols; + size_t mGlobalNumOfTilesInCols; + // Storage layout + common::StorageLayout mStorageLayout; + //Hicma context + runtime::HicmaContext &mContext; + //Memory Footprint + size_t mMemory; + // MatrixDecomposerType + DecomposerType mDecomposerType = SLOWESTDIM; + // Decomposition Specs + std::vector mSpecs; + size_t mFixedRank = -1; + TileType mTileType; + bool mDiagonalMatrix; + }; + } +#endif //HICMAPP_PRIMITIVES_MATRIX_HPP diff --git a/include/hicmapp/primitives/submatrix.hpp b/include/hicmapp/primitives/submatrix.hpp new file mode 100644 index 0000000..ac18871 --- /dev/null +++ b/include/hicmapp/primitives/submatrix.hpp @@ -0,0 +1,168 @@ + +#ifndef HICMAPP_PRIMITIVES_SUBMATRIX_HPP +#define HICMAPP_PRIMITIVES_SUBMATRIX_HPP + +#include +#include +#include +#include + +using namespace hcorepp::operators; + +namespace hicmapp::primitives { + + template + class SubMatrix { + public: + + /** + * SubMatrix Class constructor. 
+ * + * @param apSubMatrixData + * Pointer to the submatrix data elements, containing a 2d array of tiles to be holding the data. + * @param aTileNumOfRows + * Num of Rows in each tile, except the remainder tile if it exists. + * @param aTileNumOfCols + * Num of cols in each tile, except the remainder tile if it exists. + * @param aGlobalMatrixRows + * Num of global matrix rows. + * @param aGlobalMatrixCols + * Num of global matrix cols. + * @param aSubMatrixNumOfRows + * Num of sub-matrix rows. + * @param aSubMatrixNumOfCols + * Num of sub-matrix cols. + * @param aTilesGlobalStIdxInRows + * Tile's global index across the matrix in rows. + * @param aTilesGlobalStIdxInCols + * Tile's global index across the matrix in cols. + * @param aStorageLayout + * Storage Layout in memory. + * @param aOwnerId + * Sub-matrix' owner ID. + * @param aContext Hicma context. + * @param aRank Sub-matrix' rank (defaults to 0). + */ + SubMatrix(T *apSubMatrixData, size_t aTileNumOfRows, size_t aTileNumOfCols, size_t aGlobalMatrixRows, + size_t aGlobalMatrixCols, size_t aSubMatrixNumOfRows, size_t aSubMatrixNumOfCols, + size_t aTilesGlobalStIdxInRows, size_t aTilesGlobalStIdxInCols, + common::StorageLayout aStorageLayout, size_t aOwnerId, runtime::HicmaContext& aContext, size_t aRank = 0); + + /** + * SubMatrix Class constructor. + * + * @param apSubMatrixData + * Pointer to the submatrix data elements, containing a 2d array of tiles to be holding the data. + * @param aTileNumOfRows + * Num of Rows in each tile, except the remainder tile if it exists. + * @param aTileNumOfCols + * Num of cols in each tile, except the remainder tile if it exists. + * @param aGlobalMatrixRows + * Num of global matrix rows. + * @param aGlobalMatrixCols + * Num of global matrix cols. + * @param aSubMatrixNumOfRows + * Num of sub-matrix rows. + * @param aSubMatrixNumOfCols + * Num of sub-matrix cols. + * @param aTilesGlobalStIdxInRows + * Tile's global index across the matrix in rows.
+ * @param aTilesGlobalStIdxInCols + * Tile's global index across the matrix in cols. + * @param aStorageLayout + * Storage Layout in memory. + * @param aOwnerId + * Sub-matrix' owner ID. + * @param aContext Hicma context. + * @param aParams Compression parameters. + */ + SubMatrix(T *apSubMatrixData, size_t aTileNumOfRows, size_t aTileNumOfCols, size_t aGlobalMatrixRows, + size_t aGlobalMatrixCols, size_t aSubMatrixNumOfRows, size_t aSubMatrixNumOfCols, + size_t aTilesGlobalStIdxInRows, size_t aTilesGlobalStIdxInCols, + common::StorageLayout aStorageLayout, size_t aOwnerId, runtime::HicmaContext& aContext, const CompressionParameters& aParams); + + /** + * Sub Matrix destructor. + */ + ~SubMatrix(); + + /** + * Get number of tiles in a sub-matrix + * + * @return + * Number of tiles + */ + size_t + GetNumberofTiles(); + + /** + * Get reference to sub-matrix tiles. + * + * @return + * Sub matrix tiles + */ + std::vector *> & + GetTiles(); + + bool + ContainsTile(size_t aTileIdxInRows, size_t aTileIdxInCols); + + Tile * + GetTilePointer(size_t aTileIdxInRows, size_t aTileIdxInCols); + + int + GetSubMatrixOwnerId(); + + bool + IsValid(); + + size_t + GetNumOfTilesinRows(); + + size_t + GetNumOfTilesinCols(); + + size_t + GetTileRows(); + + size_t + GetTileCols(); + + size_t + GetTilesGlobalStIdxInRows(); + + size_t + GetTilesGlobalStIdxInCols(); + + size_t + GetMemoryFootprint(); + + private: + // 2d array of tiles representing the matrix. + std::vector *> mTiles; + // storage layout. + common::StorageLayout mStorageLayout; + // number of rows in a single tile. + size_t mTileRows; + // number of columns in a single tile. + size_t mTileCols; + // Total number of rows in sub-matrix. + size_t mSubMatrixRows; + // Total number of columns in sub-matrix.
+ size_t mSubMatrixCols; + + size_t mGlobalMatrixRows; + size_t mGlobalMatrixCols; + + size_t mTilesGlobalStIdxInRows; + size_t mTilesGlobalStIdxInCols; + + size_t mSubMatrixOwnerId; + + size_t mNumOfTilesinRows; + size_t mNumOfTilesinCols; + + size_t mMemory; + }; + } +#endif //HICMAPP_PRIMITIVES_SUBMATRIX_HPP diff --git a/include/hicmapp/problem-manager/StarshManager.hpp b/include/hicmapp/problem-manager/StarshManager.hpp new file mode 100644 index 0000000..da2c335 --- /dev/null +++ b/include/hicmapp/problem-manager/StarshManager.hpp @@ -0,0 +1,28 @@ + +#ifndef HICMAPP_TILE_OPERATIONS_STARSH_MANAGER_HPP +#define HICMAPP_TILE_OPERATIONS_STARSH_MANAGER_HPP + +#include +#include + +#include + +namespace hicmapp { + namespace operations { + class StarsHManager { + public: + + static void DestroyStarsHManager(); + + static STARSH_blrf *GetStarsHFormat(); + + static void SetStarsHFormat(primitives::ProblemManager &aProblemManager); + + private: + StarsHManager(); + static STARSH_blrf *starsh_format; + }; + } +} + +#endif //HICMAPP_TILE_OPERATIONS_STARSH_MANAGER_HPP diff --git a/include/hicmapp/runtime/communicator/mpi/Communicator.hpp b/include/hicmapp/runtime/communicator/mpi/Communicator.hpp new file mode 100644 index 0000000..b13b92d --- /dev/null +++ b/include/hicmapp/runtime/communicator/mpi/Communicator.hpp @@ -0,0 +1,34 @@ +#ifndef HICMAPP_RUNTIME_MPI_HICMA_COMMUNICATOR_HPP +#define HICMAPP_RUNTIME_MPI_HICMA_COMMUNICATOR_HPP + +#include + +namespace hicmapp { + namespace runtime { + + class HicmaCommunicator { + public: + HicmaCommunicator() = default; + + explicit HicmaCommunicator(MPI_Comm aComm) : mCommunicator{aComm} { + + } + + [[nodiscard]] MPI_Comm + GetMPICommunicatior() const { + return mCommunicator; + } + + void + SetMPICommunicator(MPI_Comm aCommunicator) { + mCommunicator = aCommunicator; + } + + private: + /*** MPI Communicator */ + MPI_Comm mCommunicator = MPI_COMM_WORLD; + }; + } +} + +#endif //HICMAPP_RUNTIME_MPI_HICMA_COMMUNICATOR_HPP diff 
--git a/include/hicmapp/runtime/communicator/serial/Communicator.hpp b/include/hicmapp/runtime/communicator/serial/Communicator.hpp new file mode 100644 index 0000000..67ab571 --- /dev/null +++ b/include/hicmapp/runtime/communicator/serial/Communicator.hpp @@ -0,0 +1,18 @@ +#ifndef HICMAPP_RUNTIME_SERIAL_HICMA_COMMUNICATOR_HPP +#define HICMAPP_RUNTIME_SERIAL_HICMA_COMMUNICATOR_HPP + +#include + +namespace hicmapp { + namespace runtime { +/*** + * Dummy Communicator Class if MPI is disabled + */ + class HicmaCommunicator { + public: + + }; + } +} + +#endif //HICMAPP_RUNTIME_SERIAL_HICMA_COMMUNICATOR_HPP \ No newline at end of file diff --git a/include/hicmapp/runtime/concrete/default/default_runtime.hpp b/include/hicmapp/runtime/concrete/default/default_runtime.hpp new file mode 100644 index 0000000..d70aeeb --- /dev/null +++ b/include/hicmapp/runtime/concrete/default/default_runtime.hpp @@ -0,0 +1,89 @@ +#ifndef HICMAPP_DEFAULT_RUNTIME_HPP +#define HICMAPP_DEFAULT_RUNTIME_HPP + +#include +#include + +using namespace hicmapp::primitives; + +namespace hicmapp { + namespace runtime { + + template + class DefaultRuntime : public RunTimeInterface { /* runtime returned by RunTimeFactory when HICMAPP_STARPU is not defined */ + public: + explicit DefaultRuntime([[maybe_unused]] hicmapp::runtime::HicmaHardware &aHardware); + + ~DefaultRuntime(); + + int GenerateDenseMatrix(Matrix &aMatrix, size_t aTileIdxInRows, /* row index first, as declared by RunTimeInterface and StarPu */ + size_t aTileIdxInCols) override; + + int GenerateCompressedMatrix(Matrix &aMatrix, + size_t aTileIdxInRows, size_t aTileIdxInCols, + const CompressionParameters &aSVDArguments) override; + + size_t + Gemm(T aAlpha, Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, const blas::Op &aAOp, + Matrix &aMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, const blas::Op &aBOp, T aBeta, + Matrix &aMatrixC, const size_t &aRowIdxC, const size_t &aColIdxC, + const hcorepp::kernels::RunContext &aContext, const CompressionParameters &aSVDArguments, + hcorepp::dataunits::MemoryUnit &aMemoryUnit, bool aCholesky = false) override; + +
int Sync() override; + + void Flush(const Matrix &aMatrix) override; + + void Finalize() override; + + void UnRegisterHandles(Matrix &A) override; + + void RegisterHandles(Matrix &A) override; + + void Flush(const Matrix &aMatrix, const size_t aRowIdx, const size_t aColIdx) override; + + common::RunTimeLibrary LibraryType() override { + return common::RunTimeLibrary::DEFAULT; + } + + size_t + Syrk(Matrix &aMatrixA, const size_t &aRowIdxA, + const size_t &aColIdxA, const blas::Op &aAOp, Matrix &aMatrixC, + const size_t &aRowIdxC, const size_t &aColIdxC, const blas::Uplo aUplo, T aAlpha, + T aBeta, const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) override; + + size_t + Potrf(Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, const blas::Uplo aUplo, + const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) override; + + size_t + Trsm(blas::Side aSide, blas::Uplo aUplo, blas::Op aTrans, blas::Diag aDiag, T aAlpha, + Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, + Matrix &aMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, + const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) override; + + size_t + GenerateDiagonalTile(Matrix &aMatrixUV, Matrix &aMatrixDiag, const size_t &aRowIdxDiag, + const size_t &aColIdxDiag, Matrix &aMatrixRK, const size_t &aRowIdx, + const size_t &aColIdx, unsigned long long int seed, size_t maxrank, double tol, + size_t compress_diag, Matrix &aMatrixDense, + const hcorepp::kernels::RunContext &aContext, bool diagonal_tile) override; + + size_t + LaCpy(Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, + Matrix &aMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, + const hcorepp::kernels::RunContext &aContext) override; + + size_t + Uncompress(Matrix &aMatrixUV, Matrix &aMatrixDense, Matrix &aMatrixRk, const size_t &aRowIdx, + const size_t &aColIdx) override; + + }; 
+ + } +} + +#endif //HICMAPP_DEFAULT_RUNTIME_HPP \ No newline at end of file diff --git a/include/hicmapp/runtime/concrete/starpu/factory.hpp b/include/hicmapp/runtime/concrete/starpu/factory.hpp new file mode 100644 index 0000000..eaa9f7a --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/factory.hpp @@ -0,0 +1,69 @@ +#include +#include +#include "hicmapp/common/definitions.h" + +#ifndef HICMAPP_FACTORY_HPP +#define HICMAPP_FACTORY_HPP +namespace hicmapp { + namespace runtime { + + /*** + * Types of Supported Codelets + */ + enum CodeletType { + GENERATE_DENSE_DATA, + GENERATE_COMPRESSED_DATA, + GEMM, + SYRK, + POTRF, + TRSM, + DGYTLR, + DGYTLR_DIAG, + UNCOMPRESS, + LACPY + }; + + template + class MakerInterface { + public: + MakerInterface() = default; + + virtual StarpuCodelet *CreateObject() = 0; + + virtual ~MakerInterface() = default; + }; + + template + class CodeletFactory { + + public: + + static void RegisterMaker(CodeletType aType, MakerInterface *aMaker); + + static StarpuCodelet *CreateCodelet(CodeletType aType); + + private: + static std::unordered_map *> &GetMakersMap(); + + static std::unordered_map &GetCodeletsMap(); + }; + + template typename Object, typename T> + class CodeletMaker : public MakerInterface { + public: + + explicit CodeletMaker(const CodeletType aType) noexcept { + CodeletFactory::RegisterMaker(aType, this); + } + + StarpuCodelet *CreateObject() override { + return new Object(); + } + + ~CodeletMaker() = default; + }; + } + +} + +#endif //HICMAPP_FACTORY_HPP diff --git a/include/hicmapp/runtime/concrete/starpu/hicma_runtime_workspace.hpp b/include/hicmapp/runtime/concrete/starpu/hicma_runtime_workspace.hpp new file mode 100644 index 0000000..3f8a9eb --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/hicma_runtime_workspace.hpp @@ -0,0 +1,54 @@ +/** + * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). + * All rights reserved. 
+ */ +/** + * + * @file hicma_runtime_workspace.hpp + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon StarPU workspace header + * + * @version 1.0.0 + * @author Cedric Augonnet + * @date 2011-06-01 + * + */ +#ifndef _HICMA_STARPU_WORKSPACE_H_ +#define _HICMA_STARPU_WORKSPACE_H_ + +/* + * Allocate workspace in host memory: CPU for any worker + * or allocate workspace in worker's memory: main memory for cpu workers, + * and embedded memory for CUDA devices. + */ +#define HICMA_HOST_MEM 0 +#define HICMA_WORKER_MEM 1 + +struct hicma_starpu_ws_s { + size_t size; + size_t memory_location; + int which_workers; + void *workspaces[STARPU_NMAXWORKERS]; +}; + +typedef struct hicma_starpu_ws_s HICMA_starpu_ws_t; + +/* + * This function creates a workspace on each type of worker in "which_workers" + * (e.g. HICMA_CUDA|HICMA_CPU for all CPU and GPU workers). The + * memory_location argument indicates whether this should be a buffer in host + * memory or in worker's memory (HICMA_HOST_MEM or HICMA_WORKER_MEM). This function + * returns 0 upon successful completion. + */ +int HICMA_RUNTIME_starpu_ws_alloc ( HICMA_starpu_ws_t **workspace, size_t size, int which_workers, size_t memory_location); +int HICMA_RUNTIME_starpu_ws_free ( HICMA_starpu_ws_t *workspace); +void *HICMA_RUNTIME_starpu_ws_getlocal( HICMA_starpu_ws_t *workspace); + +#endif /* _HICMA_STARPU_WORKSPACE_H_ */ diff --git a/include/hicmapp/runtime/concrete/starpu/hicma_starpu.hpp b/include/hicmapp/runtime/concrete/starpu/hicma_starpu.hpp new file mode 100644 index 0000000..3ecb7c7 --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/hicma_starpu.hpp @@ -0,0 +1,105 @@ +/** + * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). + * All rights reserved.
+ */ +/** + * + * @file hicma_starpu.hpp + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon StarPU runtime header + * + * @version 1.0.0 + * @author Mathieu Faverge + * @author Cedric Castagnede + * @author Florent Pruvost + * @date 2011-06-01 + * + */ +#ifndef _HICMA_STARPU_H_ +#define _HICMA_STARPU_H_ + +/* StarPU options */ +/* #undef HAVE_STARPU_FXT_PROFILING */ +/* #undef HAVE_STARPU_IDLE_PREFETCH */ +/* #undef HAVE_STARPU_ITERATION_PUSH */ +/* #undef HAVE_STARPU_DATA_WONT_USE */ +/* #undef HAVE_STARPU_DATA_SET_COORDINATES */ +/* #undef HAVE_STARPU_MALLOC_ON_NODE_SET_DEFAULT_FLAGS */ +/* #undef HAVE_STARPU_MPI_DATA_MIGRATE */ +/* #undef HAVE_STARPU_MPI_DATA_REGISTER */ +/* #undef HAVE_STARPU_MPI_COMM_RANK */ +/* #undef HAVE_STARPU_MPI_CACHED_RECEIVE */ +/* #undef HAVE_STARPU_MPI_COMM_GET_ATTR */ + +#if defined(HICMAPP_USE_MPI) +#include +#else +#include +#endif + +#include + +#if defined(USE_CUDA) +#include +#include + +#include +#include +#if defined(HICMA_USE_CUBLAS_V2) +#include +#include +#endif +#endif + +#if defined(HICMA_SIMULATION) +# if !defined(STARPU_SIMGRID) +# error "Starpu was not built with simgrid support (--enable-simgrid). Can not run Hicma with simulation support." +# endif +#else +# if defined(STARPU_SIMGRID) +# warning "Starpu was built with simgrid support. Better build Hicma with simulation support (-DHICMA_SIMULATION=YES) NOT SUPPORTED YET." +# endif +#endif + +#include + +typedef struct starpu_conf starpu_conf_t; + +/**/ + +/* + * MPI Redefinitions + */ +#if defined(HICMAPP_USE_MPI) +#undef STARPU_REDUX +#define starpu_insert_task(...) 
starpu_mpi_insert_task(MPI_COMM_WORLD, __VA_ARGS__) +#endif + +/* + * cuBlasAPI v2 - StarPU enables support for the cuBLAS handle. RUNTIME_getStream(_stream_) declares a local variable named by its argument: a cublasHandle_t when USE_CUDA and HICMA_USE_CUBLAS_V2 are both defined, otherwise a cudaStream_t that is also passed to cublasSetKernelStream(). + */ +#if defined(USE_CUDA) && defined(HICMA_USE_CUBLAS_V2) +#define RUNTIME_getStream(_stream_) \ + cublasHandle_t _stream_ = starpu_cublas_get_local_handle(); +#else +#define RUNTIME_getStream(_stream_) \ + cudaStream_t _stream_ = starpu_cuda_get_local_stream(); \ + cublasSetKernelStream( _stream_ ); + +#endif + +/* + * Enable codelets names + */ +#if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION > 1)) +#define CHAMELEON_CODELETS_HAVE_NAME +#endif + +#endif /* _HICMA_STARPU_H_ */ diff --git a/include/hicmapp/runtime/concrete/starpu/starpu-codelets/gemm-codelet.hpp b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/gemm-codelet.hpp new file mode 100644 index 0000000..c46f8c5 --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/gemm-codelet.hpp @@ -0,0 +1,31 @@ +#include +#include + +#ifndef HICMAPP_GEMM_CODELET_HPP +#define HICMAPP_GEMM_CODELET_HPP +namespace hicmapp { + namespace runtime { + template + class GemmCodelet : public StarpuCodelet { + + public: + GemmCodelet(); + + starpu_codelet *GetCodelet() override; + + ~GemmCodelet() = default; + + private: + + static bool Register(); + + static const bool registered_; + + struct starpu_codelet cl_dgemm{}; + + static void cl_dgemm_func(void *descr[], void *cl_arg); + }; + + } +} +#endif //HICMAPP_GEMM_CODELET_HPP diff --git a/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate-dgytlr-codelet.hpp b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate-dgytlr-codelet.hpp new file mode 100644 index 0000000..b8fd0f4 --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate-dgytlr-codelet.hpp @@ -0,0 +1,30 @@ +#include +#include + +#ifndef HICMAPP_DGYTLR_CODELET_HPP +#define HICMAPP_DGYTLR_CODELET_HPP +namespace hicmapp { + namespace runtime { + template + class
GenerateDgytlrCodelet : public StarpuCodelet { + + public: + GenerateDgytlrCodelet(); + + starpu_codelet *GetCodelet() override; + + ~GenerateDgytlrCodelet() = default; + + private: + static bool Register(); + + static const bool registered_; + + struct starpu_codelet cl_dgytlr{}; + + static void cl_dgytlr_func(void *descr[], void *cl_arg); + }; + + } +} +#endif //HICMAPP_DGYTLR_CODELET_HPP diff --git a/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate-dgytlr-diag-codelet.hpp b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate-dgytlr-diag-codelet.hpp new file mode 100644 index 0000000..cc7ecff --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate-dgytlr-diag-codelet.hpp @@ -0,0 +1,30 @@ +#include +#include + +#ifndef HICMAPP_DGYTLRDIAG_CODELET_HPP +#define HICMAPP_DGYTLRDIAG_CODELET_HPP +namespace hicmapp { + namespace runtime { + template + class GenerateDgytlrDiagonalCodelet : public StarpuCodelet { + + public: + GenerateDgytlrDiagonalCodelet(); + + starpu_codelet *GetCodelet() override; + + ~GenerateDgytlrDiagonalCodelet() = default; + + private: + static bool Register(); + + static const bool registered_; + + struct starpu_codelet cl_dgytlr_diag{}; + + static void cl_dgytlr_diag_func(void *descr[], void *cl_arg); + }; + + } +} +#endif //HICMAPP_DGYTLRDIAG_CODELET_HPP diff --git a/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate_codelet.hpp b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate_codelet.hpp new file mode 100644 index 0000000..41f3832 --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate_codelet.hpp @@ -0,0 +1,31 @@ +#include +#include + +#ifndef HICMAPP_GENERATE_CODELET_HPP +#define HICMAPP_GENERATE_CODELET_HPP +namespace hicmapp { + namespace runtime { + template + class GenerateCodelet : public StarpuCodelet { + + public: + GenerateCodelet(); + + starpu_codelet *GetCodelet() override; + + ~GenerateCodelet() = default; + + 
private: + + static bool Register(); + + static const bool registered_; + + struct starpu_codelet cl_dhagdm{}; + + static void cl_dhagdm_cpu_func(void *descr[], void *cl_arg); + }; + + } +} +#endif //HICMAPP_GENERATE_CODELET_HPP diff --git a/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate_compressed_data_codelet.hpp b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate_compressed_data_codelet.hpp new file mode 100644 index 0000000..c992851 --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/generate_compressed_data_codelet.hpp @@ -0,0 +1,33 @@ + +#include +#include + +#ifndef HICMAPP_GENERATE_COMPRESSED_DATA_CODELET_HPP +#define HICMAPP_GENERATE_COMPRESSED_DATA_CODELET_HPP + +namespace hicmapp { + namespace runtime { + template + class GenerateCompressedDataCodelet : public StarpuCodelet { + + public: + GenerateCompressedDataCodelet(); + + starpu_codelet *GetCodelet() override; + + ~GenerateCompressedDataCodelet() = default; + + private: + + static bool Register(); + + static const bool registered_; + + struct starpu_codelet cl_dhagcm{}; + + static void cl_dhagcm_cpu_func(void *descr[], void *cl_arg); + }; + + } +} +#endif //HICMAPP_GENERATE_COMPRESSED_DATA_CODELET_HPP diff --git a/include/hicmapp/runtime/concrete/starpu/starpu-codelets/lacpy-codelet.hpp b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/lacpy-codelet.hpp new file mode 100644 index 0000000..0a5d981 --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/lacpy-codelet.hpp @@ -0,0 +1,29 @@ +#ifndef HICMAPP_LACPY_CODELET_HPP +#define HICMAPP_LACPY_CODELET_HPP + +#include +#include + +namespace hicmapp::runtime { + template + class LacpyCodelet : public StarpuCodelet { + + public: + LacpyCodelet(); + + starpu_codelet *GetCodelet() override; + + ~LacpyCodelet() = default; + + private: + static bool Register(); + + static const bool registered_; + + struct starpu_codelet cl_lacpy{}; + + static void cl_lacpy_func(void 
*descr[], void *cl_arg); + }; + + } +#endif //HICMAPP_LACPY_CODELET_HPP diff --git a/include/hicmapp/runtime/concrete/starpu/starpu-codelets/potrf-codelet.hpp b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/potrf-codelet.hpp new file mode 100644 index 0000000..9bdd3ab --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/potrf-codelet.hpp @@ -0,0 +1,30 @@ +#include +#include + +#ifndef HICMAPP_POTRF_CODELET_HPP +#define HICMAPP_POTRF_CODELET_HPP +namespace hicmapp { + namespace runtime { + template + class PotrfCodelet : public StarpuCodelet { + + public: + PotrfCodelet(); + + starpu_codelet *GetCodelet() override; + + ~PotrfCodelet() = default; + + private: + static bool Register(); + + static const bool registered_; + + struct starpu_codelet cl_potrf{}; + + static void cl_potrf_func(void *descr[], void *cl_arg); + }; + + } +} +#endif //HICMAPP_POTRF_CODELET_HPP diff --git a/include/hicmapp/runtime/concrete/starpu/starpu-codelets/syrk-codelet.hpp b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/syrk-codelet.hpp new file mode 100644 index 0000000..4e1b1c6 --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/syrk-codelet.hpp @@ -0,0 +1,31 @@ +#include +#include + +#ifndef HICMAPP_SYRK_CODELET_HPP +#define HICMAPP_SYRK_CODELET_HPP +namespace hicmapp { + namespace runtime { + template + class SyrkCodelet : public StarpuCodelet { + + public: + SyrkCodelet(); + + starpu_codelet *GetCodelet() override; + + ~SyrkCodelet() = default; + + private: + + static bool Register(); + + static const bool registered_; + + struct starpu_codelet cl_syrk{}; + + static void cl_syrk_func(void *descr[], void *cl_arg); + }; + + } +} +#endif //HICMAPP_SYRK_CODELET_HPP diff --git a/include/hicmapp/runtime/concrete/starpu/starpu-codelets/trsm-codelet.hpp b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/trsm-codelet.hpp new file mode 100644 index 0000000..b544c0c --- /dev/null +++ 
b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/trsm-codelet.hpp @@ -0,0 +1,30 @@ +#include +#include + +#ifndef HICMAPP_TRSM_CODELET_HPP +#define HICMAPP_TRSM_CODELET_HPP +namespace hicmapp { + namespace runtime { + template + class TrsmCodelet : public StarpuCodelet { + + public: + TrsmCodelet(); + + starpu_codelet *GetCodelet() override; + + ~TrsmCodelet() = default; + + private: + static bool Register(); + + static const bool registered_; + + struct starpu_codelet cl_trsm{}; + + static void cl_trsm_func(void *descr[], void *cl_arg); + }; + + } +} +#endif //HICMAPP_TRSM_CODELET_HPP diff --git a/include/hicmapp/runtime/concrete/starpu/starpu-codelets/uncompress-codelet.hpp b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/uncompress-codelet.hpp new file mode 100644 index 0000000..89f2d2c --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/starpu-codelets/uncompress-codelet.hpp @@ -0,0 +1,28 @@ +#include +#include + +#ifndef HICMAPP_UNCOMPRESS_CODELET_HPP +#define HICMAPP_UNCOMPRESS_CODELET_HPP +namespace hicmapp::runtime { + template + class UncompressCodelet : public StarpuCodelet { + + public: + UncompressCodelet(); + + starpu_codelet *GetCodelet() override; + + ~UncompressCodelet() = default; + + private: + static bool Register(); + + static const bool registered_; + + struct starpu_codelet cl_uncompress{}; + + static void cl_uncompress_func(void *descr[], void *cl_arg); + }; + + } +#endif //HICMAPP_UNCOMPRESS_CODELET_HPP diff --git a/include/hicmapp/runtime/concrete/starpu/starpu.hpp b/include/hicmapp/runtime/concrete/starpu/starpu.hpp new file mode 100644 index 0000000..7f6af78 --- /dev/null +++ b/include/hicmapp/runtime/concrete/starpu/starpu.hpp @@ -0,0 +1,110 @@ +#ifndef HICMAPP_RUNTIME_CONCRETE_STARPU_HPP +#define HICMAPP_RUNTIME_CONCRETE_STARPU_HPP + +#include +#include +#include "hicma_starpu.hpp" +#include + +using namespace hicmapp::primitives; + +namespace hicmapp::runtime { + + typedef std::vector TileHandles; + typedef 
std::unordered_map TileHandlesMap; + + template + class StarPu : public RunTimeInterface { + + public: + explicit StarPu(hicmapp::runtime::HicmaHardware &aHardware); + + ~StarPu(); + + int GenerateDenseMatrix(Matrix& apMatrix, size_t aTileIdxInRows, size_t aTileIdxInCols) override; + + int GenerateCompressedMatrix(Matrix &apMatrix, size_t aTileIdxInRows, size_t aTileIdxInCols, + const CompressionParameters& aSVDArguments) override; + + size_t + Gemm(T aAlpha, Matrix &apMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, const blas::Op &aAOp, + Matrix &apMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, const blas::Op &aBOp, T aBeta, + Matrix &apMatrixC, const size_t &aRowIdxC, const size_t &aColIdxC, + const hcorepp::kernels::RunContext &aContext, const CompressionParameters &aSVDArguments, + hcorepp::dataunits::MemoryUnit &aMemoryUnit, bool aCholesky = false) override; + + int Sync() override; + + void Flush(const Matrix &apMatrix, const size_t aRowIdx, const size_t aColIdx) override; + + void Flush(const Matrix &apMatrix) override; + + TileHandlesMap &GetMatrixHandles(size_t aMatrixId); + + void Finalize() override; + + void UnRegisterHandles(Matrix &A) override; + + void UnRegisterTileHandles(TileHandles &aHandles); + + void RegisterHandles(Matrix &A) override; + + common::RunTimeLibrary LibraryType() override { + return common::RunTimeLibrary::STARPU; + } + + size_t + Syrk(Matrix &apMatrixA, const size_t &aRowIdxA, + const size_t &aColIdxA, const blas::Op &aAOp, Matrix &apMatrixC, + const size_t &aRowIdxC, const size_t &aColIdxC, blas::Uplo aUplo, T aAlpha, + T aBeta, const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) override; + + size_t + Potrf(Matrix &apMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, blas::Uplo aUplo, + const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) override; + + size_t + Trsm(blas::Side aSide, blas::Uplo aUplo, blas::Op aTrans, 
blas::Diag aDiag, T aAlpha, + Matrix &apMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, + Matrix &apMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, + const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) override; + + size_t + GenerateDiagonalTile(Matrix &apMatrixUV, Matrix &apMatrixDiag, const size_t &aRowIdxDiag, + const size_t &aColIdxDiag, Matrix &apMatrixRK, const size_t &aRowIdx, + const size_t &aColIdx, unsigned long long int seed, size_t maxrank, double tol, + size_t compress_diag, Matrix &apMatrixDense, + const hcorepp::kernels::RunContext &aContext, bool diagonal_tile) override; + + size_t + LaCpy(Matrix &apMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, + Matrix &apMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, + const hcorepp::kernels::RunContext &aContext) override; + + size_t + Uncompress(Matrix &apMatrixUV, Matrix &apMatrixDense, Matrix &apMatrixRk, const size_t &aRowIdx, + const size_t &aColIdx) override; + + private: + void + RegisterTileHandles(Matrix &A, size_t aM, size_t aN); + + TileHandles& + GetTileHandles(Matrix &A, size_t aM, size_t aN); + + TileHandles & + GetTileHandles(TileHandlesMap &aHandlesMap, size_t aHandleIdx); + + private: + /*** RunTimeHandles Map, Each matrix has one TileHandlesMap. 
MatrixID -> TileHandlesMap */ + std::unordered_map mRunTimeHandles{}; + /*** TileMetadata Map, Tile Idx -> (row_idx, col_idx, tile_metadata) */ + std::unordered_map>> mTileMetadata{}; + /*** StarPu Configurations object */ + starpu_conf_t *mConf; + }; + } +#endif //HICMAPP_RUNTIME_CONCRETE_STARPU_HPP diff --git a/include/hicmapp/runtime/interface/HicmaCommunicator.hpp b/include/hicmapp/runtime/interface/HicmaCommunicator.hpp new file mode 100644 index 0000000..c7e8248 --- /dev/null +++ b/include/hicmapp/runtime/interface/HicmaCommunicator.hpp @@ -0,0 +1,11 @@ +#ifndef HICMAPP_RUNTIME_HICMA_COMMUNICATOR_HPP +#define HICMAPP_RUNTIME_HICMA_COMMUNICATOR_HPP + +#ifdef HICMAPP_USE_MPI +#include +#else +#include +#endif + +#endif //HICMAPP_RUNTIME_HICMA_COMMUNICATOR_HPP + diff --git a/include/hicmapp/runtime/interface/HicmaContext.hpp b/include/hicmapp/runtime/interface/HicmaContext.hpp new file mode 100644 index 0000000..d028402 --- /dev/null +++ b/include/hicmapp/runtime/interface/HicmaContext.hpp @@ -0,0 +1,88 @@ +#ifndef HICMAPP_RUNTIME_HICMA_CONTEXT_HPP +#define HICMAPP_RUNTIME_HICMA_CONTEXT_HPP + +#include +#include +#include + +namespace hicmapp::runtime { + + class HicmaContext { + public: + /*** + * HicmaContext Default Constructor + */ + explicit HicmaContext(); + + /*** + * HicmaContext Constructor with a specific communicator + * @param aComm + */ + explicit HicmaContext(HicmaCommunicator aComm); + + /*** + * Default HicmaContext Destructor + */ + ~HicmaContext() = default; + + /*** + * Get number of Hcorepp contexts + * @return + */ + size_t + GetNumOfContexts(); + + /*** + * Get the main context + * @return + */ + const hcorepp::kernels::RunContext& + GetMainContext(); + + /*** + * Get a specific context + * @param aIdx index of context to fetch + * @return + */ + const hcorepp::kernels::RunContext& + GetContext(size_t aIdx = 0); + + /*** + * Synchronize main context + */ + void + SyncMainContext(); + + /*** + * Synchronize context at a specific index + * 
@param aIdx + */ + void + SyncContext(size_t aIdx = 0); + + /*** + * Synchronize all contexts + */ + void + SyncAll(); + + /*** + * Get HicmaCommunicator member + * @return + */ + HicmaCommunicator& + GetCommunicator(); + + /*** + * Set Communicator to be used + * @param aCommunicator + */ + void + SetCommunicator(HicmaCommunicator& aCommunicator); + + private: + /*** Communicator member */ + HicmaCommunicator mCommunicator; + }; + } +#endif //HICMAPP_RUNTIME_HICMA_CONTEXT_HPP diff --git a/include/hicmapp/runtime/interface/HicmaHardware.hpp b/include/hicmapp/runtime/interface/HicmaHardware.hpp new file mode 100644 index 0000000..196076a --- /dev/null +++ b/include/hicmapp/runtime/interface/HicmaHardware.hpp @@ -0,0 +1,22 @@ +#ifndef HICMAPP_RUNTIME_HICMA_HARDWARE_HPP +#define HICMAPP_RUNTIME_HICMA_HARDWARE_HPP + +namespace hicmapp::runtime { + + struct HicmaHardware { + /*** + * Struct specifying hardware the runtime instance will use during runtime. + * @param aCPUs + * @param aGPUs + * @param aThreadsPerWorker + */ + HicmaHardware(int aCPUs, int aGPUs, int aThreadsPerWorker) : mCPUs(aCPUs), mGPUs(aGPUs), mThreadsPerWorker(aThreadsPerWorker) { + + } + int mCPUs = 1; + int mGPUs = 0; + int mThreadsPerWorker = -1; + }; + } + +#endif //HICMAPP_RUNTIME_HICMA_HARDWARE_HPP \ No newline at end of file diff --git a/include/hicmapp/runtime/interface/RunTimeFactory.hpp b/include/hicmapp/runtime/interface/RunTimeFactory.hpp new file mode 100644 index 0000000..ca43116 --- /dev/null +++ b/include/hicmapp/runtime/interface/RunTimeFactory.hpp @@ -0,0 +1,37 @@ +#ifndef HICMAPP_RUNTIME_RUNTIME_FACTORY_HPP +#define HICMAPP_RUNTIME_RUNTIME_FACTORY_HPP + +#include + +#ifdef HICMAPP_STARPU +#include +#else +#include +#endif +namespace hicmapp::runtime { + /*** + * Runtime factory to return runtime instance. 
This would be extended and refactored if more runtimes are supported + * @tparam T + */ + template + class RunTimeFactory { + + public: + /*** + * Create an instance of a runtime object + * @param aHardware HicmaHardware for initialization of runtime + * @return + */ + static RunTimeInterface *CreateRunTimeInstance(hicmapp::runtime::HicmaHardware& aHardware) { + +#ifdef HICMAPP_STARPU + return new StarPu(aHardware); +#else + return new DefaultRuntime(aHardware); + +#endif + } + }; + HICMAPP_INSTANTIATE_CLASS(RunTimeFactory); + } +#endif //HICMAPP_RUNTIME_RUNTIME_FACTORY_HPP \ No newline at end of file diff --git a/include/hicmapp/runtime/interface/RunTimeInterface.hpp b/include/hicmapp/runtime/interface/RunTimeInterface.hpp new file mode 100644 index 0000000..732005a --- /dev/null +++ b/include/hicmapp/runtime/interface/RunTimeInterface.hpp @@ -0,0 +1,237 @@ +#ifndef HICMAPP_RUNTIME_RUN_TIME_INTERFACE_HPP +#define HICMAPP_RUNTIME_RUN_TIME_INTERFACE_HPP + +#include +#include + +using namespace hicmapp::primitives; + +namespace hicmapp::runtime { + + /*** + * This is the interface that should be supported by all runtime libraries + * @tparam T + */ + template + class RunTimeInterface { + + public: + /*** + * Default Constructor + */ + RunTimeInterface() = default; + /*** + * Default Destructor + */ + ~RunTimeInterface() = default; + + /*** + * Dense Matrix Generation + * @param aMatrix Full Matrix + * @param aTileIdxInRows Index of Tile in Rows to be Generated + * @param aTileIdxInCols Index of Tile in Cols to be Generated + * @return Error Code + */ + virtual int + GenerateDenseMatrix(Matrix &aMatrix, size_t aTileIdxInRows, size_t aTileIdxInCols) = 0; + + /*** + * Compressed Matrix Generation + * @param aMatrix Full Matrix + * @param aTileIdxInRows Index of Tile in Rows to be Generated + * @param aTileIdxInCols Index of Tile in Cols to be Generated + * @param aSVDArguments Compression Parameters + * @return + */ + virtual int + GenerateCompressedMatrix(Matrix 
&aMatrix, size_t aTileIdxInRows, size_t aTileIdxInCols, + const CompressionParameters &aSVDArguments) = 0; + + /*** + * Gemm Operation + * @param aAlpha alpha factor + * @param aMatrixA Matrix A + * @param aRowIdxA Row index of the tile in Matrix A + * @param aColIdxA Column index of the tile in Matrix A + * @param aAOp Operation to be performed on the tile of A + * @param aMatrixB Matrix B + * @param aRowIdxB Row index of the tile in Matrix B + * @param aColIdxB Column index of the tile in Matrix B + * @param aBOp Operation to be performed on the tile of B + * @param aBeta beta factor + * @param aMatrixC Matrix C + * @param aRowIdxC Row index of the tile in Matrix C + * @param aColIdxC Column index of the tile in Matrix C + * @param aContext Hcorepp context + * @param aSVDArguments Compression Parameters + * @param aMemoryUnit Memory Unit for intermediate allocations + * @param aCholesky Flag specifying if the gemm operation is done in the context of cholesky + * @return Flops + */ + virtual size_t + Gemm(T aAlpha, Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, const blas::Op &aAOp, + Matrix &aMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, const blas::Op &aBOp, T aBeta, + Matrix &aMatrixC, const size_t &aRowIdxC, const size_t &aColIdxC, + const hcorepp::kernels::RunContext &aContext, const CompressionParameters &aSVDArguments, + hcorepp::dataunits::MemoryUnit &aMemoryUnit, bool aCholesky = false) = 0; + + /*** + * Syrk Operation + * @param apMatrixA + * @param aRowIdxA + * @param aColIdxA + * @param aAOp + * @param apMatrixC + * @param aRowIdxC + * @param aColIdxC + * @param aUplo + * @param aAlpha + * @param aBeta + * @param aContext + * @param aMemoryUnit + * @return + */ + virtual size_t + Syrk(Matrix &apMatrixA, const size_t &aRowIdxA, + const size_t &aColIdxA, const blas::Op &aAOp, Matrix &apMatrixC, + const size_t &aRowIdxC, const size_t &aColIdxC, blas::Uplo aUplo, T aAlpha, + T aBeta, const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) = 0; + + /*** + * Potrf Operation + * @param aMatrixA + * @param aRowIdxA + * @param aColIdxA + * @param aUplo + * @param aContext + * @param aMemoryUnit + * @return + */ + virtual size_t + Potrf(Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, blas::Uplo aUplo, + const hcorepp::kernels::RunContext &aContext, hcorepp::dataunits::MemoryUnit &aMemoryUnit) = 0; + + /*** + * Trsm operation + * @param aSide + * @param aUplo + * @param aTrans + * @param aDiag + * 
@param aAlpha + * @param aMatrixA + * @param aRowIdxA + * @param aColIdxA + * @param aMatrixB + * @param aRowIdxB + * @param aColIdxB + * @param aContext + * @param aMemoryUnit + * @return + */ + virtual size_t + Trsm(blas::Side aSide, blas::Uplo aUplo, blas::Op aTrans, blas::Diag aDiag, T aAlpha, + Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, + Matrix &aMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, + const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) = 0; + + /*** + * Generation of Diagonal Tiles + * @param aMatrixUV + * @param aMatrixDiag + * @param aRowIdxDiag + * @param aColIdxDiag + * @param apMatrixRK + * @param aRowIdx + * @param aColIdx + * @param seed + * @param maxrank + * @param tol + * @param compress_diag + * @param aMatrixDense + * @param aContext + * @param diagonal_tile + * @return + */ + virtual size_t + GenerateDiagonalTile(Matrix& aMatrixUV, Matrix& aMatrixDiag, const size_t &aRowIdxDiag, + const size_t &aColIdxDiag, Matrix& apMatrixRK, const size_t &aRowIdx, + const size_t &aColIdx, unsigned long long int seed, size_t maxrank, double tol, + size_t compress_diag, Matrix& aMatrixDense, + const hcorepp::kernels::RunContext &aContext, bool diagonal_tile) = 0; + + /*** + * Copy Operation + * @param aMatrixA + * @param aRowIdxA + * @param aColIdxA + * @param aMatrixB + * @param aRowIdxB + * @param aColIdxB + * @param aContext + * @return + */ + virtual size_t + LaCpy(Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, + Matrix &aMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, + const hcorepp::kernels::RunContext &aContext) = 0; + + /*** + * Uncompress Operation + * @param aMatrixUV + * @param aMatrixDense + * @param pMatrixRk + * @param aRowIdx + * @param aColIdx + * @return + */ + virtual size_t + Uncompress(Matrix &aMatrixUV, Matrix &aMatrixDense, Matrix &pMatrixRk, const size_t &aRowIdx, + const size_t &aColIdx) = 0; + + /*** + * Synchronize. 
Waits on all Tasks and places an MPI barrier if MPI is enabled (for StarPU) + * @return + */ + virtual int Sync() = 0; + + /*** + * Flush specific tile data + * @param aMatrix + * @param aRowIdx + * @param aColIdx + */ + virtual void Flush(const Matrix &aMatrix, size_t aRowIdx, size_t aColIdx) = 0; + + /*** + * Flush entire matrix data + * @param aMatrix + */ + virtual void Flush(const Matrix &apMatrix) = 0; + + /*** + * Finalize Runtime + */ + virtual void Finalize() = 0; + + /*** + * Register all tiles of Matrix A + * @param A + */ + virtual void RegisterHandles(Matrix &A) = 0; + + /*** + * Unregister all tiles of Matrix A + * @param A + */ + virtual void UnRegisterHandles(Matrix &A) = 0; + + /*** + * Getter for Library Type + * @return + */ + virtual common::RunTimeLibrary LibraryType() = 0; + }; + } +#endif //HICMAPP_RUNTIME_RUN_TIME_INTERFACE_HPP diff --git a/include/hicmapp/runtime/interface/RunTimeSingleton.hpp b/include/hicmapp/runtime/interface/RunTimeSingleton.hpp new file mode 100644 index 0000000..5149895 --- /dev/null +++ b/include/hicmapp/runtime/interface/RunTimeSingleton.hpp @@ -0,0 +1,39 @@ + +#ifndef HICMAPP_RUNTIME_RUNTIME_SINGLETON_HPP +#define HICMAPP_RUNTIME_RUNTIME_SINGLETON_HPP + +#include +#include +#include + +namespace hicmapp::runtime { + template + /** Singleton Class for Runtime Instance to be used across the project */ + class RunTimeSingleton { + public: + /*** + * Getter for Runtime Instance. Throws exception if not initialized + * @return + */ + static RunTimeInterface *GetRunTimeInstance(); + + /*** + * Initialize Runtime Instance. 
Currently only Default and StarPu are supported + * @param aHardware Hicma Hardware initialization + */ + static void setRunTimeInstance(hicmapp::runtime::HicmaHardware& aHardware); + + private: + /*** + * Private constructor + */ + RunTimeSingleton(); + + /** + * Singleton member + */ + static hicmapp::runtime::RunTimeInterface *runtime_instance; + }; + } + +#endif //HICMAPP_RUNTIME_RUNTIME_SINGLETON_HPP \ No newline at end of file diff --git a/include/hicmapp/runtime/interface/StarpuCodelet.hpp b/include/hicmapp/runtime/interface/StarpuCodelet.hpp new file mode 100644 index 0000000..7b022eb --- /dev/null +++ b/include/hicmapp/runtime/interface/StarpuCodelet.hpp @@ -0,0 +1,28 @@ +#include +#include + +#ifndef HICMAPP_STARPUCODELET_HPP +#define HICMAPP_STARPUCODELET_HPP + +using namespace std; + +namespace hicmapp::runtime { + + /** Parent Abstract Class for all StarPu Codelets */ + class StarpuCodelet { + + public: + + /** Getter for starpu_codelet */ + virtual starpu_codelet *GetCodelet() = 0; + + /*** + * StarPuCodelet Destructor + */ + virtual ~StarpuCodelet() = default; + + private: + + }; + } +#endif //HICMAPP_STARPUCODELET_HPP diff --git a/include/hicmapp/tile-operations/TileOperations.hpp b/include/hicmapp/tile-operations/TileOperations.hpp new file mode 100644 index 0000000..b7ca16f --- /dev/null +++ b/include/hicmapp/tile-operations/TileOperations.hpp @@ -0,0 +1,187 @@ +#ifndef HICMAPP_TILE_OPERATIONS_HPP +#define HICMAPP_TILE_OPERATIONS_HPP + +#include +#include + +namespace hicmapp::operations { + template + class TileOperations { + + public: + + /** + * Generates a single Compressed Tile based on specific Compression Parameters (Tolerance, Fixed Rank etc.) 
+ * @param[out] aCompressedTile CompressedTile to be Generated + * @param[in] aTileRowIdx Index of tile in global matrix rows needed by STARSH generation + * @param[in] aTileColIdx Index of tile in global matrix cols needed by STARSH generation + * @param[in] aSVDArguments Compression Parameters + * @return + */ + static int + GenerateCompressedMatrix(hcorepp::operators::CompressedTile &aCompressedTile, + size_t aTileRowIdx, size_t aTileColIdx, + const hcorepp::operators::CompressionParameters &aSVDArguments); + + /** + * Generates a single Dense Tile + * @param[out] aDenseTile Dense Tile to be generated + * @param[in] aTileRowIdx Index of tile in global matrix rows needed by STARSH generation + * @param[in] aTileColIdx Index of tile in global matrix cols needed by STARSH generation + * @return + */ + static int + GenerateDenseTile(hcorepp::operators::DenseTile &DenseTile, size_t aTileRowIdx, size_t aTileColIdx); + + /*** + * Uncompress U and V matrices into a single dense buffer + * @param aNumOfRows Num of Rows + * @param aNumOfCols Num of Cols + * @param aAlpha alpha factor + * @param apAU U submatrix + * @param apArk Rank matrix + * @param aLeadingDimA Leading dim of U + * @param apBV V submatrix + * @param aLeadingDimB Leading Dimension of B + * @param aBeta beta factor + * @param apC Allocated dense buffer + * @param aLeadingDimC Leading dim of dense buffer + * @return + */ + static int + UnCompressTile(size_t aNumOfRows, size_t aNumOfCols, double aAlpha, const T *apAU, + const T *apArk, + size_t aLeadingDimA, const T *apBV, size_t aLeadingDimB, + double aBeta, + T *apC, size_t aLeadingDimC); + + + /** + * Computes Gemm between two tile objects A and B into the output tile object C where + * C = alpha * A * B + beta * C + * @param aAlpha alpha factor + * @param aTileA Input Tile A + * @param aAOp Operation to be performed on Tile A + * @param aTileB Input Tile B + * @param aBOp Operation to be performed on Tile B + * @param aBeta beta factor + * @param[out] 
aTileC Output Tile + * @param aContext Hcorepp context + * @param aMemoryUnit Memory Unit for intermediate allocations + * @param aSVDArguments Compression Parameters for Compressed Tiles + * @param aCholesky Flag specifying if the gemm operation was done in the context of cholesky + * @return Flops + */ + static size_t + Gemm(T aAlpha, hcorepp::operators::Tile const &aTileA, blas::Op const &aAOp, + hcorepp::operators::Tile const &aTileB, blas::Op const &aBOp, T aBeta, + hcorepp::operators::Tile &aTileC, const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit, + const hcorepp::operators::CompressionParameters &aSVDArguments = {1e-9}, bool aCholesky = false); + + + /*** + * Syrk operation to be performed on tiles A and B + * @param aAlpha alpha factor + * @param aA Input tile A + * @param aAOp Operation to be performed on Tile A + * @param aUplo Upper, Lower, or UpperLower + * @param aBeta beta factor + * @param[out] aB Output Tile B + * @param aContext Hcorepp context to be used + * @param aMemoryUnit Memory unit for intermediate allocations + * @return flops + */ + static size_t + Syrk(T aAlpha, const hcorepp::operators::Tile &aA, const blas::Op &aAOp, blas::Uplo aUplo, + T aBeta, hcorepp::operators::Tile &aB, const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit); + + + /*** + * Potrf operation to be performed on tiles A + * @param aAlpha alpha factor + * @param aA Input tile A + * @param aAOp Operation to be performed on Tile A + * @param aUplo Upper, Lower, or UpperLower + * @param aBeta beta factor + * @param[out] aB Output Tile B + * @param aContext Hcorepp context to be used + * @param aMemoryUnit Memory unit for intermediate allocations + * @return flops + */ + static size_t + Potrf(hcorepp::operators::Tile &aA, blas::Uplo aUplo, const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit); + + + /*** + * Trsm operation to be performed on Tiles A and 
B + * @param aSide Left or Right Sided Operation + * @param aUplo Upper, Lower or UpperLower + * @param aTrans Transpose operation to be performed + * @param aDiag NonUnit or Unit Diagonal + * @param aAlpha alpha factor + * @param aA Tile A + * @param aB Tile B + * @param aContext Hcorepp context to be used + * @param aMemoryUnit Memory unit for intermediate allocations + * @return flops + */ + static size_t + Trsm(blas::Side aSide, blas::Uplo aUplo, blas::Op aTrans, blas::Diag aDiag, T aAlpha, + hcorepp::operators::Tile &aA, hcorepp::operators::Tile &aB, + const hcorepp::kernels::RunContext &aContext, hcorepp::dataunits::MemoryUnit &aMemoryUnit); + + /*** + * Generate Diagonal Tiles + * @param aAUV Compressed AUV tile + * @param aRanks Ranks of A matrix + * @param aDenseA Dense A Tile + * @param aDiagonalA Diagonal Tiles in A + * @param aTileRowIdx Row Index of Tile A + * @param aTileColIdx Column Index of Tile A + * @param aSeed Seed for Generation + * @param aMaxRank Maximum Rank + * @param aTolerance Tolerance and Accuracy for Compression + * @param aCompressDiagonal Boolean whether diagonal to be compressed or not + * @param aLeadingDimA Leading Dimension of A tile + * @param aLeadingDimU Leading Dimension of U submatrix + * @param aLeadingDimV Leading Dimension of V submatrix + * @param aRows Number of rows + * @param aCols Number of columns in A + * @param aContext Hcorepp Context + * @return Flops + */ + static size_t + GenerateDiagonalTile(hcorepp::operators::Tile *aAUV, hcorepp::operators::Tile *aRanks, + hcorepp::operators::Tile *aDenseA, hcorepp::operators::Tile *aDiagonalA, + int aTileRowIdx, int aTileColIdx, unsigned long long int aSeed, + int aMaxRank, double aTolerance, int aCompressDiagonal, + int aLeadingDimA, int aLeadingDimU, int aLeadingDimV, int aRows, + int aCols, const hcorepp::kernels::RunContext &aContext); + + + /*** + * Copies Data from Tile A into Tile B + * @param aRows Number of rows to copy + * @param aCols Number of Columns to copy 
+ * @param aA Tile to be copied from + * @param aB Tile to be copied into + * @param aContext Hcorepp context + * @return + */ + static size_t + LaCpy(int aRows, int aCols, const hcorepp::operators::Tile &aA, hcorepp::operators::Tile &aB, + const hcorepp::kernels::RunContext &aContext); + + private: + /** + * @brief + * Prevent Class Instantiation for Operations Wrapper Class. + */ + TileOperations() = default; + + }; +} +#endif //HICMAPP_TILE_OPERATIONS_HPP diff --git a/include/hicmapp/utils/MatrixHelpers.hpp b/include/hicmapp/utils/MatrixHelpers.hpp new file mode 100644 index 0000000..6feb961 --- /dev/null +++ b/include/hicmapp/utils/MatrixHelpers.hpp @@ -0,0 +1,37 @@ +#ifndef HICMAPP_UTILS_MATRIX_HELPERS_HPP +#define HICMAPP_UTILS_MATRIX_HELPERS_HPP + +#include + +/** This file is for Matrix Utilities. These helper functions are to be used in tests and examples for logging and debugging, + * and the file is dedicated to utilities that should not necessarily be part of the Matrix Class API + */ + +namespace hicmapp::utils { + template + class MatrixHelpers { + + public: + + /** + * This converts a Matrix Object to a contiguous array in memory. NOTE: This does not support MPI and the + * decomposition associated with an MPI workflow. Please use ToRawMatrix function instead. 
+ * @param[in] aMatrix Matrix to be copied + * @param[out] aArray Allocated array of size equal to the matrix being copied + */ + static void + MatrixToArray(primitives::Matrix &aMatrix, T *&aArray); + + /** + * Print Utility to print a two-dimensional array based on data layout + * @param aArray Array to be printed + * @param aRows Number of rows + * @param aCols Number of Columns + * @param aLayout Layout (ColMajor or RowMajor) + */ + static void + PrintArray(T *&aArray, size_t aRows, size_t aCols, hicmapp::common::StorageLayout aLayout); + }; + + } +#endif //HICMAPP_UTILS_MATRIX_HELPERS_HPP diff --git a/libs/hcorepp b/libs/hcorepp new file mode 160000 index 0000000..67a1da3 --- /dev/null +++ b/libs/hcorepp @@ -0,0 +1 @@ +Subproject commit 67a1da3ffbcd903702c961944b964e1a7dae1bbf diff --git a/libs/stars-h b/libs/stars-h new file mode 160000 index 0000000..27fc836 --- /dev/null +++ b/libs/stars-h @@ -0,0 +1 @@ +Subproject commit 27fc83618880241dbe8661301c8647f443bd9633 diff --git a/package.pc.in b/package.pc.in new file mode 100644 index 0000000..3ff810d --- /dev/null +++ b/package.pc.in @@ -0,0 +1,10 @@ +prefix="@CMAKE_INSTALL_PREFIX@" +exec_prefix="${prefix}" +libdir="${prefix}/lib" +includedir="${prefix}/include" + +Name: @PROJECT_NAME@ +Description: @CMAKE_PROJECT_DESCRIPTION@ +Version: @PROJECT_VERSION@ +Cflags: -I${includedir} +Libs: -L${libdir} -l@PROJECT_NAME@ \ No newline at end of file diff --git a/scripts/benchmark_script.sh b/scripts/benchmark_script.sh new file mode 100755 index 0000000..3f3d654 --- /dev/null +++ b/scripts/benchmark_script.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# +# @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST). +# All rights reserved. 
+# +if [[ $# -eq 0 ]] ; then + echo 'This script needs a single argument that is the hicmapp binary to benchmark' + exit 0 +fi + +acc="1e-4,1e-6,1e-8,1e-10" + +export HICMAPP_VERBOSE=ON +TileCount=(1 4 8 12 16 20) + +cat /dev/null > benchmark_ts1024_1.csv + +for tile_count in ${TileCount[@]}; do + $1 $tile_count $acc 1024 1 >> benchmark_ts1024_1.csv + unset HICMAPP_VERBOSE +done diff --git a/scripts/multithreading_benchmark_script.sh b/scripts/multithreading_benchmark_script.sh new file mode 100755 index 0000000..7410052 --- /dev/null +++ b/scripts/multithreading_benchmark_script.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# +# @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST). +# All rights reserved. +# +if [[ $# -eq 0 ]] ; then + echo 'This script needs a single argument that is the hicmapp binary to benchmark' + exit 0 +fi + +acc="1e-8" + +Threads=(1 4 8 16 32 64) + +export HICMAPP_VERBOSE=ON +TileCount=(8) + +for threads in ${Threads[@]}; do + cat /dev/null > benchmark_ts1024_${threads}.csv + export MKL_NUM_THREADS=1 + for tile_count in ${TileCount[@]}; do + $1 $tile_count $acc 1024 1 $threads >> benchmark_ts1024_${threads}.csv + unset HICMAPP_VERBOSE + done +done diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..a96e85a --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,34 @@ +add_subdirectory(primitives) +add_subdirectory(matrix-operations) +add_subdirectory(runtime) +add_subdirectory(problem-manager) +add_subdirectory(tile-operations) +add_subdirectory(api) +add_subdirectory(utils) + +set(LIB_NAME ${PROJECT_NAME}) + +add_library(${LIB_NAME} + STATIC + ${SOURCES} + ) + +target_link_libraries(${LIB_NAME} ${LIBS}) +target_compile_definitions(${LIB_NAME} PUBLIC ${COMPILE_DEFINITIONS}) + +## Installation steps for headers and libraries. 
+set_target_properties(${LIB_NAME} + PROPERTIES + VERSION ${${PROJECT_NAME}_VERSION} + ) +target_include_directories(${LIB_NAME} + PUBLIC + "$/include>" + ) +install(TARGETS ${LIB_NAME} + EXPORT ${LIB_NAME}CoreConfig + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + ) +install(EXPORT ${LIB_NAME}CoreConfig DESTINATION lib/cmake/${PROJECT_NAME}) + diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt new file mode 100644 index 0000000..e9805f1 --- /dev/null +++ b/src/api/CMakeLists.txt @@ -0,0 +1,5 @@ +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/Hicmapp.cpp + ${SOURCES} + PARENT_SCOPE + ) diff --git a/src/api/Hicmapp.cpp b/src/api/Hicmapp.cpp new file mode 100644 index 0000000..7f2db5f --- /dev/null +++ b/src/api/Hicmapp.cpp @@ -0,0 +1,77 @@ +#include +#include "hicmapp/runtime/interface/RunTimeSingleton.hpp" +#include "hicmapp/problem-manager/StarshManager.hpp" + +namespace hicmapp::api { + template + void Hicmapp::GenerateDenseMatrix(common::Uplo auplo, Matrix &aMatrix, bool aASync) { + hicmapp::operations::MatrixOperations::GenerateDenseMatrix(auplo, aMatrix, aASync); + } + + template + void Hicmapp::Init(int aCPUs, int aGPUs, int aThreadsPerWorker) { + auto hardware = hicmapp::runtime::HicmaHardware(aCPUs, aGPUs, aThreadsPerWorker); + hicmapp::runtime::RunTimeSingleton::setRunTimeInstance(hardware); + } + + template + void Hicmapp::Finalize() { + hicmapp::runtime::RunTimeSingleton::GetRunTimeInstance()->Finalize(); + hicmapp::operations::StarsHManager::DestroyStarsHManager(); + } + + template + void Hicmapp::GenerateCompressedMatrix(common::Uplo auplo, Matrix &aMatrix, + const CompressionParameters &aSVDArguments, bool aASync) { + + hicmapp::operations::MatrixOperations::GenerateCompressedMatrix(auplo, aMatrix, aSVDArguments, aASync); + } + + template + void Hicmapp::UncompressMatrix(common::Uplo auplo, Matrix &aMatrixUV, Matrix &aMatrixRK, + Matrix &aMatrixD) { + hicmapp::operations::MatrixOperations::UncompressMatrix(auplo, aMatrixUV, aMatrixRK, 
aMatrixD); + } + + template + size_t Hicmapp::Gemm(Matrix &aMatrixA, const blas::Op &aAOp, Matrix &aMatrixB, const blas::Op &aBOp, + Matrix &aMatrixC, T &aAlpha, T &aBeta, runtime::HicmaContext &aContext, + bool aAllocatePool, const CompressionParameters &aSVDArguments, + const std::vector> &aRanks) { + return hicmapp::operations::MatrixOperations::Gemm(aMatrixA, aAOp, aMatrixB, aBOp, aMatrixC, aAlpha, + aBeta, aContext, aSVDArguments, aRanks, + aAllocatePool); + } + + template + size_t Hicmapp::GenerateDiagonalTiles(common::Uplo auplo, Matrix &aMatrixUV, Matrix &aMatrixRK, + Matrix &aMatrixD, unsigned long long int aSeed, int aMaxRank, + double aTol, int aCompressDiag, Matrix &aMatrixDense, + runtime::HicmaContext &aContext) { + return hicmapp::operations::MatrixOperations::GenerateDiagonalTiles(auplo, aMatrixUV, aMatrixRK, + aMatrixD, aSeed, aMaxRank, aTol, + aCompressDiag, aMatrixDense, + aContext); + + } + + template + size_t Hicmapp::Cholesky(common::Uplo aUpperLower, Matrix &aMatrixAUV, Matrix &aMatrixADiagonal, + Matrix &aMatrixARK, int aRank, int aMaxRank, double aAccuracy, + runtime::HicmaContext &aContext) { + + return hicmapp::operations::MatrixOperations::Cholesky(aUpperLower, aMatrixAUV, aMatrixADiagonal, + aMatrixARK, aRank, aMaxRank, aAccuracy, + aContext); + + } + + template + size_t + Hicmapp::DiagVecToMat(Matrix &aMatrixDiag, Matrix &aMatrixDense, runtime::HicmaContext &aContext) { + + return hicmapp::operations::MatrixOperations::DiagVecToMat(aMatrixDiag, aMatrixDense, aContext); + } + + HICMAPP_INSTANTIATE_CLASS(Hicmapp) + } diff --git a/src/matrix-operations/CMakeLists.txt b/src/matrix-operations/CMakeLists.txt new file mode 100644 index 0000000..5088c7d --- /dev/null +++ b/src/matrix-operations/CMakeLists.txt @@ -0,0 +1,6 @@ +add_subdirectory(omp) + +set(SOURCES + ${SOURCES} + PARENT_SCOPE + ) diff --git a/src/matrix-operations/omp/CMakeLists.txt b/src/matrix-operations/omp/CMakeLists.txt new file mode 100644 index 0000000..5387703 --- 
/dev/null +++ b/src/matrix-operations/omp/CMakeLists.txt @@ -0,0 +1,5 @@ +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/MatrixOperations.cpp + ${SOURCES} + PARENT_SCOPE + ) diff --git a/src/matrix-operations/omp/MatrixOperations.cpp b/src/matrix-operations/omp/MatrixOperations.cpp new file mode 100644 index 0000000..806c267 --- /dev/null +++ b/src/matrix-operations/omp/MatrixOperations.cpp @@ -0,0 +1,811 @@ +#include +#include +#include +#include "hicmapp/tile-operations/TileOperations.hpp" +#include +#include +#include + +#ifdef USE_OMP +#include +#include "hcorepp/kernels/kernels.hpp" + +#ifdef BLAS_HAVE_MKL +#include +#endif +#endif + +namespace hicmapp::operations { + + template + int MatrixOperations::GenerateCompressedMatrix(common::Uplo auplo, Matrix &aMatrix, + const CompressionParameters &aSVDArguments, + bool aAsync) { + + int process_id = 0; + int processes = 1; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(aMatrix.GetContext().GetCommunicator().GetMPICommunicatior(), &process_id); + MPI_Comm_size(aMatrix.GetContext().GetCommunicator().GetMPICommunicatior(), &processes); +#endif + size_t num_of_global_tiles_in_rows = aMatrix.GetNumOfGlobalTilesInRows(); + size_t num_of_global_tiles_in_cols = aMatrix.GetNumOfGlobalTilesInCols(); + + if (!aMatrix.IsMatrixValid()) { + throw std::runtime_error("Matrix is invalid"); + } + + if (aMatrix.GetGlobalNumOfRowsInMatrix() == 0 || aMatrix.GetGlobalNumOfColsInMatrix() == 0) { + return 0; + } + + auto RunTime_instance = hicmapp::runtime::RunTimeSingleton::GetRunTimeInstance(); + RunTime_instance->RegisterHandles(aMatrix); + + if (aMatrix.GetStorageLayout() == common::StorageLayout::HicmaCM) { + for (size_t col_idx = 0; col_idx < num_of_global_tiles_in_cols; col_idx++) { + for (size_t row_idx = 0; row_idx < num_of_global_tiles_in_rows; row_idx++) { + if ((auplo == common::Uplo::HicmaLower && row_idx < col_idx) || + (auplo == common::Uplo::HicmaUpper && row_idx > col_idx) || + !aMatrix.ContainsTile(row_idx, col_idx)) { + continue; 
+ } + + RunTime_instance->GenerateCompressedMatrix(aMatrix, row_idx, col_idx, aSVDArguments); + } + } + } else if (aMatrix.GetStorageLayout() == common::StorageLayout::HicmaRM) { + for (size_t row_idx = 0; row_idx < num_of_global_tiles_in_rows; row_idx++) { + for (size_t col_idx = 0; col_idx < num_of_global_tiles_in_cols; col_idx++) { + if ((auplo == common::Uplo::HicmaLower && row_idx < col_idx) || + (auplo == common::Uplo::HicmaUpper && row_idx > col_idx) || + !aMatrix.ContainsTile(row_idx, col_idx)) { + continue; + } + + RunTime_instance->GenerateCompressedMatrix(aMatrix, row_idx, col_idx, aSVDArguments); + + } + } + } + + RunTime_instance->Flush(aMatrix); + if (!aAsync) { + RunTime_instance->Sync(); + } + //unregister data handles + RunTime_instance->UnRegisterHandles(aMatrix); + + return 0; + } + + template + int MatrixOperations::GenerateDenseMatrix(common::Uplo auplo, Matrix &aMatrix, bool aASync) { + int process_id = 0; + int processes = 1; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(aMatrix.GetContext().GetCommunicator().GetMPICommunicatior(), &process_id); + MPI_Comm_size(aMatrix.GetContext().GetCommunicator().GetMPICommunicatior(), &processes); +#endif + size_t num_of_global_tiles_in_rows = aMatrix.GetNumOfGlobalTilesInRows(); + size_t num_of_global_tiles_in_cols = aMatrix.GetNumOfGlobalTilesInCols(); + + if (!aMatrix.IsMatrixValid()) { + throw std::runtime_error("Matrix is invalid"); + } + + if (aMatrix.GetGlobalNumOfRowsInMatrix() == 0 || aMatrix.GetGlobalNumOfColsInMatrix() == 0) { + return 0; + } + + auto RunTime_instance = hicmapp::runtime::RunTimeSingleton::GetRunTimeInstance(); + RunTime_instance->RegisterHandles(aMatrix); + + if (aMatrix.GetStorageLayout() == common::StorageLayout::HicmaCM) { + for (size_t col_idx = 0; col_idx < num_of_global_tiles_in_cols; col_idx++) { + for (size_t row_idx = 0; row_idx < num_of_global_tiles_in_rows; row_idx++) { + if ((auplo == common::Uplo::HicmaLower && row_idx < col_idx) || + (auplo == common::Uplo::HicmaUpper 
&& row_idx > col_idx) || + !aMatrix.ContainsTile(row_idx, col_idx)) { + continue; + } + + RunTime_instance->GenerateDenseMatrix(aMatrix, row_idx, col_idx); + } + } + } else if (aMatrix.GetStorageLayout() == common::StorageLayout::HicmaRM) { + for (size_t row_idx = 0; row_idx < num_of_global_tiles_in_rows; row_idx++) { + for (size_t col_idx = 0; col_idx < num_of_global_tiles_in_cols; col_idx++) { + if ((auplo == common::Uplo::HicmaLower && row_idx < col_idx) || + (auplo == common::Uplo::HicmaUpper && row_idx > col_idx) || + !aMatrix.ContainsTile(row_idx, col_idx)) { + continue; + } + + RunTime_instance->GenerateDenseMatrix(aMatrix, row_idx, col_idx); + + } + } + } + + + RunTime_instance->Flush(aMatrix); + if (!aASync) { + RunTime_instance->Sync(); + } + //unregister data handles + RunTime_instance->UnRegisterHandles(aMatrix); + + return 0; + } + + template + int MatrixOperations::UncompressMatrix(common::Uplo auplo, Matrix &aMatrixUV, Matrix &aMatrixRK, + Matrix &aMatrixD) { + + if (!aMatrixUV.IsMatrixValid()) { + return -1; + } + + if (!aMatrixRK.IsMatrixValid()) { + return -1; + } + + if (!aMatrixD.IsMatrixValid()) { + return -1; + } + + int process_id = 0; + int processes = 1; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(aMatrixUV.GetContext().GetCommunicator().GetMPICommunicatior(), &process_id); + MPI_Comm_size(aMatrixUV.GetContext().GetCommunicator().GetMPICommunicatior(), &processes); +#endif + + size_t num_of_global_tiles_in_rows = aMatrixD.GetNumOfGlobalTilesInRows(); + size_t num_of_global_tiles_in_cols = aMatrixD.GetNumOfGlobalTilesInCols(); + auto RunTime_instance = hicmapp::runtime::RunTimeSingleton::GetRunTimeInstance(); + + RunTime_instance->RegisterHandles(aMatrixUV); + RunTime_instance->RegisterHandles(aMatrixD); + RunTime_instance->RegisterHandles(aMatrixRK); + + for (size_t col_idx = 0; col_idx < num_of_global_tiles_in_cols; col_idx++) { + for (size_t row_idx = 0; row_idx < num_of_global_tiles_in_rows; row_idx++) { + + if (auplo == 
common::Uplo::HicmaLower && row_idx <= col_idx) { + continue; + } else if (auplo == common::Uplo::HicmaUpper && row_idx >= col_idx) { + continue; + } + if (!aMatrixUV.ContainsTile(row_idx, col_idx) + && !aMatrixRK.ContainsTile(row_idx, col_idx) + && !aMatrixD.ContainsTile(row_idx, col_idx)) { + continue; + } + + RunTime_instance->Uncompress(aMatrixUV, aMatrixD, aMatrixRK, row_idx, col_idx); + } + } + + RunTime_instance->Flush(aMatrixUV); + RunTime_instance->Flush(aMatrixD); + RunTime_instance->Flush(aMatrixRK); + RunTime_instance->Sync(); + RunTime_instance->UnRegisterHandles(aMatrixUV); + RunTime_instance->UnRegisterHandles(aMatrixD); + RunTime_instance->UnRegisterHandles(aMatrixRK); + + return 0; + } + + template + size_t MatrixOperations::Gemm(Matrix &aMatrixA, const blas::Op &aAOp, Matrix &aMatrixB, + const blas::Op &aBOp, Matrix &aMatrixC, T &aAlpha, T &aBeta, + runtime::HicmaContext &aContext, const CompressionParameters &aSVDArguments, + const std::vector> &aRanks, bool aAllocatePool) { + size_t num_of_global_tiles_in_rows_c = aMatrixC.GetNumOfGlobalTilesInRows(); + size_t num_of_global_tiles_in_cols_c = aMatrixC.GetNumOfGlobalTilesInCols(); + size_t num_of_global_tiles_in_rows_a = aMatrixA.GetNumOfGlobalTilesInRows(); + size_t num_of_global_tiles_in_cols_a = aMatrixA.GetNumOfGlobalTilesInCols(); + hcorepp::dataunits::MemoryHandler &memoryHandler = hcorepp::dataunits::MemoryHandler::GetInstance(); + + std::vector sizes; + + auto RunTime_instance = hicmapp::runtime::RunTimeSingleton::GetRunTimeInstance(); + +#ifdef USE_OMP +#ifdef BLAS_HAVE_MKL + size_t thread_number = mkl_get_max_threads(); + mkl_set_num_threads(std::ceil(thread_number / (num_of_global_tiles_in_rows_c * num_of_global_tiles_in_cols_c))); +#endif +#ifdef BLAS_HAVE_CUBLAS + omp_set_num_threads(num_of_global_tiles_in_rows_c * num_of_global_tiles_in_cols_c); +#endif +#endif + size_t flops = 0; + + if (!aMatrixA.IsMatrixValid()) { + throw std::runtime_error("Matrix A invalid"); + } + if 
(!aMatrixB.IsMatrixValid()) { + throw std::runtime_error("Matrix B invalid"); + } + if (!aMatrixC.IsMatrixValid()) { + throw std::runtime_error("Matrix C invalid"); + } + + if (aMatrixC.GetGlobalNumOfRowsInMatrix() == 0 || aMatrixC.GetGlobalNumOfColsInMatrix() == 0 || + ((aAlpha == 0.0 || aMatrixA.GetGlobalNumOfColsInMatrix() == 0) && aBeta == 1.0)) { + return flops; + } + +#ifdef USE_OMP +#pragma omp parallel default(none) shared(num_of_threads) + { + num_of_threads = omp_get_num_threads(); + } + + hcorepp::helpers::DebuggingTimer::SetTimersCount(num_of_threads); +#ifdef HICMAPP_USE_TIMER + for (size_t i = 0; i < num_of_threads; i++) { + hcorepp::helpers::DebuggingTimer *timer = hcorepp::helpers::DebuggingTimer::GetDebuggingTimer(i); + if (timer != nullptr) { + timer->ResetAllSnapshots(); + } + } +#endif +#else + hcorepp::helpers::DebuggingTimer *timer = hcorepp::helpers::DebuggingTimer::GetDebuggingTimer(); +#ifdef HICMAPP_USE_TIMER + if(timer != nullptr) { + timer->ResetAllSnapshots(); + } +#endif + if (aAllocatePool) { + sizes = CalculateGemmPoolSize(aMatrixA, aAOp, aMatrixB, aBOp, aMatrixC, + aAlpha, + aBeta, aContext, aSVDArguments, aRanks); + for (size_t i = 0; i < sizes.size(); i++) { + if (sizes[i] > 0) { + timer->StartSnapshot("HicmaPP::MatrixOperations::AllocatingPool"); + memoryHandler.Initialize(sizes[i], i); + timer->Snapshot("HicmaPP::MatrixOperations::AllocatingPool"); + } + } + } + +#endif + + /** + * A = m * k + * B = k * n + * C = m * n + */ + size_t idx = 0; + size_t streams = aContext.GetNumOfContexts(); + RunTime_instance->RegisterHandles(aMatrixA); + RunTime_instance->RegisterHandles(aMatrixB); + RunTime_instance->RegisterHandles(aMatrixC); + if (aMatrixC.GetStorageLayout() == common::StorageLayout::HicmaCM) { +#ifdef USE_OMP +#pragma omp parallel for collapse(2) default(none) shared(aMatrixA, aMatrixB, aMatrixC, aRanks, \ + num_of_global_tiles_in_cols_c, num_of_global_tiles_in_rows_c, num_of_global_tiles_in_rows_a, 
num_of_global_tiles_in_cols_a, aAlpha, aAOp, aBOp, aBeta, aSVDArguments, aContext, RunTime_instance) private(idx) reduction(+:flops) +#endif + for (size_t col_idx_c = 0; col_idx_c < num_of_global_tiles_in_cols_c; col_idx_c++) { + for (size_t row_idx_c = 0; row_idx_c < num_of_global_tiles_in_rows_c; row_idx_c++) { + CompressionParameters parameters = aSVDArguments; + if (!aRanks.empty()) { + parameters = CompressionParameters(aSVDArguments.GetAccuracy(), false, true, + false, aRanks[row_idx_c][col_idx_c]); + } + + if (aAOp == blas::Op::NoTrans) { + for (size_t col_idx_a = 0; col_idx_a < num_of_global_tiles_in_cols_a; col_idx_a++) { + if (!aMatrixA.ContainsTile(row_idx_c, col_idx_a) && + !aMatrixB.ContainsTile(col_idx_a, col_idx_c) && + !aMatrixC.ContainsTile(row_idx_c, col_idx_c)) { + continue; + } +#ifdef USE_OMP + hcorepp::kernels::RunContext context = aContext.GetActiveContext().ForkChildContext(); + hcorepp::dataunits::MemoryHandler memoryHandler(context); +#else + if (memoryHandler.IsInitialized(idx % streams)) { + memoryHandler.Reset(idx % streams); + } + const hcorepp::kernels::RunContext &context = aContext.GetContext(idx % streams); +#endif + + flops += RunTime_instance->Gemm(aAlpha, aMatrixA, row_idx_c, col_idx_a, aAOp, + aMatrixB, + col_idx_a, col_idx_c, + aBOp, aBeta, aMatrixC, row_idx_c, col_idx_c, context, + parameters, memoryHandler.GetMemoryUnit(idx % streams), + false); + } + idx++; + } else { + for (size_t row_idx_a = 0; row_idx_a < num_of_global_tiles_in_rows_a; row_idx_a++) { + if (!aMatrixA.ContainsTile(row_idx_c, row_idx_a) && + !aMatrixB.ContainsTile(row_idx_a, col_idx_c) && + !aMatrixC.ContainsTile(row_idx_c, col_idx_c)) { + continue; + } +#ifdef USE_OMP + hcorepp::kernels::RunContext context = aContext.GetActiveContext().ForkChildContext(); + hcorepp::dataunits::MemoryHandler memoryHandler(context); + +#else + if (memoryHandler.IsInitialized(idx % streams)) { + memoryHandler.Reset(idx % streams); + } + const hcorepp::kernels::RunContext 
&context = aContext.GetContext(idx % streams); +#endif + flops += RunTime_instance->Gemm(aAlpha, aMatrixA, row_idx_c, row_idx_a, aAOp, + aMatrixB, + row_idx_a, col_idx_c, + aBOp, aBeta, aMatrixC, row_idx_c, col_idx_c, context, + parameters, memoryHandler.GetMemoryUnit(idx % streams), + false); + } + idx++; + } + RunTime_instance->Flush(aMatrixC, row_idx_c, col_idx_c); + } + } + } else if (aMatrixC.GetStorageLayout() == common::StorageLayout::HicmaRM) { +#ifdef USE_OMP +#pragma omp parallel for collapse(2) default(none) shared(aMatrixA, aMatrixB, aMatrixC, aRanks, \ + num_of_global_tiles_in_cols_c, num_of_global_tiles_in_rows_c, num_of_global_tiles_in_rows_a, num_of_global_tiles_in_cols_a, aAlpha, aAOp, aBOp, aBeta, aSVDArguments, aContext, RunTime_instance, handlers) private(idx, streams) reduction(+:flops) +#endif + for (size_t row_idx_c = 0; row_idx_c < num_of_global_tiles_in_rows_c; row_idx_c++) { + for (size_t col_idx_c = 0; col_idx_c < num_of_global_tiles_in_cols_c; col_idx_c++) { + CompressionParameters parameters = aSVDArguments; + if (!aRanks.empty()) { + parameters = CompressionParameters(aSVDArguments.GetAccuracy(), false, true, + false, aRanks[row_idx_c][col_idx_c]); + } + if (aAOp == blas::Op::NoTrans) { + for (size_t col_idx_a = 0; col_idx_a < num_of_global_tiles_in_cols_a; col_idx_a++) { + if (!aMatrixA.ContainsTile(row_idx_c, col_idx_a) && + !aMatrixB.ContainsTile(col_idx_a, col_idx_c) && + !aMatrixC.ContainsTile(row_idx_c, col_idx_c)) { + continue; + } +#ifdef USE_OMP + hcorepp::kernels::RunContext context = aContext.GetActiveContext().ForkChildContext(); + hcorepp::dataunits::MemoryHandler memoryHandler(context); +#else + if (memoryHandler.IsInitialized(idx % streams)) { + memoryHandler.Reset(idx % streams); + } + + const hcorepp::kernels::RunContext &context = aContext.GetContext(idx % streams); +#endif + flops += RunTime_instance->Gemm(aAlpha, aMatrixA, row_idx_c, col_idx_a, aAOp, + aMatrixB, + col_idx_a, col_idx_c, + aBOp, aBeta, aMatrixC, 
row_idx_c, col_idx_c, context, + parameters, memoryHandler.GetMemoryUnit(idx % streams)); + } + idx++; + } else { + for (size_t row_idx_a = 0; row_idx_a < num_of_global_tiles_in_rows_a; row_idx_a++) { + if (!aMatrixA.ContainsTile(row_idx_c, row_idx_a) && + !aMatrixB.ContainsTile(row_idx_a, col_idx_c) && + !aMatrixC.ContainsTile(row_idx_c, col_idx_c)) { + continue; + } +#ifdef USE_OMP + hcorepp::kernels::RunContext context = aContext.GetActiveContext().ForkChildContext(); + hcorepp::dataunits::MemoryHandler memoryHandler(context); +#else + if (memoryHandler.IsInitialized(idx % streams)) { + memoryHandler.Reset(idx % streams); + } + const hcorepp::kernels::RunContext &context = aContext.GetContext(idx % streams); +#endif + flops += RunTime_instance->Gemm(aAlpha, aMatrixA, row_idx_c, row_idx_a, aAOp, + aMatrixB, + row_idx_a, col_idx_c, + aBOp, aBeta, aMatrixC, row_idx_c, col_idx_c, context, + parameters, memoryHandler.GetMemoryUnit(idx % streams), + false); + } + idx++; + } + RunTime_instance->Flush(aMatrixC, row_idx_c, col_idx_c); + } + } + } + + RunTime_instance->Flush(aMatrixA); + RunTime_instance->Flush(aMatrixB); + RunTime_instance->Sync(); + RunTime_instance->UnRegisterHandles(aMatrixA); + RunTime_instance->UnRegisterHandles(aMatrixB); + RunTime_instance->UnRegisterHandles(aMatrixC); + +#ifdef USE_OMP + aContext.GetActiveContext().Sync(); +#ifdef HICMAPP_USE_TIMER + std::vector snapshot_names = hcorepp::helpers::DebuggingTimer::GetDebuggingTimer()->GetSnapshotsNames(); + std::stringstream ss; + std::string fixed_or_variable = aRanks.empty()? 
"_0" : "_fixed"; + std::string dense_or_compressed; + if(aMatrixA.GetSubMatrix(0).GetTiles()[0]->GetNumberOfMatrices() > 1) { + dense_or_compressed = "comp_gemm"; + ss << aSVDArguments.GetAccuracy(); + } + else { + dense_or_compressed = "dense_gemm"; + ss << "0"; + } + std::ofstream time_file(dense_or_compressed + "_" + std::to_string(aMatrixC.GetGlobalNumOfColsInMatrix()) + "_" + ss.str() + fixed_or_variable + + + ".time"); + for (auto name: snapshot_names) { + time_file << name << ","; + for (size_t i = 0; i < num_of_threads; i++) { + hcorepp::helpers::DebuggingTimer *timer = hcorepp::helpers::DebuggingTimer::GetDebuggingTimer(i); + timer->PrintSnapshot(name, time_file); + } + time_file << "\n"; + } +#endif +#ifdef BLAS_HAVE_MKL + mkl_set_num_threads(thread_number); +#endif +#else + for (size_t i = 0; i < streams; i++) { + aContext.SyncContext(i); + timer->StartSnapshot("Hicmapp::MatrixOperations::DestroyingPool"); + memoryHandler.FreeMemoryUnit(i); + timer->Snapshot("Hicmapp::MatrixOperations::DestroyingPool"); + } +#ifdef HICMAPP_USE_TIMER + std::stringstream ss; + std::string fixed_or_variable = aRanks.empty()? 
"_0" : "_fixed"; + std::string dense_or_compressed; + if(aMatrixA.GetSubMatrix(0).GetTiles()[0]->isCompressed()) { + dense_or_compressed = "comp_gemm"; + ss << aSVDArguments.GetAccuracy(); + } + else { + dense_or_compressed = "dense_gemm"; + ss << "0"; + } + std::ofstream time_file(dense_or_compressed + "_" + std::to_string(aMatrixC.GetGlobalNumOfColsInMatrix()) + "_" + ss.str() + fixed_or_variable + + + ".time"); + + timer->PrintAllSnapshots(time_file); +#endif +#endif + return flops; + } + + template + size_t + MatrixOperations::Cholesky(common::Uplo aUpperLower, Matrix &aMatrixAUV, Matrix &aMatrixADiagonal, + Matrix &aMatrixARK, int aRank, int aMaxRank, double aAccuracy, + runtime::HicmaContext &aContext) { + size_t flops = 0; + + if (!aMatrixAUV.IsMatrixValid()) { + throw std::runtime_error("Matrix UV invalid"); + } + if (!aMatrixARK.IsMatrixValid()) { + throw std::runtime_error("Matrix RK invalid"); + } + if (!aMatrixADiagonal.IsMatrixValid()) { + throw std::runtime_error("Matrix Diagonal invalid"); + } + if (aMatrixADiagonal.GetNumOfRowsInTile() != aMatrixADiagonal.GetNumOfColsInTile()) { + throw std::runtime_error("Matrix Diagonal invalid, only squared matrices are supported.."); + } + if (aUpperLower != common::Uplo::HicmaLower && aUpperLower != common::Uplo::HicmaUpper) { + throw std::runtime_error("Illegal value for Upper lower used during cholesky.."); + } + + auto RunTime_instance = hicmapp::runtime::RunTimeSingleton::GetRunTimeInstance(); + + hcorepp::dataunits::MemoryHandler &memoryHandler = hcorepp::dataunits::MemoryHandler::GetInstance(); + + RunTime_instance->RegisterHandles(aMatrixAUV); + RunTime_instance->RegisterHandles(aMatrixADiagonal); + RunTime_instance->RegisterHandles(aMatrixARK); + + /// Only hicma lower is supported.. 
+ for (size_t k = 0; k < aMatrixADiagonal.GetNumOfGlobalTilesInRows(); k++) { + T alpha = 1; + auto row_idx = k; + auto col_idx = 0; + + if (aMatrixADiagonal.ContainsTile(row_idx, col_idx)) { + RunTime_instance->Potrf(aMatrixADiagonal, row_idx, col_idx, blas::Uplo::Lower, + aContext.GetContext(), memoryHandler.GetMemoryUnit()); + } + + for (size_t m = k + 1; m < aMatrixADiagonal.GetNumOfGlobalTilesInRows(); m++) { + if (aMatrixADiagonal.ContainsTile(row_idx, col_idx) || + aMatrixAUV.ContainsTile(m, k)) { + RunTime_instance->Trsm(blas::Side::Left, blas::Uplo::Lower, blas::Op::NoTrans, + blas::Diag::NonUnit, alpha, aMatrixADiagonal, row_idx, col_idx, + aMatrixAUV, m, k, aContext.GetContext(), + memoryHandler.GetMemoryUnit()); + } + } + RunTime_instance->Flush(aMatrixAUV, k, k); + + + for (size_t n = k + 1; n < aMatrixADiagonal.GetNumOfGlobalTilesInRows(); n++) { + alpha = -1; + T beta = 1; + + if (aMatrixAUV.ContainsTile(n, k) || aMatrixADiagonal.ContainsTile(n, 0)) { + RunTime_instance->Syrk(aMatrixAUV, n, k, blas::Op::NoTrans, aMatrixADiagonal, n, 0, + blas::Uplo::Lower, alpha, beta, aContext.GetContext(), + memoryHandler.GetMemoryUnit()); + } + + for (size_t m = n + 1; m < aMatrixADiagonal.GetNumOfGlobalTilesInRows(); m++) { + if (aMatrixAUV.ContainsTile(m, k) || aMatrixAUV.ContainsTile(n, k) + || aMatrixAUV.ContainsTile(m, n)) { + auto parameters = CompressionParameters(aAccuracy); + RunTime_instance->Gemm(alpha, aMatrixAUV, m, k, blas::Op::NoTrans, aMatrixAUV, + n, k, blas::Op::Trans, beta, aMatrixAUV, m, n, + aContext.GetContext(), + parameters, memoryHandler.GetMemoryUnit(), true); + } + } + RunTime_instance->Flush(aMatrixAUV, n, k); + } + } + + RunTime_instance->Flush(aMatrixAUV); + RunTime_instance->Flush(aMatrixADiagonal); + RunTime_instance->Flush(aMatrixARK); + RunTime_instance->Sync(); + RunTime_instance->UnRegisterHandles(aMatrixAUV); + RunTime_instance->UnRegisterHandles(aMatrixADiagonal); + RunTime_instance->UnRegisterHandles(aMatrixARK); + + return 
flops; + } + + template + size_t + MatrixOperations::GenerateDiagonalTiles(common::Uplo auplo, Matrix &aMatrixUV, Matrix &aMatrixRK, + Matrix &aMatrixDiag, unsigned long long int seed, int maxrank, + double tol, int compress_diag, Matrix &aMatrixDense, + runtime::HicmaContext &aContext) { + size_t flops = 0; + + if (!aMatrixUV.IsMatrixValid()) { + throw std::runtime_error("Matrix UV invalid"); + } + if (!aMatrixRK.IsMatrixValid()) { + throw std::runtime_error("Matrix RK invalid"); + } + if (!aMatrixDiag.IsMatrixValid()) { + throw std::runtime_error("Matrix Diagonal invalid"); + } + if (!aMatrixDense.IsMatrixValid()) { + throw std::runtime_error("Matrix Dense invalid"); + } + + auto RunTime_instance = hicmapp::runtime::RunTimeSingleton::GetRunTimeInstance(); + + hcorepp::dataunits::MemoryHandler &memoryHandler = hcorepp::dataunits::MemoryHandler::GetInstance(); + + RunTime_instance->RegisterHandles(aMatrixUV); + RunTime_instance->RegisterHandles(aMatrixDiag); + RunTime_instance->RegisterHandles(aMatrixRK); + RunTime_instance->RegisterHandles(aMatrixDense); + + int num_of_global_tiles_in_rows = aMatrixUV.GetNumOfGlobalTilesInRows(); + int num_of_global_tiles_in_cols = aMatrixUV.GetNumOfGlobalTilesInCols(); + for (int m = 0; m < num_of_global_tiles_in_rows; m++) { + + for (int n = 0; n < num_of_global_tiles_in_cols; n++) { + + if (!aMatrixUV.ContainsTile(m, n) && + !aMatrixDiag.ContainsTile(m, 0) && + !aMatrixRK.ContainsTile(m, n) && + !aMatrixDense.ContainsTile(m, n)) { + continue; + } + + if (auplo == common::Uplo::HicmaLower && m < n) { + continue; + } else if (auplo == common::Uplo::HicmaUpper && m > n) { + continue; + } + + int call_diag = 0; + int AD_icol; + if (m == n) { + call_diag = 1; + AD_icol = 0; + } else { + call_diag = 0; + AD_icol = n; + } + + if (call_diag) { + RunTime_instance->GenerateDiagonalTile(aMatrixUV, aMatrixDiag, m, AD_icol, aMatrixRK, m, n, + seed, + maxrank, tol, compress_diag, aMatrixDense, + aContext.GetContext(), call_diag); + } else { 
+ RunTime_instance->GenerateDiagonalTile(aMatrixUV, aMatrixDiag, m, AD_icol, aMatrixRK, m, n, + seed, + maxrank, tol, compress_diag, aMatrixDense, + aContext.GetContext(), call_diag); + } + } + } + + + RunTime_instance->Flush(aMatrixUV); + RunTime_instance->Flush(aMatrixDiag); + RunTime_instance->Flush(aMatrixRK); + RunTime_instance->Flush(aMatrixDense); + + RunTime_instance->Sync(); + + //unregister data handles + RunTime_instance->UnRegisterHandles(aMatrixUV); + RunTime_instance->UnRegisterHandles(aMatrixDiag); + RunTime_instance->UnRegisterHandles(aMatrixRK); + RunTime_instance->UnRegisterHandles(aMatrixDense); + + } + + template + size_t + MatrixOperations::DiagVecToMat(Matrix &aMatrixDiag, Matrix &aMatrixDense, + runtime::HicmaContext &aContext) { + + auto RunTime_instance = hicmapp::runtime::RunTimeSingleton::GetRunTimeInstance(); + + hcorepp::dataunits::MemoryHandler &memoryHandler = hcorepp::dataunits::MemoryHandler::GetInstance(); + + RunTime_instance->RegisterHandles(aMatrixDense); + RunTime_instance->RegisterHandles(aMatrixDiag); + + int num_of_global_tiles_in_rows = aMatrixDiag.GetNumOfGlobalTilesInRows(); + + for (int m = 0; m < num_of_global_tiles_in_rows; m++) { + if (aMatrixDiag.ContainsTile(m, 0) + || aMatrixDense.ContainsTile(m, m)) { + RunTime_instance->LaCpy(aMatrixDiag, m, 0, aMatrixDense, m, m, aContext.GetContext()); + } + } + RunTime_instance->Flush(aMatrixDense); + RunTime_instance->Flush(aMatrixDiag); + RunTime_instance->Sync(); + RunTime_instance->UnRegisterHandles(aMatrixDense); + RunTime_instance->UnRegisterHandles(aMatrixDiag); + } + + + template + std::vector + MatrixOperations::CalculateGemmPoolSize(Matrix &aMatrixA, const blas::Op &aAOp, Matrix &aMatrixB, + const blas::Op &aBOp, Matrix &aMatrixC, T &aAlpha, T &aBeta, + runtime::HicmaContext &aContext, + const CompressionParameters &aSVDArguments, + const std::vector> &aRanks) { + size_t num_of_global_tiles_in_rows_c = aMatrixC.GetNumOfGlobalTilesInRows(); + size_t 
num_of_global_tiles_in_cols_c = aMatrixC.GetNumOfGlobalTilesInCols(); + size_t num_of_global_tiles_in_rows_a = aMatrixA.GetNumOfGlobalTilesInRows(); + size_t num_of_global_tiles_in_cols_a = aMatrixA.GetNumOfGlobalTilesInCols(); + + if (!aMatrixA.IsMatrixValid()) { + throw std::runtime_error("Matrix A invalid"); + } + if (!aMatrixB.IsMatrixValid()) { + throw std::runtime_error("Matrix B invalid"); + } + if (!aMatrixC.IsMatrixValid()) { + throw std::runtime_error("Matrix C invalid"); + } + + if (aMatrixC.GetGlobalNumOfRowsInMatrix() == 0 || aMatrixC.GetGlobalNumOfColsInMatrix() == 0 || + ((aAlpha == 0.0 || aMatrixA.GetGlobalNumOfColsInMatrix() == 0) && aBeta == 1.0)) { + return {0}; + } + + size_t streams = aContext.GetNumOfContexts(); + std::vector pool_sizes(streams); + size_t idx = 0; + + CompressionParameters parameters = aSVDArguments; + if (aMatrixC.GetStorageLayout() == common::StorageLayout::HicmaCM) { + for (size_t col_idx_c = 0; col_idx_c < num_of_global_tiles_in_cols_c; col_idx_c++) { + for (size_t row_idx_c = 0; row_idx_c < num_of_global_tiles_in_rows_c; row_idx_c++) { + if (!aRanks.empty()) { + parameters = CompressionParameters(aSVDArguments.GetAccuracy(), false, true, + false, aRanks[row_idx_c][col_idx_c]); + } + if (!aMatrixC.ContainsTile(row_idx_c, col_idx_c)) { + continue; + } + Tile *tile_c = aMatrixC.GetTilePointer(row_idx_c, col_idx_c); + if (aAOp == blas::Op::NoTrans) { + for (size_t col_idx_a = 0; col_idx_a < num_of_global_tiles_in_cols_a; col_idx_a++) { + if (!aMatrixA.ContainsTile(row_idx_c, col_idx_a) || + !aMatrixB.ContainsTile(col_idx_a, col_idx_c)) { + continue; + } + Tile *tile_a = aMatrixA.GetTilePointer(row_idx_c, col_idx_a); + Tile *tile_b = aMatrixB.GetTilePointer(col_idx_a, col_idx_c); + // HCORE GEMM CALL... 
+ pool_sizes[idx % streams] = std::max(pool_sizes[idx % streams], + hcorepp::api::HCore::CalculateMemoryPoolSize( + *tile_a, *tile_b, + *tile_c, parameters, + aContext.GetContext(idx % streams))); + } + idx++; + } else { + for (size_t row_idx_a = 0; row_idx_a < num_of_global_tiles_in_rows_a; row_idx_a++) { + Tile *tile_a = aMatrixA.GetTilePointer(row_idx_c, row_idx_a); + Tile *tile_b = aMatrixB.GetTilePointer(row_idx_a, col_idx_c); + // HCORE GEMM CALL... + pool_sizes[idx % streams] = std::max(pool_sizes[idx % streams], + hcorepp::api::HCore::CalculateMemoryPoolSize( + *tile_a, *tile_b, + *tile_c, parameters, + aContext.GetContext(idx % streams))); + } + idx++; + } + } + } + } else if (aMatrixC.GetStorageLayout() == common::StorageLayout::HicmaRM) { + for (size_t row_idx_c = 0; row_idx_c < num_of_global_tiles_in_rows_c; row_idx_c++) { + for (size_t col_idx_c = 0; col_idx_c < num_of_global_tiles_in_cols_c; col_idx_c++) { + if (!aRanks.empty()) { + parameters = CompressionParameters(aSVDArguments.GetAccuracy(), false, true, + false, aRanks[row_idx_c][col_idx_c]); + } + Tile *tile_c = aMatrixC.GetTilePointer(row_idx_c, col_idx_c); + if (aAOp == blas::Op::NoTrans) { + for (size_t col_idx_a = 0; col_idx_a < num_of_global_tiles_in_cols_a; col_idx_a++) { + Tile *tile_a = aMatrixA.GetTilePointer(row_idx_c, col_idx_a); + Tile *tile_b = aMatrixB.GetTilePointer(col_idx_a, col_idx_c); + // HCORE GEMM CALL... + pool_sizes[idx % streams] = std::max(pool_sizes[idx % streams], + hcorepp::api::HCore::CalculateMemoryPoolSize( + *tile_a, *tile_b, + *tile_c, parameters, + aContext.GetContext(idx % streams))); + } + idx++; + } else { + for (size_t row_idx_a = 0; row_idx_a < num_of_global_tiles_in_rows_a; row_idx_a++) { + Tile *tile_a = aMatrixA.GetTilePointer(row_idx_c, row_idx_a); + Tile *tile_b = aMatrixB.GetTilePointer(row_idx_a, col_idx_c); + // HCORE GEMM CALL... 
+ pool_sizes[idx % streams] = std::max(pool_sizes[idx % streams], + hcorepp::api::HCore::CalculateMemoryPoolSize( + *tile_a, *tile_b, + *tile_c, parameters, + aContext.GetContext(idx % streams))); + } + idx++; + } + } + } + } + + return pool_sizes; + + } + + + HICMAPP_INSTANTIATE_CLASS(MatrixOperations) + +} \ No newline at end of file diff --git a/src/primitives/CMakeLists.txt b/src/primitives/CMakeLists.txt new file mode 100644 index 0000000..5fcdb0a --- /dev/null +++ b/src/primitives/CMakeLists.txt @@ -0,0 +1,9 @@ +add_subdirectory(decomposer) + +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/ProblemManager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/matrix.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/submatrix.cpp + ${SOURCES} + PARENT_SCOPE + ) diff --git a/src/primitives/ProblemManager.cpp b/src/primitives/ProblemManager.cpp new file mode 100644 index 0000000..651e9eb --- /dev/null +++ b/src/primitives/ProblemManager.cpp @@ -0,0 +1,347 @@ +extern "C" { +#include "starsh-spatial.h" +} + +#include +#include +#include + +namespace hicmapp { + namespace primitives { + + + ProblemManager::ProblemManager( + hicmapp::common::ProblemType aProblemType) { + + this->mProblemType = aProblemType; + + this->mCharProperties[HICMA_PROB_PROPERTY_SYM] = '\0'; + + switch (aProblemType) { + case common::ProblemType::PROBLEM_TYPE_RND: + + InitRNDProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_SS: + + InitSSProblemMetadata(); + + break; + + case common::ProblemType::PROBLEM_TYPE_GEOSTAT: + + InitGeostatProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_EDSIN: + + InitEDSINProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_GEOSTAT_POINT: + + InitGeostatPointProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_ST_3D_EXP: + + InitST3DExpProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_ST_3D_SQEXP: + + InitST3DSQExpProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_3D_RBF_VIRUS: + + 
InitRBFVirus3DProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_3D_RBF_CUBE: + + InitRBFCube3DProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_AC_3D: + + InitAC3DProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_ST_2D_EXP: + + InitST2DExpProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS_BIVARIATE: + + InitGeostatParsimoniousBivariateProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS_BIVARIATE_POINT: + + InitGeostatParsimoniousBivariatePointProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS2_BIVARIATE: + + InitGeostatNonGaussianProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS2_BIVARIATE_POINT: + + InitGeostatNonGaussianPointProblemMetadata(); + + break; + case common::ProblemType::PROBLEM_TYPE_RNDUSR: + break; + case common::ProblemType::PROBLEM_TYPE_FILE: + break; + } + } + + + void ProblemManager::InitAC3DProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_MESH_POINTS); + mProblemProperties.insert(HICMA_PROB_PROPERTY_MORDERING); + mProblemProperties.insert(HICMA_PROB_PROPERTY_MESH_FILE); + mProblemProperties.insert(HICMA_PROB_PROPERTY_INTERPL_FILE); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NTRIAN); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NIPP); + } + + + void ProblemManager::InitEDSINProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_DIAG); + mProblemProperties.insert(HICMA_PROB_PROPERTY_WAVE_K); + mProblemProperties.insert(HICMA_PROB_PROPERTY_DECAY); + } + + + void ProblemManager::InitGeostatNonGaussianProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_THETA); + } + + + void ProblemManager::InitGeostatNonGaussianPointProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_NOISE); + mProblemProperties.insert(HICMA_PROB_PROPERTY_THETA); + 
mProblemProperties.insert(HICMA_PROB_PROPERTY_POINT); + } + + + void + ProblemManager::InitGeostatParsimoniousBivariatePointProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_NOISE); + mProblemProperties.insert(HICMA_PROB_PROPERTY_THETA); + mProblemProperties.insert(HICMA_PROB_PROPERTY_POINT); + } + + + void ProblemManager::InitGeostatParsimoniousBivariateProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_NOISE); + mProblemProperties.insert(HICMA_PROB_PROPERTY_THETA); + } + + + void ProblemManager::InitGeostatPointProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_NOISE); + mProblemProperties.insert(HICMA_PROB_PROPERTY_THETA); + mProblemProperties.insert(HICMA_PROB_PROPERTY_POINT); + } + + + void ProblemManager::InitGeostatProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_NOISE); + mProblemProperties.insert(HICMA_PROB_PROPERTY_THETA); + } + + + void ProblemManager::InitRBFCube3DProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_REG); + mProblemProperties.insert(HICMA_PROB_PROPERTY_ISREG); + mProblemProperties.insert(HICMA_PROB_PROPERTY_RAD); + mProblemProperties.insert(HICMA_PROB_PROPERTY_MESH_POINTS); + mProblemProperties.insert(HICMA_PROB_PROPERTY_MORDERING); + mProblemProperties.insert(HICMA_PROB_PROPERTY_MESH_FILE); + } + + + void ProblemManager::InitRBFVirus3DProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_REG); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NUMOBJ); + mProblemProperties.insert(HICMA_PROB_PROPERTY_ISREG); + mProblemProperties.insert(HICMA_PROB_PROPERTY_RAD); + mProblemProperties.insert(HICMA_PROB_PROPERTY_DENST); + mProblemProperties.insert(HICMA_PROB_PROPERTY_MESH_POINTS); + mProblemProperties.insert(HICMA_PROB_PROPERTY_MORDERING); + mProblemProperties.insert(HICMA_PROB_PROPERTY_MESH_FILE); + } + + + void ProblemManager::InitRNDProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_NDIM); + 
mProblemProperties.insert(HICMA_PROB_PROPERTY_NOISE); + mProblemProperties.insert(HICMA_PROB_PROPERTY_DECAY); + mProblemProperties.insert(HICMA_PROB_PROPERTY_N); + mProblemProperties.insert(HICMA_PROB_PROPERTY_BLOCK_SIZE); + mProblemProperties.insert(HICMA_PROB_PROPERTY_MT); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NT); + mProblemProperties.insert(HICMA_PROB_PROPERTY_SYM); + } + + + void ProblemManager::InitSSProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_N); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NDIM); + mProblemProperties.insert(HICMA_PROB_PROPERTY_BETA); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NU); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NOISE); + mProblemProperties.insert(HICMA_PROB_PROPERTY_WAVE_K); + mProblemProperties.insert(HICMA_PROB_PROPERTY_DIAG); + mProblemProperties.insert(HICMA_PROB_PROPERTY_SYM); + mProblemProperties.insert(HICMA_PROB_PROPERTY_BLOCK_SIZE); + mProblemProperties.insert(HICMA_PROB_PROPERTY_DECAY); + mProblemProperties.insert(HICMA_PROB_PROPERTY_MT); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NT); + } + + + void ProblemManager::InitST2DExpProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_BETA); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NU); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NOISE); + } + + + void ProblemManager::InitST3DExpProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_BETA); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NU); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NOISE); + } + + + void ProblemManager::InitST3DSQExpProblemMetadata() { + mProblemProperties.insert(HICMA_PROB_PROPERTY_BETA); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NU); + mProblemProperties.insert(HICMA_PROB_PROPERTY_NOISE); + } + + + template<> + int + ProblemManager::GetProblemProperty(ProblemProperty aProperty) { + if (this->mIntProblemProperties.find(aProperty) != this->mIntProblemProperties.end()) { + return 
this->mIntProblemProperties.find(aProperty)->second; + } else { + throw std::invalid_argument( "Problem property was not initialized" ); + } + } + + + template<> + double + ProblemManager::GetProblemProperty(ProblemProperty aProperty) { + if (this->mDoubleProblemProperties.find(aProperty) != this->mDoubleProblemProperties.end()) { + return this->mDoubleProblemProperties.find(aProperty)->second; + } else { + throw std::invalid_argument( "Problem property was not initialized" ); + } + } + + + template<> + double * + ProblemManager::GetProblemProperty(ProblemProperty aProperty) { + if (this->mP2DoubleProblemProperties.find(aProperty) != this->mP2DoubleProblemProperties.end()) { + return this->mP2DoubleProblemProperties.find(aProperty)->second; + } else { + throw std::invalid_argument( "Problem property was not initialized" ); + } + } + + + template<> + char * + ProblemManager::GetProblemProperty(ProblemProperty aProperty) { + if (this->mStringProblemProperties.find(aProperty) != this->mStringProblemProperties.end()) { + return this->mStringProblemProperties.find(aProperty)->second; + } else { + throw std::invalid_argument( "Problem property was not initialized" ); + } + } + + + template<> + char + ProblemManager::GetProblemProperty(ProblemProperty aProperty) { + if (this->mCharProperties.find(aProperty) != this->mCharProperties.end()) { + return this->mCharProperties.find(aProperty)->second; + } else { + throw std::invalid_argument( "Problem property was not initialized" ); + } + } + + + common::ProblemType ProblemManager::GetProblemType() const { + return mProblemType; + } + + + ProblemManager::~ProblemManager() { + mDoubleProblemProperties.clear(); + mIntProblemProperties.clear(); + mCharProperties.clear(); + mStringProblemProperties.clear(); + mP2DoubleProblemProperties.clear(); + } + + + void ProblemManager::SetProblemProperty(ProblemProperty aProperty, int aPropertyValue) { + if(mProblemProperties.find(aProperty) != mProblemProperties.end()) { + 
this->mIntProblemProperties.insert(std::make_pair(aProperty, aPropertyValue)); + } else { + throw std::invalid_argument( "Not a problem property" ); + } + } + + + void ProblemManager::SetProblemProperty(ProblemProperty aProperty, double aPropertyValue) { + if(mProblemProperties.find(aProperty) != mProblemProperties.end()) { + this->mDoubleProblemProperties.insert(std::make_pair(aProperty, aPropertyValue)); + } else { + throw std::invalid_argument( "Not a problem property" ); + } + } + + + void ProblemManager::SetProblemProperty(ProblemProperty aProperty, double *aPropertyValue) { + if(mProblemProperties.find(aProperty) != mProblemProperties.end()) { + this->mP2DoubleProblemProperties.insert(std::make_pair(aProperty, aPropertyValue)); + } else { + throw std::invalid_argument( "Not a problem property" ); + } + } + + + void ProblemManager::SetProblemProperty(ProblemProperty aProperty, char aPropertyValue) { + + if(mProblemProperties.find(aProperty) != mProblemProperties.end()) { + mCharProperties[aProperty] = aPropertyValue; + } else { + throw std::invalid_argument( "Not a problem property" ); + } + } + + + void ProblemManager::SetProblemProperty(ProblemProperty aProperty, char *aPropertyValue) { + if(mProblemProperties.find(aProperty) != mProblemProperties.end()) { + this->mStringProblemProperties.insert(std::make_pair(aProperty, aPropertyValue)); + } else { + throw std::invalid_argument( "Not a problem property" ); + } + } + + } +} \ No newline at end of file diff --git a/src/primitives/decomposer/CMakeLists.txt b/src/primitives/decomposer/CMakeLists.txt new file mode 100644 index 0000000..6c9ce48 --- /dev/null +++ b/src/primitives/decomposer/CMakeLists.txt @@ -0,0 +1,6 @@ +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/slowest_dimension_decomposer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/two_dimension_cyclic_decomposer.cpp + ${SOURCES} + PARENT_SCOPE + ) diff --git a/src/primitives/decomposer/slowest_dimension_decomposer.cpp 
b/src/primitives/decomposer/slowest_dimension_decomposer.cpp new file mode 100644 index 0000000..8b24972 --- /dev/null +++ b/src/primitives/decomposer/slowest_dimension_decomposer.cpp @@ -0,0 +1,95 @@ +#include +#include +#include + +namespace hicmapp::primitives { + + SlowestDimDecomposer::SlowestDimDecomposer(size_t aNumOfSubMatrices, + common::StorageLayout aStorageLayout) : mStorageLayout( + aStorageLayout), mNumOfSubMatrices(aNumOfSubMatrices) { + } + + + std::vector + SlowestDimDecomposer::Decompose(size_t aGlobalMatrixTilesInRows, size_t aGlobalMatrixTilesInCols, + bool aDiagonalMatrix) { + +/** This assumes a one-to-one process to sub-matrix mapping. Should be revised to support more general mappings */ + size_t sub_matrix_num_of_tiles_in_cols = aGlobalMatrixTilesInCols; + size_t rem_sub_matrix_num_of_tiles_in_cols = 0; + size_t sub_matrix_num_of_tiles_in_rows = aGlobalMatrixTilesInRows; + size_t rem_sub_matrix_num_of_tiles_in_rows = 0; + auto actual_num_of_submatrices = mNumOfSubMatrices; + + if (mNumOfSubMatrices == 0) { + throw std::runtime_error("SlowestDecomposer::Decompose, Division by zero.\n"); + } + + if (mStorageLayout == common::StorageLayout::HicmaCM) { + if (aDiagonalMatrix) { + sub_matrix_num_of_tiles_in_cols = 1;//aGlobalMatrixTilesInCols / mNumOfSubMatrices; + rem_sub_matrix_num_of_tiles_in_cols = 0;//aGlobalMatrixTilesInCols % mNumOfSubMatrices; + sub_matrix_num_of_tiles_in_rows = aGlobalMatrixTilesInRows / mNumOfSubMatrices; + rem_sub_matrix_num_of_tiles_in_rows = aGlobalMatrixTilesInRows % mNumOfSubMatrices; + + } else { + sub_matrix_num_of_tiles_in_cols = aGlobalMatrixTilesInCols / mNumOfSubMatrices; + rem_sub_matrix_num_of_tiles_in_cols = aGlobalMatrixTilesInCols % mNumOfSubMatrices; + if (sub_matrix_num_of_tiles_in_cols == 0) { + throw std::runtime_error( + "SlowestDecomposer::Decompose, Number of tiles per column < Number of submatrices.\n"); + } + if (sub_matrix_num_of_tiles_in_cols == 0 && rem_sub_matrix_num_of_tiles_in_cols > 0) { 
+ actual_num_of_submatrices = 1; + } else { + actual_num_of_submatrices = mNumOfSubMatrices; + } + } + } else if (mStorageLayout == common::StorageLayout::HicmaRM) { + sub_matrix_num_of_tiles_in_rows = aGlobalMatrixTilesInRows / mNumOfSubMatrices; + rem_sub_matrix_num_of_tiles_in_rows = aGlobalMatrixTilesInRows % mNumOfSubMatrices; + if (sub_matrix_num_of_tiles_in_rows == 0) { + throw std::runtime_error( + "SlowestDecomposer::Decompose, Number of tiles per row < Number of submatrices.\n"); + } + if (sub_matrix_num_of_tiles_in_rows == 0 && rem_sub_matrix_num_of_tiles_in_rows > 0) { + actual_num_of_submatrices = 1; + } else { + actual_num_of_submatrices = mNumOfSubMatrices; + } + } + + std::vector specs(actual_num_of_submatrices); + int owner_id = 0; + for (size_t i = 0; i < actual_num_of_submatrices; i++) { + + specs[i].SetNumOfTilesInCol(sub_matrix_num_of_tiles_in_cols); + specs[i].SetNumOfTilesInRow(sub_matrix_num_of_tiles_in_rows); + if (i == (actual_num_of_submatrices - 1)) { + specs[i].SetNumOfTilesInCol(rem_sub_matrix_num_of_tiles_in_cols + sub_matrix_num_of_tiles_in_cols); + specs[i].SetNumOfTilesInRow(rem_sub_matrix_num_of_tiles_in_rows + sub_matrix_num_of_tiles_in_rows); + } + + //starting index of the sub-matrix tiles in rows and columns. 
+ if (mStorageLayout == common::StorageLayout::HicmaCM) { + specs[i].SetStartingIndexInCols(i * sub_matrix_num_of_tiles_in_cols); + specs[i].SetStartingIndexInRows(0); + if(aDiagonalMatrix) { + specs[i].SetStartingIndexInCols(0); + specs[i].SetStartingIndexInRows(i * sub_matrix_num_of_tiles_in_rows); + } + } else if (mStorageLayout == common::StorageLayout::HicmaRM) { + specs[i].SetStartingIndexInCols(0); + specs[i].SetStartingIndexInRows(i * sub_matrix_num_of_tiles_in_rows); + } + + + specs[i].SetOwnerId(owner_id); + owner_id++; + } + + return specs; + } + + +} diff --git a/src/primitives/decomposer/two_dimension_cyclic_decomposer.cpp b/src/primitives/decomposer/two_dimension_cyclic_decomposer.cpp new file mode 100644 index 0000000..fe987ef --- /dev/null +++ b/src/primitives/decomposer/two_dimension_cyclic_decomposer.cpp @@ -0,0 +1,64 @@ +#include +#include +#include + +namespace hicmapp::primitives { + + TwoDimCyclicDecomposer::TwoDimCyclicDecomposer(size_t aNumOfProcessesInRows, size_t aNumOfProcessesInCols) + : mNumOfProcessesInRows(aNumOfProcessesInRows), + mNumOfProcessesInCols(aNumOfProcessesInCols) { + } + + std::vector + TwoDimCyclicDecomposer::Decompose(size_t aGlobalMatrixTilesInRows, size_t aGlobalMatrixTilesInCols, + bool aDiagonalMatrix) { + + std::vector matrix_specifications; + + int process_id = 0; + int num_of_processes = 1; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &process_id); + MPI_Comm_size(MPI_COMM_WORLD, &num_of_processes); +#endif + + if (num_of_processes > mNumOfProcessesInCols * mNumOfProcessesInRows) { + return {}; + } + + if (num_of_processes == 1) { + mNumOfProcessesInCols = 1; + mNumOfProcessesInRows = 1; + return {{aGlobalMatrixTilesInRows, aGlobalMatrixTilesInCols, 0, 0, mNumOfProcessesInRows, + mNumOfProcessesInCols, process_id}}; + } + + if (num_of_processes < mNumOfProcessesInCols * mNumOfProcessesInRows) { + throw std::runtime_error( + " Num of processes passed is less than requested grid size \n "); + } + + auto 
num_of_tiles_in_rows = 1; + auto num_of_tiles_in_cols = 1; + size_t owner_id = 0; + size_t initial_owner_row; + + for (size_t tile_row_idx = 0; tile_row_idx < aGlobalMatrixTilesInRows; tile_row_idx++) { + if (tile_row_idx % mNumOfProcessesInRows == 0) { + initial_owner_row = 0; + } else { + initial_owner_row = mNumOfProcessesInCols; + } + for (size_t tile_col_idx = 0; tile_col_idx < aGlobalMatrixTilesInCols; tile_col_idx++) { + if (tile_col_idx % mNumOfProcessesInCols == 0) { + owner_id = initial_owner_row % num_of_processes; + } + matrix_specifications.emplace_back(num_of_tiles_in_rows, num_of_tiles_in_cols, + tile_row_idx, tile_col_idx, mNumOfProcessesInRows, + mNumOfProcessesInCols, owner_id); + owner_id = (owner_id + 1) % num_of_processes; + } + } + return matrix_specifications; + } +} diff --git a/src/primitives/matrix.cpp b/src/primitives/matrix.cpp new file mode 100644 index 0000000..8735950 --- /dev/null +++ b/src/primitives/matrix.cpp @@ -0,0 +1,613 @@ + +#include +#include +#include +#include "hcorepp/operators/interface/TilePacker.hpp" +#include "hicmapp/runtime/interface/RunTimeSingleton.hpp" + +namespace hicmapp::primitives { + + size_t GenerateMatrixId() { + static std::atomic_size_t matrix_id = 0; + const size_t id = matrix_id; + matrix_id++; + return id; + } + + template + Matrix::Matrix(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, + size_t aTileNumOfRows, size_t aTileNumOfCols, common::StorageLayout aStorageLayout, + runtime::HicmaContext &aContext, size_t aRank, bool aDiagonalMatrix) : mStorageLayout{ + aStorageLayout}, mContext{aContext}, mDiagonalMatrix(aDiagonalMatrix) { + + auto world_size = 1; +#ifdef HICMAPP_USE_MPI + MPI_Comm_size(aContext.GetCommunicator().GetMPICommunicatior(), &world_size); +#endif + SlowestDimDecomposer decomposer(world_size, aStorageLayout); + mDecomposerType = SLOWESTDIM; + + Initialize(apMatrixData, aTotalGlobalNumOfRows, aTotalGlobalNumOfCols, aTileNumOfRows, aTileNumOfCols, + 
aStorageLayout, decomposer, aContext, aRank); + } + + template + Matrix::Matrix(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, + size_t aTileNumOfRows, size_t aTileNumOfCols, common::StorageLayout aStorageLayout, + MatrixDecomposer &aMatrixDecomposer, runtime::HicmaContext &aContext, size_t aRank, + bool aDiagonalMatrix): mStorageLayout{aStorageLayout}, mContext{aContext}, + mDiagonalMatrix(aDiagonalMatrix) { + + mDecomposerType = aMatrixDecomposer.GetType(); + Initialize(apMatrixData, aTotalGlobalNumOfRows, aTotalGlobalNumOfCols, aTileNumOfRows, aTileNumOfCols, + aStorageLayout, aMatrixDecomposer, aContext, aRank); + } + + template + Matrix::Matrix(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, + size_t aTileNumOfRows, size_t aTileNumOfCols, common::StorageLayout aStorageLayout, + MatrixDecomposer &aMatrixDecomposer, runtime::HicmaContext &aContext, + const CompressionParameters &aParams) : mStorageLayout{ + aStorageLayout}, mContext{aContext} { + + mDecomposerType = aMatrixDecomposer.GetType(); + mDiagonalMatrix = false; + Initialize(apMatrixData, aTotalGlobalNumOfRows, aTotalGlobalNumOfCols, aTileNumOfRows, aTileNumOfCols, + aStorageLayout, aMatrixDecomposer, aContext, aParams); + } + + template + Matrix::Matrix(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, + size_t aTileNumOfRows, size_t aTileNumOfCols, common::StorageLayout aStorageLayout, + runtime::HicmaContext &aContext, const CompressionParameters &aParams) : mStorageLayout{ + aStorageLayout}, mContext{aContext} { + mDiagonalMatrix = false; + auto world_size = 1; +#ifdef HICMAPP_USE_MPI + MPI_Comm_size(aContext.GetCommunicator().GetMPICommunicatior(), &world_size); +#endif + SlowestDimDecomposer decomposer(world_size, aStorageLayout); + mDecomposerType = SLOWESTDIM; + Initialize(apMatrixData, aTotalGlobalNumOfRows, aTotalGlobalNumOfCols, aTileNumOfRows, aTileNumOfCols, + aStorageLayout, decomposer, aContext, 
aParams); + } + + template + Matrix::~Matrix() { + for (auto sub_matrix: mSubMatrices) { + delete sub_matrix; + } + mSubMatrices.clear(); + } + + template + size_t Matrix::GetMatrixId() const { + return mMatrixId; + } + + template + size_t Matrix::GetNumOfSubMatrices() const { + return mSubMatrices.size(); + } + + template + size_t Matrix::GetTotalNumOfSubMatrices() const { + return mSpecs.size(); + } + + template + std::vector *> & + Matrix::GetSubMatrices() { + return mSubMatrices; + } + + template + SubMatrix &Matrix::GetSubMatrix(size_t aSubMatrixIndex) const { + if (aSubMatrixIndex >= mSubMatrices.size()) { + throw std::out_of_range("SubMatrix Index greater than number of Submatrices "); + } + return *mSubMatrices[aSubMatrixIndex]; + } + + template + void Matrix::Initialize(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, + size_t aTileNumOfRows, size_t aTileNumOfCols, common::StorageLayout aStorageLayout, + MatrixDecomposer &aMatrixDecomposer, hicmapp::runtime::HicmaContext &aContext, + size_t aRank) { + + this->mGlobalNumOfRowsInMatrix = aTotalGlobalNumOfRows; + this->mGlobalNumOfColsInMatrix = aTotalGlobalNumOfCols; + this->mGlobalNumOfRowsInTile = aTileNumOfRows; + this->mGlobalNumOfColsInTile = aTileNumOfCols; + this->mStorageLayout = aStorageLayout; + this->mFixedRank = aRank; +// auto RunTime_instance = hicmapp::runtime::RunTimeSingleton::GetRunTimeInstance(); + if (aRank > 0) { + this->mTileType = COMPRESSED; + } else { + this->mTileType = DENSE; + } + + if (aRank < 0 || !IsMatrixValid()) { + throw std::invalid_argument("Matrix::Initialize Invalid Matrix Initialization"); + } + + auto process_id = 0; + auto number_of_processes = 1; + +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(mContext.GetCommunicator().GetMPICommunicatior(), &process_id); + if (process_id == 0) { + this->mMatrixId = GenerateMatrixId(); + } + + MPI_Bcast(&this->mMatrixId, 1, MPI_UNSIGNED_LONG, 0, mContext.GetCommunicator().GetMPICommunicatior()); + 
MPI_Comm_size(mContext.GetCommunicator().GetMPICommunicatior(), &number_of_processes); +#else + this->mMatrixId = GenerateMatrixId(); +#endif + + if (mGlobalNumOfColsInTile == 0 || mGlobalNumOfRowsInTile == 0) throw std::runtime_error("Division By zero"); + + mGlobalNumOfTilesInRows = (mGlobalNumOfRowsInMatrix + mGlobalNumOfRowsInTile - 1) / mGlobalNumOfRowsInTile; + mGlobalNumOfTilesInCols = (mGlobalNumOfColsInMatrix + mGlobalNumOfColsInTile - 1) / mGlobalNumOfColsInTile; + + auto specs = aMatrixDecomposer.Decompose(mGlobalNumOfTilesInRows, + mGlobalNumOfTilesInCols, + mDiagonalMatrix); + + for (size_t i = 0; i < specs.size(); i++) { + + /// calculating the first and last element index in the submatrix in terms of rows. + auto sub_matrix_st_idx_in_rows = specs[i].GetStartingIndexInRows() * mGlobalNumOfRowsInTile; + auto sub_matrix_end_idx_in_rows = + sub_matrix_st_idx_in_rows + specs[i].GetNumOfTilesInRow() * mGlobalNumOfRowsInTile; + + sub_matrix_end_idx_in_rows = std::min(sub_matrix_end_idx_in_rows, mGlobalNumOfRowsInMatrix); + + auto num_of_elements_in_rows = sub_matrix_end_idx_in_rows - sub_matrix_st_idx_in_rows; + + auto sub_matrix_st_idx_in_cols = specs[i].GetStartingIndexInCols() * mGlobalNumOfColsInTile; + auto sub_matrix_end_idx_in_cols = + sub_matrix_st_idx_in_cols + specs[i].GetNumOfTilesInCol() * mGlobalNumOfColsInTile; + + sub_matrix_end_idx_in_cols = std::min(sub_matrix_end_idx_in_cols, mGlobalNumOfColsInMatrix); + + auto num_of_elements_in_cols = sub_matrix_end_idx_in_cols - sub_matrix_st_idx_in_cols; + + specs[i].SetTotalSubMatrixNumOfElementsInRows(num_of_elements_in_rows); + specs[i].SetTotalSubMatrixNumOfElementsInCols(num_of_elements_in_cols); + + if (specs[i].GetOwnerId() != process_id) { + continue; + } + + if ((specs[i].GetNumOfTilesInRow() == 0) || (specs[i].GetNumOfTilesInCol() == 0)) { + continue; + } + if (apMatrixData == nullptr) { + mSubMatrices.push_back( + new SubMatrix(nullptr, mGlobalNumOfRowsInTile, mGlobalNumOfColsInTile, + 
mGlobalNumOfRowsInMatrix, + mGlobalNumOfColsInMatrix, num_of_elements_in_rows, num_of_elements_in_cols, + specs[i].GetStartingIndexInRows(), specs[i].GetStartingIndexInCols(), + aStorageLayout, process_id, mContext, aRank)); + continue; + } + mSubMatrices.push_back( + new SubMatrix(apMatrixData, mGlobalNumOfRowsInTile, mGlobalNumOfColsInTile, + mGlobalNumOfRowsInMatrix, mGlobalNumOfColsInMatrix, num_of_elements_in_rows, + num_of_elements_in_cols, specs[i].GetStartingIndexInRows(), + specs[i].GetStartingIndexInCols(), aStorageLayout, process_id, mContext, aRank)); + } + + mMemory = 0; + mSpecs = specs; + for (auto &submatrix: mSubMatrices) { + mMemory += submatrix->GetMemoryFootprint(); + } + +// RunTime_instance->RegisterHandles(this); + } + + template + void Matrix::Initialize(T *apMatrixData, size_t aTotalGlobalNumOfRows, size_t aTotalGlobalNumOfCols, + size_t aTileNumOfRows, size_t aTileNumOfCols, common::StorageLayout aStorageLayout, + MatrixDecomposer &aMatrixDecomposer, hicmapp::runtime::HicmaContext &aContext, + const CompressionParameters &aParams) { + this->mGlobalNumOfRowsInMatrix = aTotalGlobalNumOfRows; + this->mGlobalNumOfColsInMatrix = aTotalGlobalNumOfCols; + this->mGlobalNumOfRowsInTile = aTileNumOfRows; + this->mGlobalNumOfColsInTile = aTileNumOfCols; + this->mStorageLayout = aStorageLayout; + this->mTileType = COMPRESSED; + + if (aParams.GetFixedRank() < 0 || !IsMatrixValid()) { + throw std::invalid_argument("Matrix::Initialize Invalid Matrix Initialization"); + } + + auto process_id = 0; + auto number_of_processes = 1; + +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(mContext.GetCommunicator().GetMPICommunicatior(), &process_id); + if (process_id == 0) { + this->mMatrixId = GenerateMatrixId(); + } + + MPI_Bcast(&this->mMatrixId, 1, MPI_UNSIGNED_LONG, 0, mContext.GetCommunicator().GetMPICommunicatior()); + MPI_Comm_size(mContext.GetCommunicator().GetMPICommunicatior(), &number_of_processes); + +#else + this->mMatrixId = GenerateMatrixId(); +#endif + + + 
if (mGlobalNumOfColsInTile == 0 || mGlobalNumOfRowsInTile == 0) throw std::runtime_error("Division By zero"); + + mGlobalNumOfTilesInRows = (mGlobalNumOfRowsInMatrix + mGlobalNumOfRowsInTile - 1) / mGlobalNumOfRowsInTile; + mGlobalNumOfTilesInCols = (mGlobalNumOfColsInMatrix + mGlobalNumOfColsInTile - 1) / mGlobalNumOfColsInTile; + + + auto specs = aMatrixDecomposer.Decompose(mGlobalNumOfTilesInRows, mGlobalNumOfTilesInCols, mDiagonalMatrix); + + size_t sub_matrix_idx = 0; + for (size_t i = 0; i < specs.size(); i++) { + + /// calculating the first and last element index in the submatrix in terms of rows. + auto sub_matrix_st_idx_in_rows = specs[i].GetStartingIndexInRows() * mGlobalNumOfRowsInTile; + auto sub_matrix_end_idx_in_rows = + sub_matrix_st_idx_in_rows + specs[i].GetNumOfTilesInRow() * mGlobalNumOfRowsInTile; + + sub_matrix_end_idx_in_rows = std::min(sub_matrix_end_idx_in_rows, mGlobalNumOfRowsInMatrix); + + auto num_of_elements_in_rows = sub_matrix_end_idx_in_rows - sub_matrix_st_idx_in_rows; + + auto sub_matrix_st_idx_in_cols = specs[i].GetStartingIndexInCols() * mGlobalNumOfColsInTile; + auto sub_matrix_end_idx_in_cols = + sub_matrix_st_idx_in_cols + specs[i].GetNumOfTilesInCol() * mGlobalNumOfColsInTile; + + sub_matrix_end_idx_in_cols = std::min(sub_matrix_end_idx_in_cols, mGlobalNumOfColsInMatrix); + + auto num_of_elements_in_cols = sub_matrix_end_idx_in_cols - sub_matrix_st_idx_in_cols; + + specs[i].SetTotalSubMatrixNumOfElementsInRows(num_of_elements_in_rows); + specs[i].SetTotalSubMatrixNumOfElementsInCols(num_of_elements_in_cols); + + if (specs[i].GetOwnerId() != process_id) { + continue; + } + + if ((specs[i].GetNumOfTilesInRow() == 0) || (specs[i].GetNumOfTilesInCol() == 0)) { + continue; + } + if (apMatrixData == nullptr) { + mSubMatrices.push_back( + new SubMatrix(nullptr, mGlobalNumOfRowsInTile, mGlobalNumOfColsInTile, + mGlobalNumOfRowsInMatrix, + mGlobalNumOfColsInMatrix, num_of_elements_in_rows, num_of_elements_in_cols, + 
specs[i].GetStartingIndexInRows(), specs[i].GetStartingIndexInCols(), + aStorageLayout, process_id, mContext, aParams)); + continue; + } + + mSubMatrices.push_back( + new SubMatrix(apMatrixData, mGlobalNumOfRowsInTile, mGlobalNumOfColsInTile, + mGlobalNumOfRowsInMatrix, mGlobalNumOfColsInMatrix, num_of_elements_in_rows, + num_of_elements_in_cols, specs[i].GetStartingIndexInRows(), + specs[i].GetStartingIndexInCols(), aStorageLayout, process_id, mContext, aParams)); + } + + mSpecs = specs; + mMemory = 0; + for (auto &submatrix: mSubMatrices) { + mMemory += submatrix->GetMemoryFootprint(); + } + + } + + template + size_t Matrix::GetNumOfGlobalTilesInRows() const { + return mGlobalNumOfTilesInRows; + } + + template + size_t Matrix::GetNumOfGlobalTilesInCols() const { + return mGlobalNumOfTilesInCols; + } + + template + bool Matrix::ContainsTile(size_t aTileIdxInRows, size_t aTileIdxInCols) const { + for (auto &subMatrix: mSubMatrices) { + if (subMatrix->ContainsTile(aTileIdxInRows, aTileIdxInCols)) { + return true; + } + } + return false; + } + + //todo throw out of range exception + template + Tile *Matrix::GetTilePointer(size_t aTileIdxInRows, size_t aTileIdxInCols) { + for (auto subMatrix: mSubMatrices) { + if (subMatrix->ContainsTile(aTileIdxInRows, aTileIdxInCols)) { + return subMatrix->GetTilePointer(aTileIdxInRows, aTileIdxInCols); + } + } + throw std::out_of_range("Matrix::GetTilePointer, out of range tile.\n"); + } + + template + common::StorageLayout Matrix::GetStorageLayout() const { + return mStorageLayout; + } + + template + void Matrix::Print(std::ostream &aOutStream) { + aOutStream << "Matrix :" << std::endl; + for (size_t cols = 0; cols < this->GetGlobalNumOfColsInMatrix(); cols += this->GetNumOfColsInTile()) { + for (size_t rows = 0; + rows < this->GetGlobalNumOfRowsInMatrix(); rows += this->GetNumOfRowsInTile()) { + auto tile_index_r = rows / this->GetNumOfRowsInTile(); + auto tile_index_c = cols / this->GetNumOfColsInTile(); + if 
(this->ContainsTile(tile_index_r, tile_index_c)) { + aOutStream << "Tile( " << tile_index_r << "," << tile_index_c << ")" << std::endl; + this->GetTilePointer(tile_index_r, tile_index_c)->Print(aOutStream); + } + } + } + } + + template + hcorepp::helpers::RawMatrix Matrix::ToRawMatrix(runtime::HicmaContext &aContext) { + int process_id = 0; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &process_id); +#endif + size_t full_array_index; + size_t tile_index_r; + size_t tile_index_c; + size_t index_in_tile; + hcorepp::helpers::RawMatrix ret(this->mGlobalNumOfRowsInMatrix, this->mGlobalNumOfColsInMatrix); + auto data_ptr = ret.GetData(); + for (size_t cols = 0; cols < mGlobalNumOfColsInMatrix; cols += mGlobalNumOfColsInTile) { + for (size_t rows = 0; rows < mGlobalNumOfRowsInMatrix; rows += mGlobalNumOfRowsInTile) { + size_t tile_rows = std::min(mGlobalNumOfRowsInTile, mGlobalNumOfRowsInMatrix - rows); + size_t tile_cols = std::min(mGlobalNumOfColsInTile, mGlobalNumOfColsInMatrix - cols); + tile_index_r = rows / mGlobalNumOfRowsInTile; + tile_index_c = cols / mGlobalNumOfColsInTile; + T *temp; + + auto tile_idx = tile_index_r + (tile_index_c * mGlobalNumOfTilesInRows); + if (!this->ContainsTile(tile_index_r, tile_index_c)) { +#ifdef HICMAPP_USE_MPI + temp = new T[tile_rows * tile_cols]; +#endif + } else { + auto *tile = this->GetTilePointer(tile_index_r, tile_index_c); + if (tile->isDense()) { + auto &sub_matrix = tile->GetDataHolder().get(); + size_t n = sub_matrix.GetNumOfCols(); + size_t m = sub_matrix.GetNumOfRows(); + temp = new T[n * m]; + hcorepp::memory::Memcpy(temp, sub_matrix.GetData(), n * m, + aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + aContext.SyncMainContext(); + } else { + auto *comp_tile = static_cast *>(tile); + auto m = comp_tile->GetNumOfRows(); + auto n = comp_tile->GetNumOfCols(); + auto rank = comp_tile->GetTileRank(); + size_t num_elements = m * rank; + T *cu = new T[num_elements]; + 
hcorepp::memory::Memcpy(cu, comp_tile->GetUMatrix(), num_elements, + aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + num_elements = rank * n; + T *cv = new T[num_elements]; + hcorepp::memory::Memcpy(cv, comp_tile->GetVMatrix(), num_elements, + aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + aContext.SyncMainContext(); + temp = new T[n * m]; + memset(temp, 0, m * n * sizeof(T)); + + blas::gemm(blas::Layout::ColMajor, blas::Op::NoTrans, blas::Op::NoTrans, + m, n, rank, 1.0, cu, + comp_tile->GetULeadingDim(), cv, + comp_tile->GetVLeadingDim(), 0.0, temp, m); + delete[] cu; + delete[] cv; + } + } +#ifdef HICMAPP_USE_MPI + MPI_Bcast((char *)temp, tile_rows * tile_cols * sizeof(T), MPI_CHAR, + GetTileOwnerId(tile_index_r, tile_index_c), + MPI_COMM_WORLD); +#endif + + for (size_t i = 0; i < tile_cols; i++) { + for (size_t j = 0; j < tile_rows; j++) { + index_in_tile = i * tile_rows + j; + full_array_index = rows + j + ((cols + i) * mGlobalNumOfRowsInMatrix); + data_ptr[full_array_index] = temp[index_in_tile]; + } + } + delete[] temp; + } + } + return ret; + } + + template + int Matrix::GetSubMatrixOwnerId(size_t aTileIdxInRows, size_t aTileIdxInCols) const { + for (auto &subMatrix: mSubMatrices) { + if (subMatrix->ContainsTile(aTileIdxInRows, aTileIdxInCols)) { + return subMatrix->GetSubMatrixOwnerId(); + } + } + return -1; + } + + template + int Matrix::GetTileOwnerId(size_t aTileIdxInRows, size_t aTileIdxInCols) const { + for (auto &sub_matrix_spec: mSpecs) { + auto st_idx_in_rows = sub_matrix_spec.GetStartingIndexInRows(); + auto st_idx_in_cols = sub_matrix_spec.GetStartingIndexInCols(); + auto end_idx_in_rows = sub_matrix_spec.GetStartingIndexInRows() + sub_matrix_spec.GetNumOfTilesInRow(); + auto end_idx_in_cols = sub_matrix_spec.GetStartingIndexInCols() + sub_matrix_spec.GetNumOfTilesInCol(); + + if (aTileIdxInRows >= st_idx_in_rows && aTileIdxInRows < end_idx_in_rows) { + if (aTileIdxInCols >= 
st_idx_in_cols && aTileIdxInCols < end_idx_in_cols) { + return sub_matrix_spec.GetOwnerId(); + } + } + } + return -1; + } + + template + runtime::HicmaContext &Matrix::GetContext() const { + return mContext; + } + + template + bool Matrix::IsMatrixValid() const { + + if (!(std::is_floating_point::value && (sizeof(T) == sizeof(float) || sizeof(T) == sizeof(double)))) { + return false; + } + if (mGlobalNumOfRowsInTile <= 0 || mGlobalNumOfColsInTile <= 0) { + return false; + } + if (mGlobalNumOfRowsInMatrix <= 0 || mGlobalNumOfColsInMatrix <= 0) { + return false; + } + for (auto sub_matrix: mSubMatrices) { + if (!sub_matrix->IsValid()) { + return false; + } + } + return true; + } + + template + size_t Matrix::GetNumOfRowsInTile() const { + return mGlobalNumOfRowsInTile; + } + + template + size_t Matrix::GetNumOfColsInTile() const { + return mGlobalNumOfColsInTile; + } + + template + size_t Matrix::GetGlobalNumOfRowsInMatrix() const { + return mGlobalNumOfRowsInMatrix; + } + + template + size_t Matrix::GetGlobalNumOfColsInMatrix() const { + return mGlobalNumOfColsInMatrix; + } + + template + size_t Matrix::GetMemoryFootprint() { + return mMemory; + } + + template + size_t Matrix::GetTileLeadingDim(size_t aTileIdx) { + size_t leading_dim = 0; + if (mStorageLayout == common::StorageLayout::HicmaCM) { + leading_dim = mGlobalNumOfRowsInTile; + if (aTileIdx == mGlobalNumOfTilesInRows) { + leading_dim = mGlobalNumOfRowsInMatrix % mGlobalNumOfRowsInTile; + } + } else if (mStorageLayout == common::StorageLayout::HicmaRM) { + leading_dim = mGlobalNumOfColsInTile; + if (aTileIdx == mGlobalNumOfTilesInCols) { + leading_dim = mGlobalNumOfColsInMatrix % mGlobalNumOfColsInTile; + } + } + return leading_dim; + } + + template + TileMetadata *Matrix::GetTileMetadata(size_t aTileRowIdx, size_t aTileColIdx) { + int owner = this->GetTileOwnerId(aTileRowIdx, aTileColIdx); + int myrank = 0; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(this->GetContext().GetCommunicator().GetMPICommunicatior(), 
&myrank); +#endif + if (myrank == owner) { + auto *tile = this->GetTilePointer(aTileRowIdx, aTileColIdx); + auto metadata_data = hcorepp::operators::TilePacker::UnPackTile(*tile, + hcorepp::kernels::ContextManager::GetInstance().GetContext()); + + return metadata_data.first; + } else { + size_t tile_rows = mGlobalNumOfRowsInTile; + size_t tile_cols = mGlobalNumOfColsInTile; + for (size_t i = 0; i < mSpecs.size(); i++) { + auto sub_matrix_spec = mSpecs[i]; + auto mNumOfTilesinRows = + (sub_matrix_spec.GetTotalSubMatrixNumOfElementsInRows() + tile_rows - 1) / tile_rows; + auto mNumOfTilesinCols = + (sub_matrix_spec.GetTotalSubMatrixNumOfElementsInCols() + tile_cols - 1) / tile_cols; + + auto sub_matrix_st_idx_in_rows = sub_matrix_spec.GetStartingIndexInRows() * mGlobalNumOfRowsInTile; + auto sub_matrix_end_idx_in_rows = + sub_matrix_st_idx_in_rows + sub_matrix_spec.GetNumOfTilesInRow() * mGlobalNumOfRowsInTile; + + sub_matrix_end_idx_in_rows = std::min(sub_matrix_end_idx_in_rows, mGlobalNumOfRowsInMatrix); + + size_t sub_matrix_tile_st_idx_rows = sub_matrix_spec.GetStartingIndexInRows(); + size_t sub_matrix_tile_end_idx_rows = sub_matrix_spec.GetStartingIndexInRows() + mNumOfTilesinRows; + size_t sub_matrix_tile_st_idx_cols = sub_matrix_spec.GetStartingIndexInCols(); + size_t sub_matrix_tile_end_idx_cols = sub_matrix_spec.GetStartingIndexInCols() + mNumOfTilesinCols; + + bool row_check = (aTileRowIdx < sub_matrix_tile_end_idx_rows && + aTileRowIdx >= sub_matrix_tile_st_idx_rows); + bool column_check = (aTileColIdx < sub_matrix_tile_end_idx_cols && + aTileColIdx >= sub_matrix_tile_st_idx_cols); + if (!row_check || !column_check) { + continue; + } + + auto remainder_rows = (sub_matrix_spec.GetTotalSubMatrixNumOfElementsInRows() % tile_rows != 0); + auto remainder_cols = (sub_matrix_spec.GetTotalSubMatrixNumOfElementsInCols() % tile_cols != 0); + + if (remainder_rows && aTileRowIdx == sub_matrix_tile_end_idx_rows - 1) { + /// remainder tile in rows + tile_rows = 
std::min(tile_rows, + sub_matrix_spec.GetTotalSubMatrixNumOfElementsInRows() - + aTileRowIdx * tile_rows); + } + if (remainder_cols && aTileColIdx == sub_matrix_tile_end_idx_cols - 1) { + /// remainder tile in cols + tile_cols = std::min(tile_cols, + sub_matrix_spec.GetTotalSubMatrixNumOfElementsInCols() - + aTileColIdx * tile_cols); + } + + auto tile_layout = (blas::Layout) mStorageLayout; + auto tile_leading_dim = (tile_layout == blas::Layout::ColMajor) ? tile_rows : tile_cols; + auto tile_matrix_rank = mFixedRank; + auto tile_dense = mTileType; + auto max_rank = std::max(std::min(tile_rows, tile_cols) / MAX_RANK_RATIO, 1UL); + TileMetadata *metadata = new TileMetadata(tile_rows, tile_cols, tile_matrix_rank, max_rank, + tile_leading_dim, + tile_layout, tile_dense); + + return metadata; + } + + } + + return nullptr; + } + + HICMAPP_INSTANTIATE_CLASS(Matrix) +} \ No newline at end of file diff --git a/src/primitives/submatrix.cpp b/src/primitives/submatrix.cpp new file mode 100644 index 0000000..6828bfd --- /dev/null +++ b/src/primitives/submatrix.cpp @@ -0,0 +1,416 @@ + +#include +#include + +namespace hicmapp::primitives { + + template + SubMatrix::SubMatrix(T *apSubMatrixData, size_t aTileNumOfRows, size_t aTileNumOfCols, size_t aGlobalMatrixRows, + size_t aGlobalMatrixCols, size_t aSubMatrixNumOfRows, size_t aSubMatrixNumOfCols, + size_t aTilesGlobalStIdxInRows, size_t aTilesGlobalStIdxInCols, + common::StorageLayout aStorageLayout, size_t aOwnerId, runtime::HicmaContext &aContext, + size_t aRank) { + mStorageLayout = aStorageLayout; + mSubMatrixRows = aSubMatrixNumOfRows; + mSubMatrixCols = aSubMatrixNumOfCols; + mTileRows = aTileNumOfRows; + mTileCols = aTileNumOfCols; + mGlobalMatrixRows = aGlobalMatrixRows; + mGlobalMatrixCols = aGlobalMatrixCols; + // with respect to the tiles decomposed within the global matrix + mTilesGlobalStIdxInRows = aTilesGlobalStIdxInRows; + mTilesGlobalStIdxInCols = aTilesGlobalStIdxInCols; + + if (aRank < 0 || !IsValid()) { + 
throw std::invalid_argument("Matrix::Initialize Invalid Matrix Initialization"); + } + + if (mTileCols == 0 || mTileRows == 0) { + throw std::runtime_error("SubMatrix::SubMatrix, Division by zero.\n"); + } + mNumOfTilesinRows = (mSubMatrixRows + mTileRows - 1) / mTileRows; + mNumOfTilesinCols = (mSubMatrixCols + mTileCols - 1) / mTileCols; + + mSubMatrixOwnerId = aOwnerId; + + size_t data_offset = 0; + + auto slow_dim_total_num_of_elements_in_matrix = 0; + auto fast_dim_total_num_of_elements_in_matrix = 0; + auto slow_dim_total_num_of_elements_in_submatrix = 0; + auto fast_dim_total_num_of_elements_in_submatrix = 0; + auto slow_dim_tile_num_of_elements = 0; + auto fast_dim_tile_num_of_elements = 0; + auto num_of_rows_in_tile = mTileRows; + auto num_of_cols_in_tile = mTileCols; + auto layout = blas::Layout::RowMajor; + auto leading_dim = num_of_rows_in_tile; + size_t initial_global_offset = 0; + + if (mStorageLayout == common::StorageLayout::HicmaCM) { + slow_dim_total_num_of_elements_in_submatrix = mSubMatrixCols; + fast_dim_total_num_of_elements_in_submatrix = mSubMatrixRows; + slow_dim_tile_num_of_elements = mTileCols; + fast_dim_tile_num_of_elements = mTileRows; + layout = blas::Layout::ColMajor; + slow_dim_total_num_of_elements_in_matrix = mGlobalMatrixCols; + fast_dim_total_num_of_elements_in_matrix = mGlobalMatrixRows; + initial_global_offset = + mTilesGlobalStIdxInCols * mTileCols * mGlobalMatrixRows + mTilesGlobalStIdxInRows * mTileRows; + } else if (mStorageLayout == common::StorageLayout::HicmaRM) { + slow_dim_total_num_of_elements_in_submatrix = mSubMatrixRows; + fast_dim_total_num_of_elements_in_submatrix = mSubMatrixCols; + slow_dim_tile_num_of_elements = mTileRows; + fast_dim_tile_num_of_elements = mTileCols; + layout = blas::Layout::RowMajor; + slow_dim_total_num_of_elements_in_matrix = mGlobalMatrixRows; + fast_dim_total_num_of_elements_in_matrix = mGlobalMatrixCols; + initial_global_offset = + mTilesGlobalStIdxInRows * mTileRows * mGlobalMatrixCols 
+ mTilesGlobalStIdxInCols * mTileCols; + } + + size_t global_offset = 0; + + for (size_t i = 0; i < slow_dim_total_num_of_elements_in_submatrix; i += slow_dim_tile_num_of_elements) { + global_offset = (i * fast_dim_total_num_of_elements_in_matrix) + initial_global_offset; + data_offset = global_offset; + + for (size_t j = 0; j < fast_dim_total_num_of_elements_in_submatrix; j += fast_dim_tile_num_of_elements) { + if (mStorageLayout == common::StorageLayout::HicmaCM) { + num_of_cols_in_tile = std::min(mTileCols, slow_dim_total_num_of_elements_in_submatrix - i); + num_of_rows_in_tile = std::min(mTileRows, fast_dim_total_num_of_elements_in_submatrix - j); + leading_dim = num_of_rows_in_tile; + } else if (mStorageLayout == common::StorageLayout::HicmaRM) { + num_of_cols_in_tile = std::min(mTileCols, fast_dim_total_num_of_elements_in_submatrix - j); + num_of_rows_in_tile = std::min(mTileRows, slow_dim_total_num_of_elements_in_submatrix - i); + leading_dim = num_of_cols_in_tile; + } + + if (apSubMatrixData == nullptr) { + if (aRank > 0) { + mTiles.push_back( + new CompressedTile(num_of_rows_in_tile, num_of_cols_in_tile, nullptr, leading_dim, + aRank, layout, + aContext.GetMainContext())); + } else { + mTiles.push_back( + new DenseTile(num_of_rows_in_tile, num_of_cols_in_tile, nullptr, leading_dim, layout, + aContext.GetMainContext())); + } + continue; + } + + if (apSubMatrixData != nullptr) { + auto data_array = hcorepp::memory::AllocateArray(num_of_rows_in_tile * num_of_cols_in_tile, + aContext.GetMainContext()); + auto array_off = 0; + auto temp_offset = data_offset; + if (mStorageLayout == common::StorageLayout::HicmaCM) { + for (size_t col = 0; col < num_of_cols_in_tile; col++) { + hcorepp::memory::Memcpy(&data_array[array_off], &apSubMatrixData[temp_offset], + num_of_rows_in_tile, aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_DEVICE); + temp_offset += mGlobalMatrixRows; + array_off += num_of_rows_in_tile; + } + } else if (mStorageLayout == 
common::StorageLayout::HicmaRM) { + for (size_t row = 0; row < num_of_rows_in_tile; row++) { + hcorepp::memory::Memcpy(&data_array[array_off], &apSubMatrixData[temp_offset], + num_of_cols_in_tile, aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_DEVICE); + temp_offset += mGlobalMatrixCols; + array_off += num_of_cols_in_tile; + } + } + + if (aRank > 0) { + mTiles.push_back( + new CompressedTile(num_of_rows_in_tile, num_of_cols_in_tile, data_array, leading_dim, + aRank, + layout, aContext.GetMainContext())); + } else { + mTiles.push_back( + new DenseTile(num_of_rows_in_tile, num_of_cols_in_tile, data_array, leading_dim, + layout, + aContext.GetMainContext())); + } + + data_offset += leading_dim; + hcorepp::memory::DestroyArray(data_array, aContext.GetMainContext()); + } + } + } + + mMemory = 0; + if (aRank > 0) { + for (size_t i = 0; i < mNumOfTilesinCols; i++) { + for (size_t j = 0; j < mNumOfTilesinRows; j++) { + auto tile_cols = std::min(mTileCols, mSubMatrixCols - i * mTileCols); + auto tile_rows = std::min(mTileRows, mSubMatrixRows - j * mTileRows); + auto tile_idx = 0; + if (mStorageLayout == common::StorageLayout::HicmaCM) { + tile_idx = i * mNumOfTilesinRows + j; + } else if (mStorageLayout == common::StorageLayout::HicmaRM) { + tile_idx = j * mNumOfTilesinCols + i; + } + mMemory += ((tile_rows + tile_cols) * + this->mTiles[tile_idx]->GetTileRank() * sizeof(T)); + } + } + } else { + mMemory = mSubMatrixRows * mSubMatrixCols * sizeof(T); + } + } + + template + SubMatrix::SubMatrix(T *apSubMatrixData, size_t aTileNumOfRows, size_t aTileNumOfCols, + size_t aGlobalMatrixRows, size_t aGlobalMatrixCols, size_t aSubMatrixNumOfRows, + size_t aSubMatrixNumOfCols, size_t aTilesGlobalStIdxInRows, + size_t aTilesGlobalStIdxInCols, common::StorageLayout aStorageLayout, size_t aOwnerId, + runtime::HicmaContext &aContext, const CompressionParameters &aParams) { + mStorageLayout = aStorageLayout; + mSubMatrixRows = aSubMatrixNumOfRows; + mSubMatrixCols 
= aSubMatrixNumOfCols; + mTileRows = aTileNumOfRows; + mTileCols = aTileNumOfCols; + mGlobalMatrixRows = aGlobalMatrixRows; + mGlobalMatrixCols = aGlobalMatrixCols; + // with respect to the tiles decomposed within the global matrix + mTilesGlobalStIdxInRows = aTilesGlobalStIdxInRows; + mTilesGlobalStIdxInCols = aTilesGlobalStIdxInCols; + + if (aParams.GetFixedRank() < 0 || !IsValid()) { + throw std::invalid_argument("Matrix::Initialize Invalid Matrix Initialization"); + } + + + if (mTileCols == 0 || mTileRows == 0) { + throw std::runtime_error("SubMatrix::SubMatrix, Division by zero.\n"); + } + mNumOfTilesinRows = (mSubMatrixRows + mTileRows - 1) / mTileRows; + mNumOfTilesinCols = (mSubMatrixCols + mTileCols - 1) / mTileCols; + + + mSubMatrixOwnerId = aOwnerId; + + size_t data_offset = 0; + + auto slow_dim_total_num_of_elements = 0; + auto fast_dim_total_num_of_elements = 0; + auto slow_dim_tile_num_of_elements = 0; + auto fast_dim_tile_num_of_elements = 0; + auto num_of_rows = mTileRows; + auto num_of_cols = mTileCols; + auto layout = blas::Layout::RowMajor; + auto leading_dim = num_of_rows; + size_t initial_global_offset = 0; + + if (mStorageLayout == common::StorageLayout::HicmaCM) { + slow_dim_total_num_of_elements = mSubMatrixCols; + fast_dim_total_num_of_elements = mSubMatrixRows; + slow_dim_tile_num_of_elements = mTileCols; + fast_dim_tile_num_of_elements = mTileRows; + layout = blas::Layout::ColMajor; + initial_global_offset = + mTilesGlobalStIdxInCols * mTileCols * mGlobalMatrixRows + mTilesGlobalStIdxInRows * mTileRows; + + } else if (mStorageLayout == common::StorageLayout::HicmaRM) { + slow_dim_total_num_of_elements = mSubMatrixRows; + fast_dim_total_num_of_elements = mSubMatrixCols; + slow_dim_tile_num_of_elements = mTileRows; + fast_dim_tile_num_of_elements = mTileCols; + layout = blas::Layout::RowMajor; + initial_global_offset = + mTilesGlobalStIdxInRows * mTileRows * mGlobalMatrixCols + mTilesGlobalStIdxInCols * mTileCols; + } + + size_t 
global_offset = 0; + for (size_t i = 0; i < slow_dim_total_num_of_elements; i += slow_dim_tile_num_of_elements) { + global_offset = (i * fast_dim_total_num_of_elements) + initial_global_offset; + data_offset = global_offset; + + for (size_t j = 0; j < fast_dim_total_num_of_elements; j += fast_dim_tile_num_of_elements) { + if (mStorageLayout == common::StorageLayout::HicmaCM) { + num_of_cols = std::min(mTileCols, slow_dim_total_num_of_elements - i); + num_of_rows = std::min(mTileRows, fast_dim_total_num_of_elements - j); + leading_dim = num_of_rows; + } else if (mStorageLayout == common::StorageLayout::HicmaRM) { + num_of_cols = std::min(mTileCols, fast_dim_total_num_of_elements - j); + num_of_rows = std::min(mTileRows, slow_dim_total_num_of_elements - i); + leading_dim = num_of_cols; + } + + if (apSubMatrixData == nullptr) { + mTiles.push_back( + new CompressedTile(num_of_rows, num_of_cols, nullptr, leading_dim, aParams, layout, + aContext.GetMainContext())); + + continue; + } + + + if (apSubMatrixData != nullptr) { + auto data_array = hcorepp::memory::AllocateArray(num_of_rows * num_of_cols, + aContext.GetMainContext()); + auto array_off = 0; + auto temp_offset = data_offset; + + if (mStorageLayout == common::StorageLayout::HicmaCM) { + for (size_t col = 0; col < num_of_cols; col++) { + hcorepp::memory::Memcpy(&data_array[array_off], &apSubMatrixData[temp_offset], + num_of_rows, aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_DEVICE); + temp_offset += mGlobalMatrixRows; + array_off += num_of_rows; + } + } else if (mStorageLayout == common::StorageLayout::HicmaRM) { + for (size_t row = 0; row < num_of_rows; row++) { + hcorepp::memory::Memcpy(&data_array[array_off], &apSubMatrixData[temp_offset], + num_of_cols, aContext.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_DEVICE); + temp_offset += mGlobalMatrixCols; + array_off += num_of_cols; + } + } + + mTiles.push_back( + new CompressedTile(num_of_rows, num_of_cols, data_array, 
leading_dim, aParams, + layout, aContext.GetMainContext())); + + data_offset += leading_dim; + hcorepp::memory::DestroyArray(data_array, aContext.GetMainContext()); + } + } + + + } + + mMemory = 0; + if (aParams.GetFixedRank() > 0) { + for (size_t i = 0; i < mNumOfTilesinCols; i++) { + for (size_t j = 0; j < mNumOfTilesinRows; j++) { + auto tile_cols = std::min(mTileCols, mSubMatrixCols - i * mTileCols); + auto tile_rows = std::min(mTileRows, mSubMatrixRows - j * mTileRows); + mMemory += ((tile_rows + tile_cols) * + this->mTiles[i][j].GetTileRank() * sizeof(T)); + } + } + } else { + mMemory = mSubMatrixRows * mSubMatrixCols * sizeof(T); + } + + } + + template + SubMatrix::~SubMatrix() { + for (auto tile: mTiles) { + delete tile; + } + mTiles.clear(); + } + + template + size_t SubMatrix::GetNumberofTiles() { + return mTiles.size(); + } + + template + std::vector *> & + SubMatrix::GetTiles() { + return mTiles; + } + + template + bool SubMatrix::ContainsTile(size_t aTileIdxInRows, size_t aTileIdxInCols) { + size_t sub_matrix_tile_end_idx_row = mTilesGlobalStIdxInRows + mNumOfTilesinRows; + size_t sub_matrix_tile_end_idx_col = mTilesGlobalStIdxInCols + mNumOfTilesinCols; + + bool row_check = (aTileIdxInRows < sub_matrix_tile_end_idx_row && aTileIdxInRows >= mTilesGlobalStIdxInRows); + bool column_check = (aTileIdxInCols < sub_matrix_tile_end_idx_col && aTileIdxInCols >= mTilesGlobalStIdxInCols); + return (row_check && column_check); + } + + + template + Tile *SubMatrix::GetTilePointer(size_t aTileIdxInRows, size_t aTileIdxInCols) { + + auto requested_tile_idx = 0; + + size_t relative_tile_row_idx = aTileIdxInRows - mTilesGlobalStIdxInRows; + size_t relative_tile_col_idx = aTileIdxInCols - mTilesGlobalStIdxInCols; + + if (mStorageLayout == common::StorageLayout::HicmaCM) { + requested_tile_idx = + relative_tile_col_idx * mNumOfTilesinRows + relative_tile_row_idx; + } else if (mStorageLayout == common::StorageLayout::HicmaRM) { + requested_tile_idx = + 
relative_tile_row_idx * mNumOfTilesinCols + relative_tile_col_idx; + } + + size_t index = requested_tile_idx; + + return mTiles[index]; + } + + template + int SubMatrix::GetSubMatrixOwnerId() { + return mSubMatrixOwnerId; + } + + template + bool SubMatrix::IsValid() { + + if (mSubMatrixRows <= 0 || mSubMatrixCols < 0) { + return false; + } + if ((mGlobalMatrixRows < mSubMatrixRows) || (mGlobalMatrixCols < mSubMatrixCols)) { + return false; + } + if ((mTilesGlobalStIdxInRows > 0 && mTilesGlobalStIdxInRows >= mGlobalMatrixRows) || + (mTilesGlobalStIdxInCols > 0 && mTilesGlobalStIdxInCols >= mGlobalMatrixCols)) { + return false; + } + if (mTilesGlobalStIdxInRows + mSubMatrixRows > mGlobalMatrixRows || + mTilesGlobalStIdxInCols + mSubMatrixCols > mGlobalMatrixCols) { + return false; + } + return true; + } + + template + size_t SubMatrix::GetNumOfTilesinRows() { + return mNumOfTilesinRows; + } + + template + size_t SubMatrix::GetNumOfTilesinCols() { + return mNumOfTilesinCols; + } + + template + size_t SubMatrix::GetTileRows() { + return mTileRows; + } + + template + size_t SubMatrix::GetTileCols() { + return mTileCols; + } + + template + size_t SubMatrix::GetTilesGlobalStIdxInRows() { + return mTilesGlobalStIdxInRows; + } + + template + size_t SubMatrix::GetTilesGlobalStIdxInCols() { + return mTilesGlobalStIdxInCols; + } + + template + size_t SubMatrix::GetMemoryFootprint() { + return mMemory; + } + + HICMAPP_INSTANTIATE_CLASS(SubMatrix) +} \ No newline at end of file diff --git a/src/problem-manager/CMakeLists.txt b/src/problem-manager/CMakeLists.txt new file mode 100644 index 0000000..e426a03 --- /dev/null +++ b/src/problem-manager/CMakeLists.txt @@ -0,0 +1,5 @@ +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/StarshManager.cpp + ${SOURCES} + PARENT_SCOPE + ) diff --git a/src/problem-manager/StarshManager.cpp b/src/problem-manager/StarshManager.cpp new file mode 100644 index 0000000..bd6a0e6 --- /dev/null +++ b/src/problem-manager/StarshManager.cpp @@ -0,0 +1,105 @@ 
+#include "hicmapp/problem-manager/StarshManager.hpp" +#include "hicmapp/primitives/ProblemManager.hpp" +#include +#include +#include +#include +#include "starsh-spatial.h" +#include "starsh-randtlr.h" +#include "iostream" + +namespace hicmapp::operations { + STARSH_blrf *StarsHManager::starsh_format = nullptr; + + STARSH_blrf *StarsHManager::GetStarsHFormat() { + if (starsh_format != nullptr) { + return starsh_format; + } else { + // throw exception.. + } + } + + void StarsHManager::SetStarsHFormat(primitives::ProblemManager &aProblemManager) { + + int info = 0; + STARSH_problem *problem = nullptr; + void *data = nullptr; + STARSH_kernel *kernel = nullptr; + char dtype = 'd'; + + auto N = aProblemManager.GetProblemProperty(primitives::ProblemProperty::HICMA_PROB_PROPERTY_N); + auto Ndim = aProblemManager.GetProblemProperty(primitives::ProblemProperty::HICMA_PROB_PROPERTY_NDIM); + auto block_size = aProblemManager.GetProblemProperty(primitives::ProblemProperty::HICMA_PROB_PROPERTY_BLOCK_SIZE); + char sym = aProblemManager.GetProblemProperty(primitives::ProblemProperty::HICMA_PROB_PROPERTY_SYM); + + std::string problem_str; + if (aProblemManager.GetProblemType() == hicmapp::common::ProblemType::PROBLEM_TYPE_SS) { + auto Nu = aProblemManager.GetProblemProperty(primitives::ProblemProperty::HICMA_PROB_PROPERTY_NU); + auto Beta = aProblemManager.GetProblemProperty(primitives::ProblemProperty::HICMA_PROB_PROPERTY_BETA); + auto Noise = aProblemManager.GetProblemProperty(primitives::ProblemProperty::HICMA_PROB_PROPERTY_NOISE); + + + int kernel_type = STARSH_SPATIAL_SQREXP_SIMD; + srand(0); // FIXME + enum STARSH_PARTICLES_PLACEMENT place = STARSH_PARTICLES_UNIFORM; + info = starsh_application((void **) &data, &kernel, N, + dtype, STARSH_SPATIAL, kernel_type, + STARSH_SPATIAL_NDIM, Ndim, STARSH_SPATIAL_BETA, + Beta, STARSH_SPATIAL_NU, Nu, + STARSH_SPATIAL_NOISE, Noise, STARSH_SPATIAL_PLACE, place, + 0); + problem_str = "ST_2D_SQEXP"; + } else if 
(aProblemManager.GetProblemType() == hicmapp::common::ProblemType::PROBLEM_TYPE_RND) { + auto Noise = aProblemManager.GetProblemProperty(primitives::ProblemProperty::HICMA_PROB_PROPERTY_NOISE); + auto decay = aProblemManager.GetProblemProperty(primitives::ProblemProperty::HICMA_PROB_PROPERTY_DECAY); + + int kernel_type = STARSH_RANDTLR_KERNEL1; + info = starsh_application((void **) &data, &kernel, N, dtype, + STARSH_RANDTLR, kernel_type, STARSH_RANDTLR_NB, block_size, + STARSH_RANDTLR_DECAY, decay, STARSH_RANDTLR_DIAG, + Noise, + 0); + problem_str = "Randomly generated matrix"; + } else { + fprintf(stderr, "Unknown type of STARS-H problem:%d. Exiting...\n", aProblemManager.GetProblemType()); + } + + if (info != 0) { + printf("wrong parameters for starsh_application()\n"); +// exit(info); + } + + STARSH_int shape[] = {N, N}; + + + starsh_problem_new(&problem, Ndim, + shape, sym, dtype, data, data, kernel, + (char *) problem_str.c_str()); + STARSH_cluster *cluster; + info = starsh_cluster_new_plain(&cluster, data, N, block_size); + if (info != 0) { + printf("Error in creation of cluster\n"); +// exit(info); + } + + STARSH_blrf *F; + info = starsh_blrf_new_tlr(&F, problem, sym, cluster, cluster); + if (info != 0) { + printf("Error in creation of format\n"); +// exit(info); + } + if (starsh_format != nullptr) { + starsh_blrf_free(starsh_format); + starsh_format = nullptr; + } + starsh_format = F; + + } + + void StarsHManager::DestroyStarsHManager() { + if (starsh_format != nullptr) { + starsh_blrf_free(starsh_format); + starsh_format = nullptr; + } + } +} \ No newline at end of file diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt new file mode 100644 index 0000000..b31d096 --- /dev/null +++ b/src/runtime/CMakeLists.txt @@ -0,0 +1,12 @@ +if (HICMAPP_SCHED_LOWER STREQUAL "starpu") + add_subdirectory(starpu) +elseif(HICMAPP_SCHED_LOWER STREQUAL "default") + add_subdirectory(default) +endif() + +set(SOURCES + 
${CMAKE_CURRENT_SOURCE_DIR}/RunTimeSingleton.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/HicmaContext.cpp + ${SOURCES} + PARENT_SCOPE + ) \ No newline at end of file diff --git a/src/runtime/HicmaContext.cpp b/src/runtime/HicmaContext.cpp new file mode 100644 index 0000000..88c230f --- /dev/null +++ b/src/runtime/HicmaContext.cpp @@ -0,0 +1,43 @@ +#include + +namespace hicmapp::runtime { + + HicmaContext::HicmaContext() = default; + + const hcorepp::kernels::RunContext & + HicmaContext::GetMainContext() { + return hcorepp::kernels::ContextManager::GetInstance().GetContext(0); + } + + const hcorepp::kernels::RunContext &HicmaContext::GetContext(size_t aIdx) { + return hcorepp::kernels::ContextManager::GetInstance().GetContext(aIdx); + } + + size_t HicmaContext::GetNumOfContexts() { + return hcorepp::kernels::ContextManager::GetInstance().GetNumOfContexts(); + } + + void HicmaContext::SyncMainContext() { + hcorepp::kernels::ContextManager::GetInstance().SyncMainContext(); + } + + void HicmaContext::SyncContext(size_t aIdx) { + hcorepp::kernels::ContextManager::GetInstance().SyncContext(aIdx); + } + + void HicmaContext::SyncAll() { + hcorepp::kernels::ContextManager::GetInstance().SyncAll(); + } + + HicmaCommunicator &HicmaContext::GetCommunicator() { + return mCommunicator; + } + + void HicmaContext::SetCommunicator(HicmaCommunicator &aCommunicator) { + mCommunicator = aCommunicator; + } + + HicmaContext::HicmaContext(HicmaCommunicator aComm) : mCommunicator(aComm) { + } + +} \ No newline at end of file diff --git a/src/runtime/RunTimeSingleton.cpp b/src/runtime/RunTimeSingleton.cpp new file mode 100644 index 0000000..5e04f68 --- /dev/null +++ b/src/runtime/RunTimeSingleton.cpp @@ -0,0 +1,37 @@ +#include +#include + +namespace hicmapp::runtime { + + template + hicmapp::runtime::RunTimeInterface *RunTimeSingleton::runtime_instance = nullptr; + + template + RunTimeSingleton::RunTimeSingleton() = default; + + template + RunTimeInterface *RunTimeSingleton::GetRunTimeInstance() { + 
if (runtime_instance != nullptr) { + return runtime_instance; + } else { + + throw std::runtime_error("RunTimeSingleton::GetRunTimeInstance, Instance is null.\n"); + } + } + + template + void RunTimeSingleton::setRunTimeInstance(hicmapp::runtime::HicmaHardware& aHardware) { + if (runtime_instance == nullptr) { + runtime_instance = hicmapp::runtime::RunTimeFactory::CreateRunTimeInstance(aHardware); + } + + if (runtime_instance == nullptr) { + throw std::bad_alloc(); + } + + + } + + HICMAPP_INSTANTIATE_CLASS(RunTimeSingleton); + +} \ No newline at end of file diff --git a/src/runtime/default/CMakeLists.txt b/src/runtime/default/CMakeLists.txt new file mode 100644 index 0000000..e306118 --- /dev/null +++ b/src/runtime/default/CMakeLists.txt @@ -0,0 +1,5 @@ +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/default_runtime.cpp + ${SOURCES} + PARENT_SCOPE + ) \ No newline at end of file diff --git a/src/runtime/default/default_runtime.cpp b/src/runtime/default/default_runtime.cpp new file mode 100644 index 0000000..28a0767 --- /dev/null +++ b/src/runtime/default/default_runtime.cpp @@ -0,0 +1,228 @@ +#include +#include + +namespace hicmapp::runtime { + + template + DefaultRuntime::DefaultRuntime([[maybe_unused]] hicmapp::runtime::HicmaHardware &aHardware) { + } + + template + DefaultRuntime::~DefaultRuntime() = default; + + template + int DefaultRuntime::GenerateDenseMatrix(Matrix &aMatrix, size_t aTileIdxInCols, size_t aTileIdxInRows) { + + + auto *tile = static_cast *>(aMatrix.GetTilePointer(aTileIdxInRows, aTileIdxInCols)); + + int rc = hicmapp::operations::TileOperations::GenerateDenseTile(*tile, aTileIdxInRows, aTileIdxInCols); + + return rc; + } + + template + int DefaultRuntime::GenerateCompressedMatrix(Matrix &aMatrix, + size_t aTileIdxInRows, size_t aTileIdxInCols, + const CompressionParameters &aSVDArguments) { + + auto *tile = aMatrix.GetTilePointer(aTileIdxInRows, aTileIdxInCols); + int rc = 
hicmapp::operations::TileOperations::GenerateCompressedMatrix(*(CompressedTile *) tile, + aTileIdxInRows, aTileIdxInCols, + aSVDArguments); + return rc; + } + + template + size_t + DefaultRuntime::Gemm(T aAlpha, Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, + const blas::Op &aAOp, + Matrix &aMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, const blas::Op &aBOp, + T aBeta, + Matrix &aMatrixC, const size_t &aRowIdxC, const size_t &aColIdxC, + const hcorepp::kernels::RunContext &aContext, const CompressionParameters &aSVDArguments, + hcorepp::dataunits::MemoryUnit &aMemoryUnit, bool aCholesky) { + + size_t flops = 0; + auto *tile_a = aMatrixA.GetTilePointer(aRowIdxA, aColIdxA); + auto *tile_b = aMatrixB.GetTilePointer(aRowIdxB, aColIdxB); + auto *tile_c = aMatrixC.GetTilePointer(aRowIdxC, aColIdxC); + flops += hicmapp::operations::TileOperations::Gemm(aAlpha, *tile_a, aAOp, *tile_b, aBOp, + aBeta, *tile_c, aContext, aMemoryUnit, aSVDArguments, + aCholesky); + return flops; + } + + template + int DefaultRuntime::Sync() { + return 0; + } + + template + void DefaultRuntime::Flush(const Matrix &aMatrix) { + + } + + template + void DefaultRuntime::Finalize() { + + } + + template + void DefaultRuntime::UnRegisterHandles(Matrix &A) { + + } + + template + void DefaultRuntime::RegisterHandles(Matrix &A) { + + } + + template + void DefaultRuntime::Flush(const Matrix &aMatrix, const size_t aRowIdx, const size_t aColIdx) { + + } + + template + size_t DefaultRuntime::Syrk(Matrix &aMatrixA, const size_t &aRowIdxA, + const size_t &aColIdxA, const blas::Op &aAOp, Matrix &aMatrixC, + const size_t &aRowIdxC, const size_t &aColIdxC, const blas::Uplo aUplo, T aAlpha, + T aBeta, const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) { + size_t flops = 0; + auto *tile_a = aMatrixA.GetTilePointer(aRowIdxA, aColIdxA); + auto *tile_c = aMatrixC.GetTilePointer(aRowIdxC, aColIdxC); + + flops += 
hicmapp::operations::TileOperations::Syrk(aAlpha, *tile_a, aAOp, aUplo, aBeta, *tile_c, aContext, + aMemoryUnit); + + return flops; + } + + template + size_t DefaultRuntime::Potrf(Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, + const blas::Uplo aUplo, + const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) { + size_t flops = 0; + auto *tile_a = aMatrixA.GetTilePointer(aRowIdxA, aColIdxA); + + flops += hicmapp::operations::TileOperations::Potrf(*tile_a, aUplo, aContext, aMemoryUnit); + + return flops; + } + + template + size_t DefaultRuntime::Trsm(blas::Side aSide, blas::Uplo aUplo, blas::Op aTrans, blas::Diag aDiag, T aAlpha, + Matrix &aMatrixADiagonal, const size_t &aRowIdxA, const size_t &aColIdxA, + Matrix &aMatrixAUV, const size_t &aRowIdxB, const size_t &aColIdxB, + const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) { + size_t flops = 0; + auto *tile_a = aMatrixADiagonal.GetTilePointer(aRowIdxA, aColIdxA); + auto *tile_b = aMatrixAUV.GetTilePointer(aRowIdxB, aColIdxB); + + flops += hicmapp::operations::TileOperations::Trsm(aSide, aUplo, aTrans, aDiag, aAlpha, *tile_a, *tile_b, + aContext, aMemoryUnit); + + return flops; + } + + template + size_t + DefaultRuntime::GenerateDiagonalTile(Matrix &aMatrixUV, Matrix &aMatrixDiag, const size_t &aRowIdxDiag, + const size_t &aColIdxDiag, Matrix &aMatrixRK, const size_t &aRowIdx, + const size_t &aColIdx, unsigned long long int seed, size_t maxrank, + double tol, + size_t compress_diag, Matrix &aMatrixDense, + const hcorepp::kernels::RunContext &aContext, bool diagonal_tile) { + size_t flops = 0; + + auto *tile_auv = aMatrixUV.GetTilePointer(aRowIdx, aColIdx); + auto *tile_ark = aMatrixRK.GetTilePointer(aRowIdx, aColIdx); + auto *tile_dense = aMatrixDense.GetTilePointer(aRowIdx, aColIdx); + + Tile *tile_diag = nullptr; + + size_t rows, cols, lda_diag, ld_uv; + + if (diagonal_tile) { + tile_diag = 
aMatrixDiag.GetTilePointer(aRowIdxDiag, aColIdxDiag); + + rows = aMatrixDiag.GetNumOfRowsInTile(); + if (aRowIdxDiag == aMatrixDiag.GetNumOfGlobalTilesInRows() - 1) { + rows = aMatrixDiag.GetGlobalNumOfRowsInMatrix() - aRowIdxDiag * aMatrixDiag.GetNumOfRowsInTile(); + } + cols = rows; + + lda_diag = rows;// aMatrixDiag.GetTilePointer(aRowIdxDiag, 0).GetLeadingDim(); + ld_uv = 0; + + } else { + rows = aMatrixDiag.GetNumOfRowsInTile(); + if (aRowIdxDiag == aMatrixDiag.GetNumOfGlobalTilesInRows() - 1) { + rows = aMatrixDiag.GetGlobalNumOfRowsInMatrix() - aRowIdxDiag * aMatrixDiag.GetNumOfRowsInTile(); + } + + cols = aMatrixUV.GetNumOfRowsInTile(); + if (aRowIdx == aMatrixUV.GetNumOfGlobalTilesInRows() - 1) { + cols = aMatrixUV.GetGlobalNumOfRowsInMatrix() - aRowIdx * aMatrixUV.GetNumOfRowsInTile(); + } + + lda_diag = rows;//aMatrixDiag.GetTilePointer(aRowIdxDiag, 0).GetLeadingDim(); + ld_uv = cols;//aMatrixUV.GetTilePointer(aRowIdx, 0).GetLeadingDim(); + + } + + flops += hicmapp::operations::TileOperations::GenerateDiagonalTile(tile_auv, tile_ark, tile_dense, + tile_diag, aRowIdx, aColIdx, + seed, maxrank, tol, compress_diag, + lda_diag, ld_uv, ld_uv, rows, cols, + aContext); + + return flops; + } + + template + size_t DefaultRuntime::LaCpy(Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, + Matrix &aMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, + const hcorepp::kernels::RunContext &aContext) { + + size_t flops = 0; + auto* tile_a = aMatrixA.GetTilePointer(aRowIdxA, aColIdxA); + int rows = tile_a->GetNumOfRows(); + int cols = rows; + + auto tile_b = aMatrixB.GetTilePointer(aRowIdxB, aColIdxB); + + flops += hicmapp::operations::TileOperations::LaCpy(rows, cols, *tile_a, *tile_b, aContext); + + return flops; + } + + template + size_t DefaultRuntime::Uncompress(Matrix &aMatrixUV, Matrix &aMatrixDense, Matrix &aMatrixRk, + const size_t &aRowIdx, const size_t &aColIdx) { + + Tile *tile_d = aMatrixDense.GetTilePointer(aRowIdx, aColIdx); + size_t 
nrows = tile_d->GetNumOfRows(); + size_t ldad = tile_d->GetLeadingDim(); + auto dense_data = tile_d->GetTileSubMatrix(0); + + auto *tile_uv = static_cast *>(aMatrixUV.GetTilePointer(aRowIdx, aColIdx)); + size_t ldauv = tile_uv->GetULeadingDim(); + auto u_data = tile_uv->GetUMatrix(); + auto v_data = tile_uv->GetVMatrix(); + size_t ncols = aMatrixUV.GetNumOfRowsInTile(); + + T rk_data = tile_uv->GetTileRank(); + + hicmapp::operations::TileOperations::UnCompressTile(nrows, ncols, 1, + u_data, &rk_data, ldauv, + v_data, ldauv, 0, dense_data, ldad); + + } + + HICMAPP_INSTANTIATE_CLASS(DefaultRuntime) + +} \ No newline at end of file diff --git a/src/runtime/starpu/CMakeLists.txt b/src/runtime/starpu/CMakeLists.txt new file mode 100644 index 0000000..67ddcab --- /dev/null +++ b/src/runtime/starpu/CMakeLists.txt @@ -0,0 +1,7 @@ +add_subdirectory(starpu-codelets) +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/starpu.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/factory.cpp + ${SOURCES} + PARENT_SCOPE + ) diff --git a/src/runtime/starpu/factory.cpp b/src/runtime/starpu/factory.cpp new file mode 100644 index 0000000..9d71d02 --- /dev/null +++ b/src/runtime/starpu/factory.cpp @@ -0,0 +1,50 @@ +#include +#include +#include "hicmapp/common/definitions.h" +#ifdef USE_OMP +#include +#endif + +namespace hicmapp { + namespace runtime { + + template + std::unordered_map *> &CodeletFactory::GetMakersMap() { + static auto mMakers = std::unordered_map *>(); + return mMakers; + } + + template + std::unordered_map &CodeletFactory::GetCodeletsMap() { + static auto mCodelets = std::unordered_map(); + return mCodelets; + + } + + template + void CodeletFactory::RegisterMaker(CodeletType aType, MakerInterface *aMaker) { + if (GetMakersMap().find(aType) == GetMakersMap().end()) { + GetMakersMap().insert(std::make_pair(aType, aMaker)); + } else { + std::cout << "The Maker passed is already registered. 
\n"; + } + } + + template + StarpuCodelet *CodeletFactory::CreateCodelet(CodeletType aType) { + auto &makers_map = GetMakersMap(); + auto &codelets_map = GetCodeletsMap(); + + if (makers_map.find(aType) != makers_map.end()) { + if (codelets_map.find(aType) == codelets_map.end()) { + codelets_map[aType] = makers_map[aType]->CreateObject(); + } + return codelets_map[aType]; + } else { + throw std::runtime_error("Requested codelet does not have a registered maker"); + } + } + + HICMAPP_INSTANTIATE_CLASS(CodeletFactory) + } +} \ No newline at end of file diff --git a/src/runtime/starpu/starpu-codelets/CMakeLists.txt b/src/runtime/starpu/starpu-codelets/CMakeLists.txt new file mode 100644 index 0000000..f470852 --- /dev/null +++ b/src/runtime/starpu/starpu-codelets/CMakeLists.txt @@ -0,0 +1,14 @@ +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/generate-codelet.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/gemm-codelet.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/GenerateCompressedDataCodelet.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/syrk-codelet.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/potrf-codelet.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/trsm-codelet.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/generate-dgytlr-diag-codelet.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/generate-dgytlr-codelet.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/uncompress-codelet.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/lacpy-codelet.cpp + ${SOURCES} + PARENT_SCOPE + ) \ No newline at end of file diff --git a/src/runtime/starpu/starpu-codelets/GenerateCompressedDataCodelet.cpp b/src/runtime/starpu/starpu-codelets/GenerateCompressedDataCodelet.cpp new file mode 100644 index 0000000..844ab32 --- /dev/null +++ b/src/runtime/starpu/starpu-codelets/GenerateCompressedDataCodelet.cpp @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include "hicmapp/tile-operations/TileOperations.hpp" +#include "hcorepp/kernels/memory.hpp" +#include "hcorepp/operators/interface/TilePacker.hpp" + +using namespace hicmapp::runtime; + +namespace hicmapp::runtime { + + template const bool 
GenerateCompressedDataCodelet::registered_ = GenerateCompressedDataCodelet::Register(); + + template + bool GenerateCompressedDataCodelet::Register() { + static auto maker = new hicmapp::runtime::CodeletMaker( + CodeletType::GENERATE_COMPRESSED_DATA); + return true; + } + + template + starpu_codelet *GenerateCompressedDataCodelet::GetCodelet() { + return &this->cl_dhagcm; + } + + template + GenerateCompressedDataCodelet::GenerateCompressedDataCodelet() { + cl_dhagcm = {.where=STARPU_CPU, .type = STARPU_SEQ, + .cpu_funcs={cl_dhagcm_cpu_func}, + .cuda_funcs={}, .cuda_flags={(0)}, .nbuffers=((2)), + /// @Todo: add the model used in old hicma.. + .model={}, .name="dhagcm"}; + } + + template + void GenerateCompressedDataCodelet::cl_dhagcm_cpu_func(void **descr, void *cl_arg) { + auto *metadata = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[0]); + auto *tile_data = (T *) STARPU_MATRIX_GET_PTR(descr[1]); + + size_t tile_col_index; + size_t tile_row_index; + hcorepp::operators::CompressionParameters aParams; + + starpu_codelet_unpack_args(cl_arg, &tile_row_index, + &tile_col_index, &aParams); + + hcorepp::kernels::RunContext &context = hcorepp::kernels::ContextManager::GetInstance().GetContext(); + + auto *tile = hcorepp::operators::TilePacker::PackTile(*metadata, tile_data, context); + + hicmapp::operations::TileOperations::GenerateCompressedMatrix(*(hcorepp::operators::CompressedTile *) tile, + tile_row_index, tile_col_index, aParams); + + metadata->mMatrixRank = ((hcorepp::operators::Tile *) tile)->GetTileRank(); + } + + HICMAPP_INSTANTIATE_CLASS(GenerateCompressedDataCodelet) +} + diff --git a/src/runtime/starpu/starpu-codelets/gemm-codelet.cpp b/src/runtime/starpu/starpu-codelets/gemm-codelet.cpp new file mode 100644 index 0000000..7aa2aa5 --- /dev/null +++ b/src/runtime/starpu/starpu-codelets/gemm-codelet.cpp @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include "hicmapp/tile-operations/TileOperations.hpp" + +using 
namespace hicmapp::runtime; + +namespace hicmapp::runtime { + + template const bool GemmCodelet::registered_ = GemmCodelet::Register(); + + template + bool GemmCodelet::Register() { + static auto maker = new hicmapp::runtime::CodeletMaker(CodeletType::GEMM); + return true; + } + + template + starpu_codelet *GemmCodelet::GetCodelet() { + return &this->cl_dgemm; + } + + template + GemmCodelet::GemmCodelet() { + cl_dgemm = { +#ifdef USE_CUDA + .where= STARPU_CPU | STARPU_CUDA, + .cpu_funcs={cl_dgemm_func}, + .cuda_funcs={}, + .cuda_flags={0}, +#else + .where=STARPU_CPU, + .cpu_funcs={cl_dgemm_func}, + .cuda_funcs={}, + .cuda_flags={(0)}, +#endif + .nbuffers=(6), + .model={}, + .name="dgemm"}; + } + + template + void GemmCodelet::cl_dgemm_func(void **descr, void *cl_arg) { + T alpha, beta; + hcorepp::operators::CompressionParameters parameters; + blas::Op AOp, BOp; + hcorepp::operators::TileMetadata *metadata_a, *metadata_b, *metadata_c; + bool cholesky; + + starpu_codelet_unpack_args(cl_arg, &alpha, &AOp, &BOp, &beta, ¶meters, &cholesky); + + metadata_a = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[0]); + auto *tile_a = (T *) STARPU_MATRIX_GET_PTR(descr[1]); + metadata_b = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[2]); + auto *tile_b = (T *) STARPU_MATRIX_GET_PTR(descr[3]); + metadata_c = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[4]); + auto *tile_c = (T *) STARPU_MATRIX_GET_PTR(descr[5]); + + hcorepp::dataunits::MemoryHandler &memory_handler = hcorepp::dataunits::MemoryHandler::GetInstance(); + hcorepp::kernels::RunContext &context = hcorepp::kernels::ContextManager::GetInstance().GetContext(); + +// static int index = 0; + auto *a = hcorepp::operators::TilePacker::PackTile(*metadata_a, tile_a, context); + auto *b = hcorepp::operators::TilePacker::PackTile(*metadata_b, tile_b, context); + auto *c = hcorepp::operators::TilePacker::PackTile(*metadata_c, tile_c, context); +// auto &memory_unit = 
memory_handler.GetMemoryUnit(index); + hcorepp::dataunits::MemoryUnit *memory_unit = new hcorepp::dataunits::MemoryUnit(context); + hicmapp::operations::TileOperations::Gemm(alpha, *a, AOp, *b, BOp, beta, *c, context, + *memory_unit, parameters, cholesky); + metadata_c->mMatrixRank = ((hcorepp::operators::Tile *) c)->GetTileRank(); + memory_unit->FreeAllocations(); +// index++; + delete memory_unit; +// delete a; +// delete b; +// delete c; + } + + HICMAPP_INSTANTIATE_CLASS(GemmCodelet) +} + diff --git a/src/runtime/starpu/starpu-codelets/generate-codelet.cpp b/src/runtime/starpu/starpu-codelets/generate-codelet.cpp new file mode 100644 index 0000000..f6a9b9b --- /dev/null +++ b/src/runtime/starpu/starpu-codelets/generate-codelet.cpp @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include "hicmapp/tile-operations/TileOperations.hpp" +#include "hcorepp/operators/interface/TilePacker.hpp" + +using namespace hicmapp::runtime; + +namespace hicmapp::runtime { + + template const bool GenerateCodelet::registered_ = GenerateCodelet::Register(); + + template + bool GenerateCodelet::Register() { + static auto maker = new hicmapp::runtime::CodeletMaker(CodeletType::GENERATE_DENSE_DATA); + return true; + } + + template + starpu_codelet *GenerateCodelet::GetCodelet() { + return &this->cl_dhagdm; + } + + template + GenerateCodelet::GenerateCodelet() { + cl_dhagdm = {.where=STARPU_CPU, .type = STARPU_SEQ, + .cpu_funcs={cl_dhagdm_cpu_func}, + .cuda_funcs={}, .cuda_flags={(0)}, .nbuffers=((2)), + .model={}, .name="dhagdm"}; + } + + template + void GenerateCodelet::cl_dhagdm_cpu_func(void **descr, void *cl_arg) { + auto* metadata = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[0]); + auto *tile_data = (T *) STARPU_MATRIX_GET_PTR(descr[1]); + + size_t tile_col_index; + size_t tile_row_index; + + starpu_codelet_unpack_args(cl_arg, &tile_row_index, + &tile_col_index); + + hcorepp::kernels::RunContext &context = 
hcorepp::kernels::ContextManager::GetInstance().GetContext(); + + auto *tile = static_cast *>(hcorepp::operators::TilePacker::PackTile(*metadata, + tile_data, + context)); + + hicmapp::operations::TileOperations::GenerateDenseTile(*tile, + tile_row_index, + tile_col_index); + } + + HICMAPP_INSTANTIATE_CLASS(GenerateCodelet) +} + diff --git a/src/runtime/starpu/starpu-codelets/generate-dgytlr-codelet.cpp b/src/runtime/starpu/starpu-codelets/generate-dgytlr-codelet.cpp new file mode 100644 index 0000000..a1c65a4 --- /dev/null +++ b/src/runtime/starpu/starpu-codelets/generate-dgytlr-codelet.cpp @@ -0,0 +1,87 @@ +// +// Created by mirna on 01/11/23. +// +#include +#include +#include +#include +#include +#include "hicmapp/tile-operations/TileOperations.hpp" + +using namespace hicmapp::runtime; + +namespace hicmapp::runtime { + + template const bool GenerateDgytlrCodelet::registered_ = GenerateDgytlrCodelet::Register(); + + template + bool GenerateDgytlrCodelet::Register() { + static auto maker = new hicmapp::runtime::CodeletMaker(CodeletType::DGYTLR); + return true; + } + + template + starpu_codelet *GenerateDgytlrCodelet::GetCodelet() { + return &this->cl_dgytlr; + } + + template + GenerateDgytlrCodelet::GenerateDgytlrCodelet() { + cl_dgytlr= { +#ifdef USE_CUDA + .where= STARPU_CPU | STARPU_CUDA, + .cpu_funcs={cl_dgytlr_func}, + .cuda_funcs={}, + .cuda_flags={0}, +#else + .where=STARPU_CPU, + .cpu_funcs={cl_dgytlr_func}, + .cuda_funcs={}, + .cuda_flags={(0)}, +#endif + .nbuffers=(6), + .model={}, + .name="dgytlr"}; + } + + template + void GenerateDgytlrCodelet::cl_dgytlr_func(void **descr, void *cl_arg) { + hcorepp::operators::TileMetadata *metadata_auv, *metadata_ark, *metadata_dense; + size_t row_idx, col_idx; + size_t lda, ldu, ldv, rows, cols; + unsigned long long int seed; + size_t maxrank; + double tol; + size_t compress_diag; + + starpu_codelet_unpack_args(cl_arg, &row_idx, &col_idx, &seed, &maxrank, &tol, &compress_diag, &lda, &ldu, &ldv, + &rows,&cols); + + 
metadata_auv = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[0]); + auto *tile_auv_data = (T *) STARPU_MATRIX_GET_PTR(descr[1]); + metadata_ark = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[2]); + auto *tile_ark_data = (T *) STARPU_MATRIX_GET_PTR(descr[3]); + metadata_dense = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[4]); + auto *tile_dense_data = (T *) STARPU_MATRIX_GET_PTR(descr[5]); + + hcorepp::dataunits::MemoryHandler &memory_handler = hcorepp::dataunits::MemoryHandler::GetInstance(); + hcorepp::kernels::RunContext &context = hcorepp::kernels::ContextManager::GetInstance().GetContext(); + + auto *tile_auv = hcorepp::operators::TilePacker::PackTile(*metadata_auv, tile_auv_data, context); + auto *tile_ark = hcorepp::operators::TilePacker::PackTile(*metadata_ark, tile_ark_data, context); + auto *tile_dense = hcorepp::operators::TilePacker::PackTile(*metadata_dense, tile_dense_data, context); + + auto memory_unit = memory_handler.GetMemoryUnit(); + + size_t flops = 0; + flops += hicmapp::operations::TileOperations::GenerateDiagonalTile(tile_auv, tile_ark, tile_dense, + nullptr, row_idx, col_idx, + seed, maxrank, tol, compress_diag, + lda, ldu, ldv, rows, cols, context); + metadata_auv->mMatrixRank = ((hcorepp::operators::Tile *) tile_auv)->GetTileRank(); + memory_unit.FreeAllocations(); + } + + HICMAPP_INSTANTIATE_CLASS(GenerateDgytlrCodelet) +} + diff --git a/src/runtime/starpu/starpu-codelets/generate-dgytlr-diag-codelet.cpp b/src/runtime/starpu/starpu-codelets/generate-dgytlr-diag-codelet.cpp new file mode 100644 index 0000000..c4e082f --- /dev/null +++ b/src/runtime/starpu/starpu-codelets/generate-dgytlr-diag-codelet.cpp @@ -0,0 +1,92 @@ +// +// Created by mirna on 01/11/23. 
+// +#include +#include +#include +#include +#include +#include "hicmapp/tile-operations/TileOperations.hpp" + +using namespace hicmapp::runtime; + +namespace hicmapp::runtime { + + template const bool GenerateDgytlrDiagonalCodelet::registered_ = GenerateDgytlrDiagonalCodelet::Register(); + + template + bool GenerateDgytlrDiagonalCodelet::Register() { + static auto maker = new hicmapp::runtime::CodeletMaker( + CodeletType::DGYTLR_DIAG); + return true; + } + + template + starpu_codelet *GenerateDgytlrDiagonalCodelet::GetCodelet() { + return &this->cl_dgytlr_diag; + } + + template + GenerateDgytlrDiagonalCodelet::GenerateDgytlrDiagonalCodelet() { + cl_dgytlr_diag = { +#ifdef USE_CUDA + .where= STARPU_CPU | STARPU_CUDA, + .cpu_funcs={cl_dgytlr_diag_func}, + .cuda_funcs={}, + .cuda_flags={0}, +#else + .where=STARPU_CPU, + .cpu_funcs={cl_dgytlr_diag_func}, + .cuda_funcs={}, + .cuda_flags={(0)}, +#endif + .nbuffers=(8), + .model={}, + .name="dgytlr_diag"}; + } + + template + void GenerateDgytlrDiagonalCodelet::cl_dgytlr_diag_func(void **descr, void *cl_arg) { + hcorepp::operators::TileMetadata *metadata_auv, *metadata_ark, *metadata_dense, *metadata_diag; + size_t row_idx, col_idx; + size_t lda, ldu, ldv, rows, cols; + unsigned long long int seed; + size_t maxrank; + double tol; + size_t compress_diag; + + starpu_codelet_unpack_args(cl_arg, &row_idx, &col_idx, &seed, &maxrank, &tol, &compress_diag, &lda, &ldu, &ldv, + &rows, &cols); + + metadata_auv = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[0]); + auto *tile_auv_data = (T *) STARPU_MATRIX_GET_PTR(descr[1]); + metadata_ark = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[2]); + auto *tile_ark_data = (T *) STARPU_MATRIX_GET_PTR(descr[3]); + metadata_dense = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[4]); + auto *tile_dense_data = (T *) STARPU_MATRIX_GET_PTR(descr[5]); + metadata_diag = (hcorepp::operators::TileMetadata *) 
STARPU_VARIABLE_GET_PTR(descr[6]); + auto *tile_diag_data = (T *) STARPU_MATRIX_GET_PTR(descr[7]); + + hcorepp::dataunits::MemoryHandler &memory_handler = hcorepp::dataunits::MemoryHandler::GetInstance(); + hcorepp::kernels::RunContext &context = hcorepp::kernels::ContextManager::GetInstance().GetContext(); + + auto *tile_auv = hcorepp::operators::TilePacker::PackTile(*metadata_auv, tile_auv_data, context); + auto *tile_ark = hcorepp::operators::TilePacker::PackTile(*metadata_ark, tile_ark_data, context); + auto *tile_dense = hcorepp::operators::TilePacker::PackTile(*metadata_dense, tile_dense_data, context); + auto *tile_diag = hcorepp::operators::TilePacker::PackTile(*metadata_diag, tile_diag_data, context); + + auto memory_unit = memory_handler.GetMemoryUnit(); + + size_t flops = 0; + flops += hicmapp::operations::TileOperations::GenerateDiagonalTile(tile_auv, tile_ark, tile_dense, + tile_diag, row_idx, col_idx, + seed, maxrank, tol, compress_diag, + lda, ldu, ldv, rows, cols, context); + metadata_auv->mMatrixRank = ((hcorepp::operators::Tile *) tile_auv)->GetTileRank(); + + memory_unit.FreeAllocations(); + } + + HICMAPP_INSTANTIATE_CLASS(GenerateDgytlrDiagonalCodelet) +} + diff --git a/src/runtime/starpu/starpu-codelets/lacpy-codelet.cpp b/src/runtime/starpu/starpu-codelets/lacpy-codelet.cpp new file mode 100644 index 0000000..329b441 --- /dev/null +++ b/src/runtime/starpu/starpu-codelets/lacpy-codelet.cpp @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include +#include "hicmapp/tile-operations/TileOperations.hpp" + +using namespace hicmapp::runtime; + +namespace hicmapp::runtime { + + template const bool LacpyCodelet::registered_ = LacpyCodelet::Register(); + + template + bool LacpyCodelet::Register() { + static auto maker = new hicmapp::runtime::CodeletMaker(CodeletType::LACPY); + return true; + } + + template + starpu_codelet *LacpyCodelet::GetCodelet() { + return &this->cl_lacpy; + } + + template + LacpyCodelet::LacpyCodelet() { + cl_lacpy = { 
+#ifdef USE_CUDA
+                // NOTE(review): STARPU_CUDA is advertised here but cuda_funcs is empty.
+                .where= STARPU_CPU | STARPU_CUDA,
+                .cpu_funcs={cl_lacpy_func},
+                .cuda_funcs={},
+                .cuda_flags={0},
+#else
+                .where=STARPU_CPU,
+                .cpu_funcs={cl_lacpy_func},
+                .cuda_funcs={},
+                .cuda_flags={(0)},
+#endif
+                // Four buffers: (metadata, data) for tile A and for tile B.
+                .nbuffers=(4),
+                .model={},
+                .name="lacpy"};
+    }
+
+    // CPU implementation executed by StarPU: copies tile A into tile B via
+    // TileOperations::LaCpy.
+    //   descr[0], descr[1] - metadata (variable buffer) and elements (matrix buffer) of A
+    //   descr[2], descr[3] - metadata (variable buffer) and elements (matrix buffer) of B
+    //   cl_arg             - not read by this function
+    template
+    void LacpyCodelet::cl_lacpy_func(void **descr, void *cl_arg) {
+        hcorepp::operators::TileMetadata *metadata_a, *metadata_b;
+
+        metadata_a = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[0]);
+        auto *tile_a_data = (T *) STARPU_MATRIX_GET_PTR(descr[1]);
+        metadata_b = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[2]);
+        auto *tile_b_data = (T *) STARPU_MATRIX_GET_PTR(descr[3]);
+
+        hcorepp::dataunits::MemoryHandler &memory_handler = hcorepp::dataunits::MemoryHandler::GetInstance();
+        hcorepp::kernels::RunContext &context = hcorepp::kernels::ContextManager::GetInstance().GetContext();
+
+        // NOTE(review): the tile objects returned by PackTile are never deleted
+        // here - confirm who owns them.
+        auto *tile_a = hcorepp::operators::TilePacker::PackTile(*metadata_a, tile_a_data,
+                context);
+        auto *tile_b = hcorepp::operators::TilePacker::PackTile(*metadata_b, tile_b_data,
+                context);
+
+        // NOTE(review): memory_unit is not passed to LaCpy; it is only used for the
+        // FreeAllocations() call at the end. Whether this is a copy or a reference
+        // depends on GetMemoryUnit's return type - confirm.
+        auto memory_unit = memory_handler.GetMemoryUnit();
+
+        // NOTE(review): the column count is taken from A's row count, so the copy
+        // is treated as square - confirm non-square tiles never reach this codelet.
+        int rows = tile_a->GetNumOfRows();
+        int cols = rows;
+
+        hicmapp::operations::TileOperations::LaCpy(rows, cols, *tile_a, *tile_b,
+                context);
+
+        memory_unit.FreeAllocations();
+    }
+
+    HICMAPP_INSTANTIATE_CLASS(LacpyCodelet)
+}
+
diff --git a/src/runtime/starpu/starpu-codelets/potrf-codelet.cpp b/src/runtime/starpu/starpu-codelets/potrf-codelet.cpp
new file mode 100644
index 0000000..dffe717
--- /dev/null
+++ b/src/runtime/starpu/starpu-codelets/potrf-codelet.cpp
@@ -0,0 +1,73 @@
+#include
+#include
+#include
+#include
+#include
+#include "hicmapp/tile-operations/TileOperations.hpp"
+
+using namespace hicmapp::runtime;
+
+namespace hicmapp::runtime {
+
+    template const bool PotrfCodelet::registered_ = PotrfCodelet::Register();
+
+    // Allocates a function-local static CodeletMaker for CodeletType::POTRF.
+    template
+    bool PotrfCodelet::Register() {
+        static auto maker = new
hicmapp::runtime::CodeletMaker(CodeletType::POTRF); + return true; + } + + template + starpu_codelet *PotrfCodelet::GetCodelet() { + return &this->cl_potrf; + } + + template + PotrfCodelet::PotrfCodelet() { + cl_potrf = { +#ifdef USE_CUDA + .where= STARPU_CPU | STARPU_CUDA, + .cpu_funcs={cl_potrf_func}, + .cuda_funcs={}, + .cuda_flags={0}, +#else + .where=STARPU_CPU, + .cpu_funcs={cl_potrf_func}, + .cuda_funcs={}, + .cuda_flags={(0)}, +#endif + .nbuffers=(2), + .model={}, + .name="potrf"}; + } + + template + void PotrfCodelet::cl_potrf_func(void **descr, void *cl_arg) { + hcorepp::operators::CompressionParameters parameters; + blas::Uplo uplo; + hcorepp::operators::TileMetadata *metadata_a; + + starpu_codelet_unpack_args(cl_arg, &uplo); + + metadata_a = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[0]); + auto *tile_a_data = (T *) STARPU_MATRIX_GET_PTR(descr[1]); + + hcorepp::dataunits::MemoryHandler &memory_handler = hcorepp::dataunits::MemoryHandler::GetInstance(); + hcorepp::kernels::RunContext &context = hcorepp::kernels::ContextManager::GetInstance().GetContext(); + + auto *tile_a = hcorepp::operators::TilePacker::PackTile(*metadata_a, tile_a_data, context); +// auto memory_unit = memory_handler.GetMemoryUnit(); + hcorepp::dataunits::MemoryUnit *memory_unit = new hcorepp::dataunits::MemoryUnit(context); + + size_t flops = 0; + + flops += hicmapp::operations::TileOperations::Potrf(*tile_a, uplo, context, *memory_unit); + + memory_unit->FreeAllocations(); + + delete memory_unit; + } + + HICMAPP_INSTANTIATE_CLASS(PotrfCodelet) +} + diff --git a/src/runtime/starpu/starpu-codelets/syrk-codelet.cpp b/src/runtime/starpu/starpu-codelets/syrk-codelet.cpp new file mode 100644 index 0000000..f46c137 --- /dev/null +++ b/src/runtime/starpu/starpu-codelets/syrk-codelet.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include +#include "hicmapp/tile-operations/TileOperations.hpp" + +using namespace hicmapp::runtime; + +namespace 
hicmapp::runtime {
+
+    template const bool SyrkCodelet::registered_ = SyrkCodelet::Register();
+
+    // Allocates a function-local static CodeletMaker for CodeletType::SYRK and
+    // returns true; the returned value is what initialises registered_ above.
+    // NOTE(review): presumably the maker's constructor adds itself to the codelet
+    // factory's makers map - CodeletMaker is not visible here, confirm.
+    template
+    bool SyrkCodelet::Register() {
+        static auto maker = new hicmapp::runtime::CodeletMaker(CodeletType::SYRK);
+        return true;
+    }
+
+    // Returns the address of this object's cl_syrk descriptor, so the pointer
+    // is only valid while this object is alive.
+    template
+    starpu_codelet *SyrkCodelet::GetCodelet() {
+        return &this->cl_syrk;
+    }
+
+    // Fills the StarPU codelet descriptor: cl_syrk_func as the CPU implementation,
+    // four data buffers ((metadata, elements) for A and for the diagonal tile),
+    // an empty performance model and the name "syrk".
+    template
+    SyrkCodelet::SyrkCodelet() {
+        cl_syrk = {
+#ifdef USE_CUDA
+                // NOTE(review): STARPU_CUDA is advertised here but cuda_funcs is empty.
+                .where= STARPU_CPU | STARPU_CUDA,
+                .cpu_funcs={cl_syrk_func},
+                .cuda_funcs={},
+                .cuda_flags={0},
+#else
+                .where=STARPU_CPU,
+                .cpu_funcs={cl_syrk_func},
+                .cuda_funcs={},
+                .cuda_flags={(0)},
+#endif
+                .nbuffers=(4),
+                .model={},
+                .name="syrk"};
+    }
+
+    // CPU implementation executed by StarPU: runs TileOperations::Syrk with
+    // tile A as input and the diagonal tile as the updated operand.
+    //   descr[0], descr[1] - metadata (variable buffer) and elements (matrix buffer) of A
+    //   descr[2], descr[3] - metadata (variable buffer) and elements (matrix buffer) of the diagonal tile
+    //   cl_arg             - packed arguments, unpacked in this order: alpha, AOp, uplo, beta
+    template
+    void SyrkCodelet::cl_syrk_func(void **descr, void *cl_arg) {
+        T alpha, beta;
+        blas::Op AOp;
+        blas::Uplo uplo;
+        hcorepp::operators::TileMetadata *metadata_a, *metadata_diag;
+
+        // The unpack order must match the order used when the task was inserted.
+        starpu_codelet_unpack_args(cl_arg, &alpha, &AOp, &uplo, &beta);
+
+        metadata_a = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[0]);
+        auto *tile_a_data = (T *) STARPU_MATRIX_GET_PTR(descr[1]);
+        metadata_diag = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[2]);
+        auto *tile_diag_data = (T *) STARPU_MATRIX_GET_PTR(descr[3]);
+
+        hcorepp::dataunits::MemoryHandler &memory_handler = hcorepp::dataunits::MemoryHandler::GetInstance();
+        hcorepp::kernels::RunContext &context = hcorepp::kernels::ContextManager::GetInstance().GetContext();
+
+        // NOTE(review): the tile objects returned by PackTile are never deleted
+        // here - confirm who owns them.
+        auto *tile_a = hcorepp::operators::TilePacker::PackTile(*metadata_a, tile_a_data, context);
+        auto *tile_diag = hcorepp::operators::TilePacker::PackTile(*metadata_diag, tile_diag_data, context);
+// auto memory_unit = memory_handler.GetMemoryUnit();
+
+        // Per-task scratch memory. An automatic object replaces the former
+        // new/delete pair, so it is destroyed on every exit path, including when
+        // Syrk throws.
+        hcorepp::dataunits::MemoryUnit memory_unit(context);
+
+        int flops = 0;
+        flops += hicmapp::operations::TileOperations::Syrk(alpha, *tile_a, AOp, uplo, beta, *tile_diag, context,
+                memory_unit);
+
+        // Kept explicit: the original released the allocations before destroying
+        // the unit, and the destructor's behaviour is not visible here.
+        memory_unit.FreeAllocations();
+    }
+
+
HICMAPP_INSTANTIATE_CLASS(SyrkCodelet) +} + diff --git a/src/runtime/starpu/starpu-codelets/trsm-codelet.cpp b/src/runtime/starpu/starpu-codelets/trsm-codelet.cpp new file mode 100644 index 0000000..99de95e --- /dev/null +++ b/src/runtime/starpu/starpu-codelets/trsm-codelet.cpp @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include "hicmapp/tile-operations/TileOperations.hpp" + +using namespace hicmapp::runtime; + +namespace hicmapp::runtime { + + template const bool TrsmCodelet::registered_ = TrsmCodelet::Register(); + + template + bool TrsmCodelet::Register() { + static auto maker = new hicmapp::runtime::CodeletMaker(CodeletType::TRSM); + return true; + } + + template + starpu_codelet *TrsmCodelet::GetCodelet() { + return &this->cl_trsm; + } + + template + TrsmCodelet::TrsmCodelet() { + cl_trsm = { +#ifdef USE_CUDA + .where= STARPU_CPU | STARPU_CUDA, + .cpu_funcs={cl_trsm_func}, + .cuda_funcs={}, + .cuda_flags={0}, +#else + .where=STARPU_CPU, + .cpu_funcs={cl_trsm_func}, + .cuda_funcs={}, + .cuda_flags={(0)}, +#endif + .nbuffers=(4), + .model={}, + .name="trsm"}; + } + + template + void TrsmCodelet::cl_trsm_func(void **descr, void *cl_arg) { + T alpha; + blas::Uplo uplo; + blas::Side side; + blas::Op trans; + blas::Diag diag; + hcorepp::operators::TileMetadata *metadata_a, *metadata_b; + + starpu_codelet_unpack_args(cl_arg, &side, &uplo, &trans, &diag, &alpha); + + metadata_a = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[0]); + auto *tile_a_data = (T *) STARPU_MATRIX_GET_PTR(descr[1]); + metadata_b = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[2]); + auto *tile_b_data = (T *) STARPU_MATRIX_GET_PTR(descr[3]); + + hcorepp::dataunits::MemoryHandler &memory_handler = hcorepp::dataunits::MemoryHandler::GetInstance(); + hcorepp::kernels::RunContext &context = hcorepp::kernels::ContextManager::GetInstance().GetContext(); + + auto *tile_a = hcorepp::operators::TilePacker::PackTile(*metadata_a, 
tile_a_data, context); + auto *tile_b = hcorepp::operators::TilePacker::PackTile(*metadata_b, tile_b_data, context); +// auto memory_unit = memory_handler.GetMemoryUnit(); + hcorepp::dataunits::MemoryUnit *memory_unit = new hcorepp::dataunits::MemoryUnit(context); + + int flops = 0; + flops += hicmapp::operations::TileOperations::Trsm(side, uplo, trans, diag, alpha, *tile_a, *tile_b, + context, *memory_unit); + +// metadata_b->mMatrixRank = ((Tile *) tile_b)->GetTileRank(); + + + memory_unit->FreeAllocations(); + delete memory_unit; + } + + HICMAPP_INSTANTIATE_CLASS(TrsmCodelet) +} + diff --git a/src/runtime/starpu/starpu-codelets/uncompress-codelet.cpp b/src/runtime/starpu/starpu-codelets/uncompress-codelet.cpp new file mode 100644 index 0000000..90dc91f --- /dev/null +++ b/src/runtime/starpu/starpu-codelets/uncompress-codelet.cpp @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include +#include "hicmapp/tile-operations/TileOperations.hpp" + +using namespace hicmapp::runtime; + +namespace hicmapp::runtime { + + template const bool UncompressCodelet::registered_ = UncompressCodelet::Register(); + + template + bool UncompressCodelet::Register() { + static auto maker = new hicmapp::runtime::CodeletMaker(CodeletType::UNCOMPRESS); + return true; + } + + template + starpu_codelet *UncompressCodelet::GetCodelet() { + return &this->cl_uncompress; + } + + template + UncompressCodelet::UncompressCodelet() { + cl_uncompress = { +#ifdef USE_CUDA + .where= STARPU_CPU | STARPU_CUDA, + .cpu_funcs={cl_uncompress_func}, + .cuda_funcs={}, + .cuda_flags={0}, +#else + .where=STARPU_CPU, + .cpu_funcs={cl_uncompress_func}, + .cuda_funcs={}, + .cuda_flags={(0)}, +#endif + .nbuffers=(6), + .model={}, + .name="uncompress"}; + } + + template + void UncompressCodelet::cl_uncompress_func(void **descr, void *cl_arg) { + T alpha = 1; + T beta = 0; + + hcorepp::common::BlasOperation a_trans, b_trans; + size_t ncols; + + hcorepp::operators::TileMetadata *metadata_auv, *metadata_dense, 
*metadata_rk; + + starpu_codelet_unpack_args(cl_arg, &a_trans, &b_trans, &ncols); + + metadata_auv = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[0]); + auto *tile_auv_data = (T *) STARPU_MATRIX_GET_PTR(descr[1]); + metadata_dense = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[2]); + auto *tile_dense_data = (T *) STARPU_MATRIX_GET_PTR(descr[3]); + metadata_rk = (hcorepp::operators::TileMetadata *) STARPU_VARIABLE_GET_PTR(descr[4]); + auto *tile_rk_data = (T *) STARPU_MATRIX_GET_PTR(descr[5]); + + hcorepp::dataunits::MemoryHandler &memory_handler = hcorepp::dataunits::MemoryHandler::GetInstance(); + hcorepp::kernels::RunContext &context = hcorepp::kernels::ContextManager::GetInstance().GetContext(); + + auto *tile_uv = hcorepp::operators::TilePacker::PackTile(*metadata_auv, tile_auv_data, context); + auto *tile_dense = hcorepp::operators::TilePacker::PackTile(*metadata_dense, tile_dense_data, context); + auto *tile_rk = hcorepp::operators::TilePacker::PackTile(*metadata_rk, tile_rk_data, context); + + auto memory_unit = memory_handler.GetMemoryUnit(); + + size_t nrows = tile_dense->GetNumOfRows(); + size_t ldad = tile_dense->GetLeadingDim(); + auto dense_data = tile_dense->GetTileSubMatrix(0); + + auto *tile_comp = static_cast *>(tile_uv); + size_t ldauv = tile_comp->GetULeadingDim(); + auto u_data = tile_comp->GetUMatrix(); + auto v_data = tile_comp->GetVMatrix(); + + T rk_data = tile_comp->GetTileRank(); + + hicmapp::operations::TileOperations::UnCompressTile(nrows, ncols, alpha, u_data, &rk_data, + ldauv, v_data, + ldauv, beta, dense_data, ldad); + + memory_unit.FreeAllocations(); + } + + HICMAPP_INSTANTIATE_CLASS(UncompressCodelet) +} + diff --git a/src/runtime/starpu/starpu.cpp b/src/runtime/starpu/starpu.cpp new file mode 100644 index 0000000..a7301fa --- /dev/null +++ b/src/runtime/starpu/starpu.cpp @@ -0,0 +1,841 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/** + * Malloc/Free of the data + */ +#ifdef STARPU_MALLOC_SIMULATION_FOLDED +#define FOLDED STARPU_MALLOC_SIMULATION_FOLDED +#else +#define FOLDED 0 +#endif + +namespace hicmapp::runtime { + static size_t tag_sep = 16; + + template + StarPu::StarPu(hicmapp::runtime::HicmaHardware &aHardware) { + if (!starpu_is_initialized()) { + mConf = (starpu_conf_t *) malloc(sizeof(starpu_conf_t)); + starpu_conf_init(mConf); + mConf->ncpus = aHardware.mCPUs; + mConf->ncuda = aHardware.mGPUs; + mConf->nopencl = 0; + if (mConf->ncuda > 0) { + mConf->sched_policy_name = "dmdas"; + } else { + /** + * Set scheduling to "ws"/"lws" if no cuda devices used because it + * behaves better on homogneneous architectures. If the user wants + * to use another scheduling strategy, he can set STARPU_SCHED + * env. var. to whatever he wants + */ +#if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION >= 2)) + mConf->sched_policy_name = "lws"; +#else + mConf->sched_policy_name = "ws"; +#endif + } + + auto ncpus = mConf->ncpus; + auto ncuda = mConf->ncuda; + auto nthreads_per_worker = aHardware.mThreadsPerWorker; + + if (ncpus + ncuda >= 64) { + ncpus = 64 - ncuda; + } + + if (ncpus != -1 && nthreads_per_worker != -1) { + int worker = 0; + + for (worker = 0; worker < ncpus; worker++) { + mConf->workers_bindid[worker] = (worker + 1) * nthreads_per_worker - 1; + } + + for (worker = 0; worker < ncpus; worker++) { + mConf->workers_bindid[worker + ncuda] = worker * nthreads_per_worker; + } + + mConf->use_explicit_workers_bindid = 1; + } + + +#ifdef BLAS_HAVE_MKL + auto envMKL = std::getenv("MKL_NUM_THREADS"); + int nmkl = 0; + if (envMKL != nullptr) { + std::string envStr(envMKL); + nmkl = std::stoi(envMKL); + } + +#endif + +#ifdef HAVE_STARPU_MALLOC_ON_NODE_SET_DEFAULT_FLAGS + starpu_malloc_on_node_set_default_flags(STARPU_MAIN_RAM, STARPU_MALLOC_PINNED | STARPU_MALLOC_COUNT 
+#ifdef STARPU_MALLOC_SIMULATION_FOLDED + | STARPU_MALLOC_SIMULATION_FOLDED +#endif + ); +#endif + +#ifdef HICMAPP_USE_MPI + int flag = 0; +#if !defined(CHAMELEON_SIMULATION) + MPI_Initialized(&flag); +#endif + int info = starpu_mpi_init_conf(nullptr, nullptr, 0, MPI_COMM_WORLD, mConf); + if (info) { + throw std::runtime_error("StarPu Initialization Failed"); + } +#else + int info = starpu_init(mConf); + + if (info) { + throw std::runtime_error("StarPu Initialization Failed"); + } + +#endif +#ifdef USE_CUDA + starpu_cublas_init(); +#endif + std::cout << "StarPu Initialized with " << aHardware.mCPUs << " CPU(s) and " << aHardware.mGPUs << " GPU(s)" + << std::endl; + } + + } + + template + TileHandlesMap &StarPu::GetMatrixHandles(size_t aMatrixId) { + // RunTimeHandle is not found.. + if (mRunTimeHandles.find(aMatrixId) == mRunTimeHandles.end()) { + mRunTimeHandles[aMatrixId] = TileHandlesMap{}; + } + return mRunTimeHandles[aMatrixId]; + } + + template + int StarPu::GenerateDenseMatrix(Matrix &aMatrix, size_t aTileIdxInRows, size_t aTileIdxInCols) { + + auto codelet = (new GenerateCodelet())->GetCodelet(); + + void (*callback)(void *) = nullptr; + auto tile_handles = StarPu::GetTileHandles(aMatrix, aTileIdxInRows, aTileIdxInCols); + + starpu_insert_task( + codelet, + STARPU_VALUE, &aTileIdxInRows, sizeof(size_t), + STARPU_VALUE, &aTileIdxInCols, sizeof(size_t), + STARPU_PRIORITY, 0, + STARPU_CALLBACK, callback, + #if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "dhagdm", + #endif + STARPU_RW, tile_handles[0], + STARPU_W, tile_handles[1], + 0); + + return 0; + } + + template + void + StarPu::RegisterTileHandles(Matrix &A, size_t aM, size_t aN) { + + common::StorageLayout storage_layout = A.GetStorageLayout(); + size_t handle_index = 0; + if (storage_layout == common::StorageLayout::HicmaRM) { + handle_index = aM * A.GetNumOfGlobalTilesInCols() + aN; + } else if (storage_layout == common::StorageLayout::HicmaCM) { + handle_index = aN * 
A.GetNumOfGlobalTilesInRows() + aM; + } + + TileHandlesMap &handles_map = this->GetMatrixHandles(A.GetMatrixId()); + + auto &tile_handles = GetTileHandles(handles_map, handle_index); + + int myrank = 0; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(A.GetContext().GetCommunicator().GetMPICommunicatior(), &myrank); +#endif + + int owner = A.GetTileOwnerId(aM, aN); + + int tag_offset = A.GetNumOfGlobalTilesInRows() * A.GetNumOfGlobalTilesInCols(); + + if (myrank == owner) { + auto tile = A.GetTilePointer(aM, aN); + auto metadata_data = hcorepp::operators::TilePacker::UnPackTile(*tile, + hcorepp::kernels::ContextManager::GetInstance().GetContext()); + + auto metadata = metadata_data.first; + auto tile_data = metadata_data.second; + if (this->mTileMetadata.count(A.GetMatrixId()) == 0) { + this->mTileMetadata[A.GetMatrixId()] = {}; + } + this->mTileMetadata[A.GetMatrixId()].emplace_back(aM, aN, metadata); + + auto &metadata_handle = tile_handles[0]; + int home_node = STARPU_MAIN_RAM; + + starpu_variable_data_register(&metadata_handle, home_node, (uintptr_t) (void *) metadata, + sizeof(TileMetadata)); + +#ifdef HICMAPP_USE_MPI + auto tag = (A.GetMatrixId() << tag_sep) | (handle_index); + starpu_data_set_rank(metadata_handle, owner); + starpu_data_set_tag(metadata_handle, tag); +#endif + + auto &tile_handle = tile_handles[1]; + + auto &dh = tile->GetDataHolder().get(); + + uint32_t leading_dim = dh.GetLeadingDim(); + uint32_t rows = dh.GetNumOfRows(); + uint32_t cols = dh.GetNumOfCols(); + + starpu_matrix_data_register(&tile_handle, home_node, (uintptr_t) (void *) tile->GetTileSubMatrix(0), + leading_dim, rows, cols, sizeof(T)); +#ifdef HICMAPP_USE_MPI + tag = (A.GetMatrixId() << tag_sep) | (handle_index + tag_offset); + starpu_data_set_rank(tile_handle, owner); + starpu_data_set_tag(tile_handle, tag); +#endif + + } else { + + auto &metadata_handle = tile_handles[0]; + int home_node = -1; + auto metadata = A.GetTileMetadata(aM, aN); + auto tile_rows = metadata->mNumOfRows; + 
auto tile_cols = metadata->mNumOfCols; + auto tile_layout = (blas::Layout) A.GetStorageLayout(); + auto tile_leading_dim = (tile_layout == blas::Layout::ColMajor) ? tile_rows : tile_cols; + auto tile_type = A.GetMatrixTileType(); + if (tile_type == COMPRESSED) { + size_t max_rank = std::max(std::min(tile_rows, tile_cols) / MAX_RANK_RATIO, 1UL); + auto num_elements = tile_rows * max_rank + max_rank * tile_cols; + tile_rows = tile_leading_dim = num_elements; + tile_cols = 1; + } + + + starpu_variable_data_register(&metadata_handle, home_node, (uintptr_t) (void *) nullptr, + sizeof(TileMetadata)); + +#ifdef HICMAPP_USE_MPI + auto tag = (A.GetMatrixId() << tag_sep) | (handle_index); + starpu_data_set_rank(metadata_handle, owner); + starpu_data_set_tag(metadata_handle, tag); +#endif + + auto &tile_handle = tile_handles[1]; + + /* Revise This */ + starpu_matrix_data_register(&tile_handle, home_node, (uintptr_t) nullptr, tile_leading_dim, + tile_rows, tile_cols, sizeof(T)); + +#ifdef HICMAPP_USE_MPI + tag = (A.GetMatrixId() << tag_sep) | (handle_index + tag_offset); + starpu_data_set_rank(tile_handle, owner); + starpu_data_set_tag(tile_handle, tag); +#endif + } + + } + + + template + TileHandles &StarPu::GetTileHandles(TileHandlesMap &aHandlesMap, size_t aHandleIdx) { + if (aHandlesMap.count(aHandleIdx) == 0) { + aHandlesMap[aHandleIdx] = {nullptr, nullptr}; + } + + return aHandlesMap[aHandleIdx]; + } + + template + TileHandles & + StarPu::GetTileHandles(Matrix &A, size_t aM, size_t aN) { + + common::StorageLayout storage_layout = A.GetStorageLayout(); + size_t handle_index = 0; + if (storage_layout == common::StorageLayout::HicmaRM) { + handle_index = aM * A.GetNumOfGlobalTilesInCols() + aN; + } else if (storage_layout == common::StorageLayout::HicmaCM) { + handle_index = aN * A.GetNumOfGlobalTilesInRows() + aM; + } + + TileHandlesMap &handle_map = this->GetMatrixHandles(A.GetMatrixId()); + + if (handle_map.count(handle_index) == 0) { + handle_map[handle_index] = 
{nullptr, nullptr}; + } + + return handle_map[handle_index]; + } + + template + int StarPu::Sync() { +#ifdef HICMAPP_USE_MPI + starpu_mpi_wait_for_all(MPI_COMM_WORLD); + starpu_mpi_cache_flush_all_data(MPI_COMM_WORLD); + starpu_mpi_barrier(MPI_COMM_WORLD); +#else + starpu_task_wait_for_all(); +#endif + return 0; + } + + template + StarPu::~StarPu() { + if (mConf != nullptr) { + free(mConf); + } +#if defined(HICMAPP_USE_MPI) + starpu_mpi_shutdown(); +#endif + +#ifdef USE_CUDA + starpu_cublas_shutdown(); +#else + starpu_shutdown(); +#endif + } + + template + void StarPu::Flush(const Matrix &aMatrix, const size_t aRowIdx, const size_t aColIdx) { + size_t handle_index = 0; + if (aMatrix.GetStorageLayout() == common::StorageLayout::HicmaRM) { + handle_index = aRowIdx * aMatrix.GetNumOfGlobalTilesInCols() + aColIdx; + } else if (aMatrix.GetStorageLayout() == common::StorageLayout::HicmaCM) { + handle_index = aColIdx * aMatrix.GetNumOfGlobalTilesInRows() + aRowIdx; + } + + auto &handle_map = this->GetMatrixHandles(aMatrix.GetMatrixId()); + + auto &tile_handles = GetTileHandles(handle_map, handle_index); +#ifdef HICMAPP_USE_MPI + auto tag_1 = starpu_mpi_data_get_tag(tile_handles[1]); +// starpu_mpi_r +// starpu_tag_wait(tag_1); +#endif + + + for (auto &handle: tile_handles) { + if (handle == nullptr) { + continue; + } +#ifdef HICMAPP_USE_MPI + starpu_mpi_cache_flush(MPI_COMM_WORLD, handle); +#endif + if (aMatrix.ContainsTile(aRowIdx, aColIdx)) { + starpu_data_acquire_cb(handle, STARPU_R, (void (*)(void *)) &starpu_data_release, handle); + } + } + } + + template + void StarPu::Flush(const Matrix &apMatrix) { + size_t num_of_global_tiles_in_rows = apMatrix.GetNumOfGlobalTilesInRows(); + size_t num_of_global_tiles_in_cols = apMatrix.GetNumOfGlobalTilesInCols(); + for (size_t i = 0; i < num_of_global_tiles_in_rows; i++) { + for (size_t j = 0; j < num_of_global_tiles_in_cols; j++) { + this->Flush(apMatrix, i, j); + } + } + } + + template + void StarPu::Finalize() { +#ifdef 
HICMAPP_USE_MPI + starpu_mpi_cache_flush_all_data(MPI_COMM_WORLD); +#endif + + starpu_task_wait_for_all(); +#ifdef HICMAPP_USE_MPI + starpu_mpi_barrier(MPI_COMM_WORLD); +#endif + + for (auto &matrix: mRunTimeHandles) { + for (auto &tile: matrix.second) { + this->UnRegisterTileHandles(tile.second); + } + } + +#ifdef HICMAPP_USE_MPI + starpu_mpi_shutdown(); +#else + starpu_shutdown(); +#endif + +#ifdef USE_CUDA + starpu_cublas_shutdown(); +#endif + } + + template + void StarPu::UnRegisterHandles(Matrix &A) { + auto &handles_map = this->GetMatrixHandles(A.GetMatrixId()); + + // Sync each metadata with the appropriate tile. + if (mTileMetadata.count(A.GetMatrixId())) { + for (auto &it: mTileMetadata[A.GetMatrixId()]) { + auto metadata = std::get<2>(it); + A.GetTilePointer(std::get<0>(it), std::get<1>(it))->UpdateMetadata(*metadata); + } + } + + for (auto &tile: handles_map) { + UnRegisterTileHandles(tile.second); + } + mRunTimeHandles.erase(A.GetMatrixId()); + } + + template + void StarPu::UnRegisterTileHandles(TileHandles &aHandles) { + for (auto &handle: aHandles) { + if (handle != nullptr) { + starpu_data_unregister(handle); + handle = nullptr; + } + } + } + + template + void StarPu::RegisterHandles(Matrix &A) { + size_t num_of_global_tiles_in_rows = A.GetNumOfGlobalTilesInRows(); + size_t num_of_global_tiles_in_cols = A.GetNumOfGlobalTilesInCols(); + + if (A.GetStorageLayout() == hicmapp::common::StorageLayout::HicmaCM) { + for (size_t col_idx = 0; col_idx < num_of_global_tiles_in_cols; col_idx++) { + for (size_t row_idx = 0; row_idx < num_of_global_tiles_in_rows; row_idx++) { + this->RegisterTileHandles(A, row_idx, col_idx); + } + } + } else if (A.GetStorageLayout() == hicmapp::common::StorageLayout::HicmaRM) { + for (size_t row_idx = 0; row_idx < num_of_global_tiles_in_rows; row_idx++) { + for (size_t col_idx = 0; col_idx < num_of_global_tiles_in_cols; col_idx++) { + this->RegisterTileHandles(A, row_idx, col_idx); + } + } + } + } + + template + int 
StarPu::GenerateCompressedMatrix(Matrix &apMatrix, size_t aTileIdxInRows, size_t aTileIdxInCols,
+                                        const CompressionParameters &aSVDArguments) {
+        // Submits one StarPU task that runs the compressed-data generation codelet for the
+        // tile at (aTileIdxInRows, aTileIdxInCols) of apMatrix. Always returns 0.
+
+        auto *codelet = (new GenerateCompressedDataCodelet())->GetCodelet();
+
+        void (*callback)(void *) = nullptr;
+
+        // Handle [0] is the tile's metadata handle and handle [1] its data handle
+        // (see RegisterTileHandles).
+        auto tile_handles = StarPu::GetTileHandles(apMatrix, aTileIdxInRows, aTileIdxInCols);
+
+        // Every STARPU_VALUE must be followed by a (pointer, size) pair. A dangling
+        // STARPU_VALUE used to sit before STARPU_PRIORITY, which made the variadic parser
+        // consume "STARPU_PRIORITY, 0" as a pointer/size pair and drop the priority.
+        starpu_insert_task(
+                codelet,
+                STARPU_VALUE, &aTileIdxInRows, sizeof(size_t),
+                STARPU_VALUE, &aTileIdxInCols, sizeof(size_t),
+                STARPU_VALUE, &aSVDArguments, sizeof(CompressionParameters),
+                STARPU_PRIORITY, 0,
+                STARPU_CALLBACK, callback,
+                #if defined(CHAMELEON_CODELETS_HAVE_NAME)
+                STARPU_NAME, "dhagcm",
+                #endif
+                STARPU_RW, tile_handles[0],
+                STARPU_W, tile_handles[1],
+                0);
+
+
+        return 0;
+    }
+
+    template
+    size_t
+    StarPu::Gemm(T aAlpha, Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA,
+                    const blas::Op &aAOp, Matrix &aMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB,
+                    const blas::Op &aBOp, T aBeta, Matrix &aMatrixC, const size_t &aRowIdxC,
+                    const size_t &aColIdxC, const hcorepp::kernels::RunContext &aContext,
+                    const CompressionParameters &aSVDArguments, hcorepp::dataunits::MemoryUnit &aMemoryUnit,
+                    bool aCholesky) {
+        auto codelet = (new GemmCodelet())->GetCodelet();
+
+        void (*callback)(void *) = nullptr;
+
+        int tile_a_owner = aMatrixA.GetTileOwnerId(aRowIdxA, aColIdxA);
+        int tile_b_owner = aMatrixB.GetTileOwnerId(aRowIdxB, aColIdxB);
+        int tile_c_owner = aMatrixC.GetTileOwnerId(aRowIdxC, aColIdxC);
+
+        int execution_rank = tile_c_owner;
+        if (tile_a_owner == tile_b_owner) {
+            execution_rank = tile_a_owner;
+        }
+
+        auto &tile_a_handles = StarPu::GetTileHandles(aMatrixA, aRowIdxA, aColIdxA);
+        auto &tile_b_handles = StarPu::GetTileHandles(aMatrixB, aRowIdxB, aColIdxB);
+        auto &tile_c_handles = StarPu::GetTileHandles(aMatrixC, aRowIdxC, aColIdxC);
+
+
+        auto tag_0 = starpu_mpi_data_get_tag(tile_c_handles[0]);
+        auto tag_1 = 
starpu_mpi_data_get_tag(tile_c_handles[1]); + + starpu_insert_task(codelet, + STARPU_VALUE, &aAlpha, sizeof(T), + STARPU_VALUE, &aAOp, sizeof(blas::Op), + STARPU_VALUE, &aBOp, sizeof(blas::Op), + STARPU_VALUE, &aBeta, sizeof(T), + STARPU_VALUE, &aSVDArguments, sizeof(CompressionParameters), + STARPU_VALUE, &aCholesky, sizeof(bool), + STARPU_PRIORITY, 0, + STARPU_CALLBACK, callback, + #if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "dgemm", + #endif + STARPU_R, tile_a_handles[0], + STARPU_R, tile_a_handles[1], + STARPU_R, tile_b_handles[0], + STARPU_R, tile_b_handles[1], + STARPU_RW, tile_c_handles[0], + STARPU_RW, tile_c_handles[1], + STARPU_TAG, tag_1, + STARPU_EXECUTE_ON_NODE, execution_rank, + 0); + return 0; + } + + template + size_t StarPu::Syrk(Matrix &aMatrixA, const size_t &aRowIdxA, + const size_t &aColIdxA, const blas::Op &aAOp, Matrix &aMatrixDiag, + const size_t &aRowIdxC, const size_t &aColIdxC, const blas::Uplo aUplo, T aAlpha, + T aBeta, const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) { + size_t flops = 0; + + auto codelet = (new SyrkCodelet())->GetCodelet(); + + void (*callback)(void *) = nullptr; + + int execution_rank = aMatrixA.GetTileOwnerId(aRowIdxA, aColIdxA); + + auto &tile_a_handles = StarPu::GetTileHandles(aMatrixA, aRowIdxA, aColIdxA); + auto &tile_diag_handles = StarPu::GetTileHandles(aMatrixDiag, aRowIdxC, aColIdxC); + + + auto tag_0 = starpu_mpi_data_get_tag(tile_diag_handles[0]); + auto tag_1 = starpu_mpi_data_get_tag(tile_diag_handles[1]); + + starpu_insert_task(codelet, + STARPU_VALUE, &aAlpha, sizeof(T), + STARPU_VALUE, &aAOp, sizeof(blas::Op), + STARPU_VALUE, &aUplo, sizeof(blas::Uplo), + STARPU_VALUE, &aBeta, sizeof(T), + STARPU_PRIORITY, 3, + STARPU_CALLBACK, callback, + #if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "syrk", + #endif + STARPU_R, tile_a_handles[0], + STARPU_R, tile_a_handles[1], + STARPU_RW, tile_diag_handles[0], + STARPU_RW, tile_diag_handles[1], + 
STARPU_TAG, tag_1, + STARPU_EXECUTE_ON_NODE, execution_rank, + 0); + + return flops; + } + + template + size_t + StarPu::Potrf(Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, const blas::Uplo aUplo, + const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) { + size_t flops = 0; + + auto codelet = (new PotrfCodelet())->GetCodelet(); + + void (*callback)(void *) = nullptr; + + int tile_a_owner = aMatrixA.GetTileOwnerId(aRowIdxA, aColIdxA); + + int execution_rank = tile_a_owner; + + auto tile_a_handles = StarPu::GetTileHandles(aMatrixA, aRowIdxA, aColIdxA); + + auto tag_0 = starpu_mpi_data_get_tag(tile_a_handles[0]); + auto tag_1 = starpu_mpi_data_get_tag(tile_a_handles[1]); + + starpu_insert_task(codelet, + STARPU_VALUE, &aUplo, sizeof(blas::Uplo), + STARPU_PRIORITY, 5, + STARPU_CALLBACK, callback, + #if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "potrf", + #endif + STARPU_RW, tile_a_handles[0], + STARPU_RW, tile_a_handles[1], + STARPU_EXECUTE_ON_NODE, execution_rank, + STARPU_TAG, tag_1, + 0); + return flops; + } + + template + size_t StarPu::Trsm(blas::Side aSide, blas::Uplo aUplo, blas::Op aTrans, blas::Diag aDiag, T aAlpha, + Matrix &aMatrixADiagonal, const size_t &aRowIdxA, const size_t &aColIdxA, + Matrix &aMatrixAUV, const size_t &aRowIdxB, const size_t &aColIdxB, + const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) { + size_t flops = 0; + + auto codelet = (new TrsmCodelet())->GetCodelet(); + + void (*callback)(void *) = nullptr; + + int tile_b_owner = aMatrixAUV.GetTileOwnerId(aRowIdxB, aColIdxB); + + int execution_rank = tile_b_owner; + + auto &tile_a_handles = StarPu::GetTileHandles(aMatrixADiagonal, aRowIdxA, aColIdxA); + auto &tile_b_handles = StarPu::GetTileHandles(aMatrixAUV, aRowIdxB, aColIdxB); + + + auto tag_0 = starpu_mpi_data_get_tag(tile_b_handles[0]); + auto tag_1 = starpu_mpi_data_get_tag(tile_b_handles[1]); + + starpu_insert_task(codelet, 
+ STARPU_VALUE, &aSide, sizeof(blas::Side), + STARPU_VALUE, &aUplo, sizeof(blas::Uplo), + STARPU_VALUE, &aTrans, sizeof(blas::Op), + STARPU_VALUE, &aDiag, sizeof(blas::Diag), + STARPU_VALUE, &aAlpha, sizeof(T), + STARPU_PRIORITY, 4, + STARPU_CALLBACK, callback, + #if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "trsm", + #endif + STARPU_R, tile_a_handles[0], + STARPU_R, tile_a_handles[1], + STARPU_RW, tile_b_handles[0], + STARPU_RW, tile_b_handles[1], + STARPU_TAG, tag_1, + STARPU_EXECUTE_ON_NODE, execution_rank, + 0); + return flops; + } + + template + size_t + StarPu::GenerateDiagonalTile(Matrix &apMatrixUV, Matrix &apMatrixDiag, const size_t &aRowIdxDiag, + const size_t &aColIdxDiag, Matrix &apMatrixRK, const size_t &aRowIdx, + const size_t &aColIdx, unsigned long long int seed, size_t maxrank, double tol, + size_t compress_diag, Matrix &apMatrixDense, + const hcorepp::kernels::RunContext &aContext, bool diagonal_tile) { + size_t flops = 0; + + if (diagonal_tile) { + auto codelet = (new GenerateDgytlrDiagonalCodelet())->GetCodelet(); + + void (*callback)(void *) = nullptr; + + auto tile_auv_handles = StarPu::GetTileHandles(apMatrixUV, aRowIdx, aColIdx); + auto tile_ark_handles = StarPu::GetTileHandles(apMatrixRK, aRowIdx, aColIdx); + auto tile_dense_handles = StarPu::GetTileHandles(apMatrixDense, aRowIdx, aColIdx); + auto tile_diag_handles = StarPu::GetTileHandles(apMatrixDiag, aRowIdxDiag, aColIdxDiag); + + size_t rows = apMatrixDiag.GetNumOfRowsInTile(); + if (aRowIdxDiag == apMatrixDiag.GetNumOfGlobalTilesInRows() - 1) { + rows = apMatrixDiag.GetGlobalNumOfRowsInMatrix() - aRowIdxDiag * apMatrixDiag.GetNumOfRowsInTile(); + } + size_t cols = rows; + + size_t lda_diag = rows;// apMatrixDiag.GetTilePointer(aRowIdxDiag, 0).GetLeadingDim(); + size_t ld_uv = 0; + + + auto tag_0 = starpu_mpi_data_get_tag(tile_auv_handles[0]); + auto tag_1 = starpu_mpi_data_get_tag(tile_auv_handles[1]); + + starpu_insert_task(codelet, + STARPU_VALUE, &aRowIdx, sizeof(size_t), 
+ STARPU_VALUE, &aColIdx, sizeof(size_t), + STARPU_VALUE, &seed, sizeof(unsigned long long int), + STARPU_VALUE, &maxrank, sizeof(size_t), + STARPU_VALUE, &tol, sizeof(double), + STARPU_VALUE, &compress_diag, sizeof(size_t), + STARPU_VALUE, &lda_diag, sizeof(size_t), + STARPU_VALUE, &ld_uv, sizeof(size_t), + STARPU_VALUE, &ld_uv, sizeof(size_t), + STARPU_VALUE, &rows, sizeof(size_t), + STARPU_VALUE, &cols, sizeof(size_t), + STARPU_PRIORITY, 0, + STARPU_CALLBACK, callback, + #if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "dgytlr_diag", + #endif + STARPU_W, tile_auv_handles[0], + STARPU_W, tile_auv_handles[1], + STARPU_W, tile_ark_handles[0], + STARPU_W, tile_ark_handles[1], + STARPU_RW, tile_dense_handles[0], + STARPU_RW, tile_dense_handles[1], + STARPU_W, tile_diag_handles[0], + STARPU_W, tile_diag_handles[1], + STARPU_TAG, tag_1, + 0); + + } else { + + auto codelet = (new GenerateDgytlrCodelet())->GetCodelet(); + + void (*callback)(void *) = nullptr; + + auto tile_auv_handles = StarPu::GetTileHandles(apMatrixUV, aRowIdx, aColIdx); + auto tile_ark_handles = StarPu::GetTileHandles(apMatrixRK, aRowIdx, aColIdx); + auto tile_dense_handles = StarPu::GetTileHandles(apMatrixDense, aRowIdx, aColIdx); + + + size_t rows = apMatrixDiag.GetNumOfRowsInTile(); + if (aRowIdxDiag == apMatrixDiag.GetNumOfGlobalTilesInRows() - 1) { + rows = apMatrixDiag.GetGlobalNumOfRowsInMatrix() - aRowIdxDiag * apMatrixDiag.GetNumOfRowsInTile(); + } + + size_t cols = apMatrixUV.GetNumOfRowsInTile(); + if (aRowIdx == apMatrixUV.GetNumOfGlobalTilesInRows() - 1) { + cols = apMatrixUV.GetGlobalNumOfRowsInMatrix() - aRowIdx * apMatrixUV.GetNumOfRowsInTile(); + } + + size_t lda_diag = rows;//apMatrixDiag.GetTilePointer(aRowIdxDiag, 0).GetLeadingDim(); + size_t ld_uv = cols;//apMatrixUV.GetTilePointer(aRowIdx, 0).GetLeadingDim(); + + + auto tag_0 = starpu_mpi_data_get_tag(tile_auv_handles[0]); + auto tag_1 = starpu_mpi_data_get_tag(tile_auv_handles[1]); + + starpu_insert_task(codelet, + 
STARPU_VALUE, &aRowIdx, sizeof(size_t), + STARPU_VALUE, &aColIdx, sizeof(size_t), + STARPU_VALUE, &seed, sizeof(unsigned long long int), + STARPU_VALUE, &maxrank, sizeof(size_t), + STARPU_VALUE, &tol, sizeof(double), + STARPU_VALUE, &compress_diag, sizeof(size_t), + STARPU_VALUE, &lda_diag, sizeof(size_t), + STARPU_VALUE, &ld_uv, sizeof(size_t), + STARPU_VALUE, &ld_uv, sizeof(size_t), + STARPU_VALUE, &rows, sizeof(size_t), + STARPU_VALUE, &cols, sizeof(size_t), + STARPU_PRIORITY, 0, + STARPU_CALLBACK, callback, + #if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "dgytlr", + #endif + STARPU_W, tile_auv_handles[0], + STARPU_W, tile_auv_handles[1], + STARPU_W, tile_ark_handles[0], + STARPU_W, tile_ark_handles[1], + STARPU_RW, tile_dense_handles[0], + STARPU_RW, tile_dense_handles[1], + STARPU_TAG, tag_1, + 0); + } + + return flops; + } + + template + size_t StarPu::LaCpy(Matrix &aMatrixA, const size_t &aRowIdxA, const size_t &aColIdxA, + Matrix &aMatrixB, const size_t &aRowIdxB, const size_t &aColIdxB, + const hcorepp::kernels::RunContext &aContext) { + + size_t flops = 0; + + auto codelet = (new LacpyCodelet())->GetCodelet(); + + void (*callback)(void *) = nullptr; + + int tile_destination_owner = aMatrixB.GetTileOwnerId(aRowIdxB, aColIdxB); + + int execution_rank = tile_destination_owner; + + auto &tile_src_handles = StarPu::GetTileHandles(aMatrixA, aRowIdxA, aColIdxA); + auto &tile_dest_handles = StarPu::GetTileHandles(aMatrixB, aRowIdxB, aColIdxB); + + starpu_insert_task(codelet, + STARPU_PRIORITY, 0, + STARPU_CALLBACK, callback, + #if defined(CHAMELEON_CODELETS_HAVE_NAME) + STARPU_NAME, "lacpy", + #endif + STARPU_R, tile_src_handles[0], + STARPU_R, tile_src_handles[1], + STARPU_RW, tile_dest_handles[0], + STARPU_RW, tile_dest_handles[1], + STARPU_EXECUTE_ON_NODE, execution_rank, + 0); + return flops; + } + + template + size_t + StarPu::Uncompress(Matrix &apMatrixUV, Matrix &apMatrixDense, Matrix &apMatrixRk, const size_t &aRowIdx, + const size_t &aColIdx) 
{
+        size_t flops = 0;
+
+        auto codelet = (new UncompressCodelet())->GetCodelet();
+
+        void (*callback)(void *) = nullptr;
+
+        int tile_dense_owner = apMatrixDense.GetTileOwnerId(aRowIdx, aColIdx);
+
+        int execution_rank = tile_dense_owner;
+
+        auto tile_uv_handles = StarPu::GetTileHandles(apMatrixUV, aRowIdx, aColIdx);
+        auto tile_dense_handles = StarPu::GetTileHandles(apMatrixDense, aRowIdx, aColIdx);
+        auto tile_rk_handles = StarPu::GetTileHandles(apMatrixRk, aRowIdx, aColIdx);
+        hcorepp::common::BlasOperation atrans = hcorepp::common::BlasOperation::OP_NoTRANS;
+        hcorepp::common::BlasOperation btrans = hcorepp::common::BlasOperation::OP_CONJG;
+        size_t ncols = apMatrixUV.GetNumOfRowsInTile();
+
+        starpu_insert_task(codelet,
+                           STARPU_VALUE, &atrans, sizeof(hcorepp::common::BlasOperation),
+                           STARPU_VALUE, &btrans, sizeof(hcorepp::common::BlasOperation),
+                           STARPU_VALUE, &ncols, sizeof(size_t),
+                           STARPU_PRIORITY, 0,
+                           STARPU_CALLBACK, callback,
+                           #if defined(CHAMELEON_CODELETS_HAVE_NAME)
+                           STARPU_NAME, "uncompress",
+                           #endif
+                           STARPU_R, tile_uv_handles[0],
+                           STARPU_R, tile_uv_handles[1],
+                           STARPU_RW, tile_dense_handles[0],
+                           STARPU_RW, tile_dense_handles[1],
+                           STARPU_R, tile_rk_handles[0],
+                           STARPU_R, tile_rk_handles[1],
+                           STARPU_EXECUTE_ON_NODE, execution_rank,
+                           0);
+
+        return flops;
+
+    }
+
+    HICMAPP_INSTANTIATE_CLASS(StarPu)
+}
\ No newline at end of file
diff --git a/src/tile-operations/CMakeLists.txt b/src/tile-operations/CMakeLists.txt
new file mode 100644
index 0000000..eb4d6a1
--- /dev/null
+++ b/src/tile-operations/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(SOURCES
+        ${CMAKE_CURRENT_SOURCE_DIR}/TileOperations.cpp
+        ${SOURCES}
+        PARENT_SCOPE
+        )
diff --git a/src/tile-operations/TileOperations.cpp b/src/tile-operations/TileOperations.cpp
new file mode 100644
index 0000000..c2010d2
--- /dev/null
+++ b/src/tile-operations/TileOperations.cpp
@@ -0,0 +1,308 @@
+#include
+#include
+
+extern "C" {
+#include "starsh.h"
+}
+
+#include "hicmapp/tile-operations/TileOperations.hpp"
+#include "hicmapp/problem-manager/StarshManager.hpp"
+#include
+#include "hcorepp/kernels/kernels.hpp"
+
+namespace hicmapp::operations {
+
+    /**
+     * Generates one compressed (U, V) tile from the STARS-H problem kernel.
+     *
+     * Fills a temporary dense buffer through the STARS-H kernel for tile
+     * (aTileRowIdx, aTileColIdx), runs an SVD on it, picks a rank (the fixed rank from
+     * aSVDArguments when non-zero, otherwise the accuracy-based rank), clamps it to
+     * [1, maxRank], copies U and the sigma-scaled V^T into aCompressedTile and updates
+     * the tile metadata.
+     *
+     * @param aCompressedTile Destination tile; its rows/cols/leading dimension size the work.
+     * @param aTileRowIdx     Tile row index used to offset into the STARS-H row cluster.
+     * @param aTileColIdx     Tile column index used to offset into the STARS-H column cluster.
+     * @param aSVDArguments   Compression parameters (fixed rank, accuracy, operation type, ...).
+     * @return Always 0.
+     */
+    template
+    int TileOperations::GenerateCompressedMatrix(hcorepp::operators::CompressedTile &aCompressedTile,
+                                                    size_t aTileRowIdx, size_t aTileColIdx,
+                                                    const hcorepp::operators::CompressionParameters &aSVDArguments) {
+
+        STARSH_blrf *starsh_format = hicmapp::operations::StarsHManager::GetStarsHFormat();
+        auto &context = hcorepp::kernels::ContextManager::GetInstance().GetContext();
+        STARSH_cluster *RC = starsh_format->row_cluster, *CC = RC;
+        void *RD = RC->data, *CD = RD;
+
+        int num_of_rows = aCompressedTile.GetNumOfRows();
+        int num_of_cols = aCompressedTile.GetNumOfCols();
+        int leading_dim = aCompressedTile.GetLeadingDim();
+        // Temporary dense copy of the tile; released below once U and V are extracted.
+        T *array = hcorepp::memory::AllocateArray(leading_dim * num_of_cols, context);
+
+        starsh_format->problem->kernel(num_of_rows, num_of_cols,
+                                       (STARSH_int *) (RC->pivot + RC->start[aTileRowIdx]),
+                                       (STARSH_int *) (CC->pivot + CC->start[aTileColIdx]),
+                                       (void *) RD, (void *) CD, (void *) array, leading_dim);
+
+        size_t rk;
+        size_t maxRank = std::max(std::min(num_of_rows, num_of_cols) / MAX_RANK_RATIO, 1);
+        size_t min_m_n = std::min(num_of_rows, num_of_cols);
+        auto sigma = hcorepp::memory::AllocateArray>(min_m_n, context);
+        hcorepp::dataunits::DataHolder u_dataholder(num_of_rows, min_m_n, num_of_rows, nullptr, context);
+        auto u = u_dataholder.GetData();
+        hcorepp::dataunits::DataHolder vt_dataholder(min_m_n, num_of_cols, min_m_n, nullptr, context);
+        auto vt = vt_dataholder.GetData();
+
+        // NOTE(review): the kernel above writes `array` with stride leading_dim, while the
+        // SVD is given num_of_rows as its leading dimension - confirm the two always match.
+        hcorepp::kernels::HCoreKernels::SVD(hcorepp::common::Job::SomeVec, hcorepp::common::Job::SomeVec,
+                                            num_of_rows, num_of_cols, array, num_of_rows, sigma, u, num_of_rows,
+                                            vt,
+                                            min_m_n, aSVDArguments.GetOperationType(), nullptr, 0, 0, context);
+        rk = 0;
+        if (aSVDArguments.GetFixedRank()) {
+            /// truncate according to fixed_rk
+            rk = aSVDArguments.GetFixedRank();
+            if (aSVDArguments.GetFixedRank() > min_m_n) {
+                rk = min_m_n;
+            }
+        } else { // truncate according to accuracy
+            hcorepp::kernels::HCoreKernels::CalculateNewRank(rk, aSVDArguments.GetTruncatedSvd(), sigma,
+                                                             min_m_n,
+                                                             aSVDArguments.GetAccuracy(), context);
+        }
+
+        // Ensure at least rank is 1.
+        rk = std::max(rk, 1UL);
+
+        if (rk > maxRank) {
+            rk = maxRank;
+        }
+
+        // VT eats Sigma.
+        hcorepp::kernels::HCoreKernels::CalculateVTnew(rk, aSVDArguments.GetUngqr(),
+                                                       num_of_cols, sigma, vt, min_m_n,
+                                                       vt_dataholder.GetNumOfRows(),
+                                                       context);
+        // Prepare UV array.
+        auto auv = hcorepp::memory::AllocateArray((num_of_rows + num_of_cols) * rk, context);
+        hcorepp::memory::Memcpy(auv, u, (num_of_rows * rk), context,
+                                hcorepp::memory::MemoryTransfer::DEVICE_TO_DEVICE);
+        hcorepp::kernels::HCoreKernels::LaCpy(hcorepp::common::MatrixType::General, rk, num_of_cols, vt, min_m_n,
+                                              &auv[num_of_rows * rk], rk, context);
+        hcorepp::memory::DestroyArray(sigma, context);
+        // The dense scratch buffer is no longer read after the SVD; it was previously
+        // never released and leaked on every generated tile.
+        hcorepp::memory::DestroyArray(array, context);
+
+
+        hcorepp::memory::Memcpy(aCompressedTile.GetUMatrix(), auv, num_of_rows * rk, context);
+        hcorepp::memory::Memcpy(aCompressedTile.GetVMatrix(), &auv[num_of_rows * rk],
+                                rk * num_of_cols, context);
+        hcorepp::memory::DestroyArray(auv, context);
+
+
+        hcorepp::operators::TileMetadata metadata(num_of_rows, num_of_cols, rk, maxRank, leading_dim,
+                                                  aCompressedTile.GetLayout(),
+                                                  hcorepp::operators::TileType::COMPRESSED);
+        aCompressedTile.UpdateMetadata(metadata);
+
+        return 0;
+    }
+
+    template
+    int
+    TileOperations::GenerateDenseTile(hcorepp::operators::DenseTile &aDenseTile, size_t aTileRowIdx,
+                                         size_t aTileColIdx) {
+
+        STARSH_blrf *starsh_format = hicmapp::operations::StarsHManager::GetStarsHFormat();
+        STARSH_cluster *RC = starsh_format->row_cluster, *CC = RC;
+        void *RD = RC->data, *CD = RD;
+
+        int num_of_rows = aDenseTile.GetNumOfRows();
+        int num_of_cols = aDenseTile.GetNumOfCols();
+        int leading_dim = aDenseTile.GetLeadingDim();
+        T *array = (T *) aDenseTile.GetTileSubMatrix(0);
+
+        
starsh_format->problem->kernel(num_of_rows, num_of_cols, + (STARSH_int *) (RC->pivot + RC->start[aTileRowIdx]), + (STARSH_int *) (CC->pivot + CC->start[aTileColIdx]), + (void *) RD, (void *) CD, (void *) array, leading_dim); + + return 0; + } + + template + int + TileOperations::UnCompressTile(size_t aNumOfRows, size_t aNumOfCols, double aAlpha, const T *apAU, + const T *apArk, + size_t aLeadingDimA, const T *apBV, size_t aLeadingDimB, + double aBeta, + T *apC, size_t aLeadingDimC) { + cblas_dgemm( + CblasColMajor, + CblasNoTrans, CblasTrans, + aNumOfRows, aNumOfCols, apArk[0], + aAlpha, (double *) apAU, aLeadingDimA, + (double *) apBV, aLeadingDimB, + aBeta, (double *) apC, aLeadingDimC); + + return 0; + } + + template + size_t + TileOperations::Gemm(T aAlpha, const hcorepp::operators::Tile &aA, const blas::Op &aAOp, + const hcorepp::operators::Tile &aB, + const blas::Op &aBOp, T aBeta, hcorepp::operators::Tile &aC, + const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit, + const hcorepp::operators::CompressionParameters &aSVDArguments, bool aCholesky) { + size_t flops = 0; + size_t &flops_ref = flops; + hcorepp::api::HCore::Gemm(aAlpha, aA, aAOp, aB, aBOp, aBeta, aC, aContext, flops_ref, aMemoryUnit, + aSVDArguments, aCholesky); + return flops; + } + + template + size_t TileOperations::Syrk(T aAlpha, const hcorepp::operators::Tile &aA, const blas::Op &aAOp, + const blas::Uplo aUplo, T aBeta, + hcorepp::operators::Tile &aC, const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) { + size_t flops = 0; + size_t &flops_ref = flops; + + hcorepp::api::HCore::Syrk(aAlpha, aA, aAOp, aUplo, aBeta, aC, aContext, flops_ref, aMemoryUnit); + + return flops; + } + + template + size_t TileOperations::Potrf(hcorepp::operators::Tile &aA, const blas::Uplo aUplo, + const hcorepp::kernels::RunContext &aContext, + hcorepp::dataunits::MemoryUnit &aMemoryUnit) { + size_t flops = 0; + size_t &flops_ref = flops; + 
+        hcorepp::api::HCore::Potrf(aA, aUplo, aContext, flops_ref, aMemoryUnit);
+
+        return flops;
+    }
+
+    template
+    size_t TileOperations::Trsm(blas::Side aSide, blas::Uplo aUplo, blas::Op aTrans, blas::Diag aDiag, T aAlpha,
+                                   hcorepp::operators::Tile &aA, hcorepp::operators::Tile &aB,
+                                   const hcorepp::kernels::RunContext &aContext,
+                                   hcorepp::dataunits::MemoryUnit &aMemoryUnit) {
+        size_t flops = 0;
+        size_t &flops_ref = flops;
+
+        hcorepp::api::HCore::Trsm(aSide, aUplo, aTrans, aDiag, aAlpha, aA, aB, aContext, flops_ref,
+                                  aMemoryUnit);
+
+        return flops;
+    }
+
+    /**
+     * Generates one tile from the STARS-H problem kernel into a dense buffer, copies it
+     * into aADense and, for off-diagonal tiles (or when compress_diag == 1), compresses
+     * it into the U/V storage of aAuv and stores the resulting rank in aArk.
+     *
+     * For the remaining (diagonal, uncompressed) case the rank entry is set to `rows`.
+     * A tile that STARS-H reports as dense (rank == -1) is treated as fatal: a message is
+     * written to stderr and the process exits with a failure status.
+     *
+     * @param aAuv        Tile viewed as a CompressedTile; receives U and V.
+     * @param aArk        Tile whose first element receives the rank.
+     * @param aADense     Tile receiving the dense copy of the generated data.
+     * @param aADiagonal  Optional destination for the generated data; may be nullptr, in
+     *                    which case a scratch buffer is allocated on the compress path.
+     * @return Always 0 (flops are not counted here).
+     */
+    template
+    size_t
+    TileOperations::GenerateDiagonalTile(hcorepp::operators::Tile *aAuv, hcorepp::operators::Tile *aArk,
+                                            hcorepp::operators::Tile *aADense,
+                                            hcorepp::operators::Tile *aADiagonal,
+                                            int aTileRowIdx, int aTileColIdx, unsigned long long int seed,
+                                            int maxrank, double tol, int compress_diag, int lda, int ldu,
+                                            int ldv, int rows, int cols,
+                                            const hcorepp::kernels::RunContext &aContext) {
+        size_t flops = 0;
+
+        auto &comp_tile = (hcorepp::operators::CompressedTile &) (*aAuv);
+
+        auto *AU = comp_tile.GetUMatrix();
+        auto *AV = comp_tile.GetVMatrix();
+        T *ADiagonal = nullptr;
+        bool A_diagonal_allocated = false;
+        if (aADiagonal != nullptr) {
+            ADiagonal = aADiagonal->GetTileSubMatrix(0);
+        }
+
+        auto *ARK = aArk->GetTileSubMatrix(0);
+        auto *Dense = aADense->GetTileSubMatrix(0);
+
+        int rank = 0;
+        int oversample = 10;
+        double *work;
+        int *iwork;
+        STARSH_blrf *blrf = hicmapp::operations::StarsHManager::GetStarsHFormat();
+        STARSH_cluster *RC = blrf->row_cluster, *CC = RC;
+        void *RD = RC->data, *CD = RD;
+        T *saveAD;
+
+        if ((aTileRowIdx != aTileColIdx) || compress_diag == 1) {
+            saveAD = ADiagonal;
+            if (ADiagonal == nullptr) {
+                ADiagonal = (T *) malloc(sizeof(T) * lda * cols);
+                A_diagonal_allocated = true;
+            }
+            assert(rows == lda);
+        }
+
+        blrf->problem->kernel(rows, cols, RC->pivot + RC->start[aTileRowIdx], CC->pivot + CC->start[aTileColIdx],
+                              RD, CD, ADiagonal, lda);
+
+        {
+            char chall = 'A';
+            dlacpy_(&chall, &rows, &cols, (double *) ADiagonal, &lda, (double *) Dense, &lda
+#ifdef LAPACK_FORTRAN_STRLEN_END
+                    , 0
+#endif
+            );
+        }
+        int mn = rows;
+        int mn2 = maxrank + oversample;
+        if (mn2 > mn)
+            mn2 = mn;
+
+        size_t lwork = cols, lwork_sdd = (4 * mn2 + 7) * mn2;
+        if (lwork_sdd > lwork)
+            lwork = lwork_sdd;
+        lwork += (size_t)
+                         mn2 * (2 * cols + rows + mn2 + 1);
+        size_t liwork = 8 * mn2;
+
+        iwork = (int *) malloc(sizeof(*iwork) * liwork);
+
+        work = (double *) malloc(sizeof(*work) * lwork);
+
+        if (aTileRowIdx != aTileColIdx ||
+            compress_diag == 1) {
+
+            starsh_dense_dlrrsdd(rows, cols, (double *) ADiagonal, lda, (double *) AU, ldu, (double *) AV, ldv, &rank,
+                                 maxrank, oversample, tol, (double *) work, lwork, iwork);
+
+            if (rank == -1) { //means that tile is dense.
+                rank = rows;
+                fprintf(stderr, "%s %s %d: Dense off-diagonal block (%d,%d). maxrank:%d\n", __FILE__, __func__,
+                        __LINE__,
+                        aTileRowIdx, aTileColIdx, maxrank);
+                // Fatal condition: report failure to the caller's shell. This used to be
+                // exit(0), which signalled success after printing an error.
+                exit(EXIT_FAILURE);
+            }
+            if (rank == 0) rank = 1;
+            ARK[0] = rank;
+            // NOTE(review): saveAD is the pre-allocation value of ADiagonal, so this only
+            // holds when the scratch buffer was allocated above; it would fire if a caller
+            // passes a non-null aADiagonal on this path - confirm that never happens.
+            assert(ADiagonal != saveAD);
+            if (A_diagonal_allocated) {
+                free(ADiagonal);
+            }
+        } else {
+            ARK[0] = rows;
+        }
+
+        comp_tile.ReadjustTileRank(ARK[0], aContext);
+
+        free(work);
+        free(iwork);
+
+        return flops;
+    }
+
+
+    template
+    size_t TileOperations::LaCpy(int aRows, int aCols, const hcorepp::operators::Tile &aA,
+                                    hcorepp::operators::Tile &aB,
+                                    const hcorepp::kernels::RunContext &aContext) {
+
+        auto data_a = aA.GetTileSubMatrix(0);
+        auto lda = aA.GetLeadingDim();
+        auto data_b = aB.GetTileSubMatrix(0);
+        auto ldb = aB.GetLeadingDim();
+        hcorepp::kernels::HCoreKernels::LaCpy(hcorepp::common::MatrixType::General, aRows, aCols, data_a, lda,
+                                              data_b,
+                                              ldb, aContext);
+
+        return 0;
+    }
+
+    HICMAPP_INSTANTIATE_CLASS(TileOperations)
+}
\ No newline at end of file
diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt
new file mode 100644
index 0000000..27be7e0
--- /dev/null
+++ b/src/utils/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(SOURCES
+        
${CMAKE_CURRENT_SOURCE_DIR}/MatrixHelpers.cpp + ${SOURCES} + PARENT_SCOPE + ) diff --git a/src/utils/MatrixHelpers.cpp b/src/utils/MatrixHelpers.cpp new file mode 100644 index 0000000..62d9373 --- /dev/null +++ b/src/utils/MatrixHelpers.cpp @@ -0,0 +1,59 @@ +#include +#include +#include + + +namespace hicmapp::utils { + template + void MatrixHelpers::MatrixToArray(primitives::Matrix &aMatrix, T *&aArray) { + + size_t array_offset = 0; + for (auto sub_matrix: aMatrix.GetSubMatrices()) { + auto &tiles = sub_matrix->GetTiles(); + for (size_t row = 0; row < sub_matrix->GetNumOfTilesinRows(); row++) { + for (size_t col = 0; col < sub_matrix->GetNumOfTilesinCols(); col++) { + auto index = col * sub_matrix->GetNumOfTilesinRows() + row; + auto rows = row * sub_matrix->GetTileRows() + + sub_matrix->GetTilesGlobalStIdxInRows() * aMatrix.GetNumOfRowsInTile(); + auto cols = col * sub_matrix->GetTileCols() + + sub_matrix->GetTilesGlobalStIdxInCols() * aMatrix.GetNumOfColsInTile(); + auto &tile = tiles[index]; + auto tile_rows = tile->GetNumOfRows(); + auto tile_cols = tile->GetNumOfCols(); + auto tile_data = tile->GetTileSubMatrix(0); + for (size_t i = 0; i < tile_cols; i++) { + for (size_t j = 0; j < tile_rows; j++) { + auto index_in_tile = i * tile_rows + j; + auto full_array_index = rows + j + ((cols + i) * aMatrix.GetGlobalNumOfRowsInMatrix()); + aArray[full_array_index] = tile_data[index_in_tile]; + } + } + } + } + } + } + + template + void MatrixHelpers::PrintArray(T *&aArray, size_t aRows, size_t aCols, hicmapp::common::StorageLayout aLayout) { + if (aLayout == common::StorageLayout::HicmaRM) { + for (size_t row = 0; row < aRows; row++) { + for (size_t col = 0; col < aCols; col++) { + auto index = row * aCols + col; + std::cout << "data[" << row << "][" << col << "]= " << aArray[index] << "\t"; + } + std::cout << " \n"; + } + } else if (aLayout == common::StorageLayout::HicmaCM) { + for (size_t row = 0; row < aRows; row++) { + for (size_t col = 0; col < aCols; col++) { + 
auto index = col * aRows + row; + std::cout << "data[" << row << "][" << col << "]= " << aArray[index] << "\t"; + } + std::cout << " \n"; + } + } + } + + HICMAPP_INSTANTIATE_CLASS(MatrixHelpers) + +} \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..f75df69 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,11 @@ +set(HICMAPP_TESTFILES test_main.cpp) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/primitives) +enable_testing() + +add_executable(hicmapp-tests ${HICMAPP_TESTFILES}) +if (HICMAPP_USE_MPI) + target_link_libraries(hicmapp-tests Catch2::Catch2WithMain ${PROJECT_NAME} MPI::MPI_CXX) +else () + target_link_libraries(hicmapp-tests Catch2::Catch2WithMain ${PROJECT_NAME}) +endif () +catch_discover_tests(hicmapp-tests) \ No newline at end of file diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..f294b26 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,5 @@ +# Tests Subdirectory + +This contains all the tests for the project. + +It follows the same file structure explained in the ReadMe in the include directory. 
diff --git a/tests/primitives/CMakeLists.txt b/tests/primitives/CMakeLists.txt new file mode 100644 index 0000000..48ca837 --- /dev/null +++ b/tests/primitives/CMakeLists.txt @@ -0,0 +1,7 @@ +set(HICMAPP_TESTFILES + ${CMAKE_CURRENT_SOURCE_DIR}/TestDecomposer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/TestMatrix.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/TestSubMatrix.cpp + ${HICMAPP_TESTFILES} + PARENT_SCOPE + ) \ No newline at end of file diff --git a/tests/primitives/TestDecomposer.cpp b/tests/primitives/TestDecomposer.cpp new file mode 100644 index 0000000..15c7ab5 --- /dev/null +++ b/tests/primitives/TestDecomposer.cpp @@ -0,0 +1,587 @@ +#include + +#include + +#include +#include +#include +using namespace hicmapp::common; +using namespace hicmapp::primitives; + +template +void TEST_MATRIX_DECOMPOSER() { + SECTION("Slowest Dimension Decomposer, CM, 1 sub-matrix") { + /** + * __________________________ + * |tile_0 | tile_2 | tile_4| + * |tile_1 | tile_3 | tile_5| + * -------------------------- + */ + size_t number_of_sub_matrices = 1; + StorageLayout storage_layout = StorageLayout::HicmaCM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + + for (int i = 0; i < number_of_sub_matrices; i++) { + REQUIRE(matrix_specs[i].GetNumOfTilesInCol() == 3); + REQUIRE(matrix_specs[i].GetNumOfTilesInRow() == 2); + REQUIRE(matrix_specs[i].GetStartingIndexInCols() == 0); + REQUIRE(matrix_specs[i].GetStartingIndexInRows() == 0); + } + + } + + SECTION("Slowest Dimension Decomposer, CM, 2 sub-matrices, non-divisible") { + /** + * ____________________________ + * | tile_0 || tile_2 | tile_4 | + * | tile_1 || tile_3 | tile_5 | + * 
---------------------------- + */ + size_t number_of_sub_matrices = 2; + StorageLayout storage_layout = StorageLayout::HicmaCM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + + REQUIRE(matrix_specs[0].GetNumOfTilesInCol() == 1); + REQUIRE(matrix_specs[0].GetNumOfTilesInRow() == 2); + REQUIRE(matrix_specs[0].GetStartingIndexInCols() == 0); + REQUIRE(matrix_specs[0].GetStartingIndexInRows() == 0); + + REQUIRE(matrix_specs[1].GetNumOfTilesInCol() == 2); + REQUIRE(matrix_specs[1].GetNumOfTilesInRow() == 2); + REQUIRE(matrix_specs[1].GetStartingIndexInCols() == 1); + REQUIRE(matrix_specs[1].GetStartingIndexInRows() == 0); + + } + + SECTION("Slowest Dimension Decomposer, CM, 2 sub-matrices, sub-matrices > number of tiles") { + /** + * ____________________________ + * | tile_0 | tile_2 | tile_4 | + * | tile_1 | tile_3 | tile_5 | + * ---------------------------- + */ + size_t number_of_sub_matrices = 2; + StorageLayout storage_layout = StorageLayout::HicmaCM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + + //TODO: test for number of sub mtarices > number of tiles per row/col error handling + } + + SECTION("Slowest Dimension Decomposer, CM, 3 sub-matrices") { + /** + * ____________________________ + * | tile_0 || tile_2 || tile_4 | + * | tile_1 || tile_3 || tile_5 | + * 
---------------------------- + */ + size_t number_of_sub_matrices = 3; + StorageLayout storage_layout = StorageLayout::HicmaCM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + + for (int i = 0; i < number_of_sub_matrices; i++) { + REQUIRE(matrix_specs[i].GetNumOfTilesInCol() == 1); + REQUIRE(matrix_specs[i].GetNumOfTilesInRow() == 2); + REQUIRE(matrix_specs[i].GetStartingIndexInCols() == i); + REQUIRE(matrix_specs[i].GetStartingIndexInRows() == 0); + } + } + + SECTION("Slowest Dimension Decomposer, RM, 1 sub-matrix") { + /** + * __________________________ + * |tile_0 | tile_1 | tile_2| + * |tile_3 | tile_4 | tile_5| + * -------------------------- + */ + size_t number_of_sub_matrices = 1; + StorageLayout storage_layout = StorageLayout::HicmaRM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + + for (int i = 0; i < number_of_sub_matrices; i++) { + REQUIRE(matrix_specs[i].GetNumOfTilesInCol() == 3); + REQUIRE(matrix_specs[i].GetNumOfTilesInRow() == 2); + REQUIRE(matrix_specs[i].GetStartingIndexInCols() == 0); + REQUIRE(matrix_specs[i].GetStartingIndexInRows() == 0); + } + + } + + SECTION("Slowest Dimension Decomposer, RM, 2 sub-matrices") { + /** + * __________________________ + * |tile_0 | tile_1 | tile_2| + * ========================== + * |tile_3 | tile_4 | tile_5| + * 
-------------------------- + */ + size_t number_of_sub_matrices = 2; + StorageLayout storage_layout = StorageLayout::HicmaRM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + + for (int i = 0; i < number_of_sub_matrices; i++) { + REQUIRE(matrix_specs[i].GetNumOfTilesInCol() == 3); + REQUIRE(matrix_specs[i].GetNumOfTilesInRow() == 1); + REQUIRE(matrix_specs[i].GetStartingIndexInCols() == 0); + REQUIRE(matrix_specs[i].GetStartingIndexInRows() == i); + } + + } + + SECTION("Slowest Dimension Decomposer, RM, 2 sub-matrices, sub-matrices > number of tiles") { + /** + * __________________________ + * |tile_0 | tile_1 | tile_2| + * ========================== + * |tile_3 | tile_4 | tile_5| + * -------------------------- + */ + size_t number_of_sub_matrices = 3; + StorageLayout storage_layout = StorageLayout::HicmaRM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + REQUIRE_THROWS(slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols)); + + } + + SECTION("Slowest Dimension Decomposer, RM, 2 sub-matrices, non-divisible") { + /** + * __________________________ + * |tile_0 | tile_1 | tile_2| + * ========================== + * |tile_3 | tile_4 | tile_5| + * ========================== + * |tile_3 | tile_4 | tile_5| + * -------------------------- + * |tile_3 | tile_4 | tile_5| + */ + + size_t number_of_sub_matrices = 3; + StorageLayout storage_layout = StorageLayout::HicmaRM; + size_t global_matrix_tiles_in_rows = 4; + size_t 
global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + int i = 0; + for (; i < number_of_sub_matrices - 1; i++) { + REQUIRE(matrix_specs[i].GetNumOfTilesInCol() == 3); + REQUIRE(matrix_specs[i].GetNumOfTilesInRow() == 1); + REQUIRE(matrix_specs[i].GetStartingIndexInCols() == 0); + REQUIRE(matrix_specs[i].GetStartingIndexInRows() == i); + } + REQUIRE(matrix_specs[i].GetNumOfTilesInCol() == 3); + REQUIRE(matrix_specs[i].GetNumOfTilesInRow() == 2); + REQUIRE(matrix_specs[i].GetStartingIndexInCols() == 0); + REQUIRE(matrix_specs[i].GetStartingIndexInRows() == i); + + } + + SECTION("Slowest Dimension Decomposer, CM, 2 sub-matrices, non-divisible") { + /** + * ____________________________ + * | tile_0 || tile_2 | tile_4 | + * | tile_1 || tile_3 | tile_5 | + * ---------------------------- + */ + size_t number_of_sub_matrices = 2; + StorageLayout storage_layout = StorageLayout::HicmaCM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + + REQUIRE(matrix_specs[0].GetNumOfTilesInCol() == 1); + REQUIRE(matrix_specs[0].GetNumOfTilesInRow() == 2); + REQUIRE(matrix_specs[0].GetStartingIndexInCols() == 0); + REQUIRE(matrix_specs[0].GetStartingIndexInRows() == 0); + + REQUIRE(matrix_specs[1].GetNumOfTilesInCol() == 2); + REQUIRE(matrix_specs[1].GetNumOfTilesInRow() == 2); + + REQUIRE(matrix_specs[1].GetStartingIndexInCols() == 1); + 
REQUIRE(matrix_specs[1].GetStartingIndexInRows() == 0); + + } + + SECTION("Slowest Dimension Decomposer, CM, 2 sub-matrices, sub-matrices > number of tiles") { + /** + * ____________________________ + * | tile_0 | tile_2 | tile_4 | + * | tile_1 | tile_3 | tile_5 | + * ---------------------------- + */ + size_t number_of_sub_matrices = 2; + StorageLayout storage_layout = StorageLayout::HicmaCM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + + //TODO: test for number of sub mtarices > number of tiles per row/col error handling + } + + SECTION("Slowest Dimension Decomposer, CM, 3 sub-matrices") { + /** + * ____________________________ + * | tile_0 || tile_2 || tile_4 | + * | tile_1 || tile_3 || tile_5 | + * ---------------------------- + */ + size_t number_of_sub_matrices = 3; + StorageLayout storage_layout = StorageLayout::HicmaCM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + + for (int i = 0; i < number_of_sub_matrices; i++) { + REQUIRE(matrix_specs[i].GetNumOfTilesInCol() == 1); + REQUIRE(matrix_specs[i].GetNumOfTilesInRow() == 2); + REQUIRE(matrix_specs[i].GetStartingIndexInCols() == i); + REQUIRE(matrix_specs[i].GetStartingIndexInRows() == 0); + } + } + + SECTION("Slowest Dimension Decomposer, RM, 1 sub-matrix") { + /** + * __________________________ + * |tile_0 | 
tile_1 | tile_2| + * |tile_3 | tile_4 | tile_5| + * -------------------------- + */ + size_t number_of_sub_matrices = 1; + StorageLayout storage_layout = StorageLayout::HicmaRM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + + for (int i = 0; i < number_of_sub_matrices; i++) { + REQUIRE(matrix_specs[i].GetNumOfTilesInCol() == 3); + REQUIRE(matrix_specs[i].GetNumOfTilesInRow() == 2); + REQUIRE(matrix_specs[i].GetStartingIndexInCols() == 0); + REQUIRE(matrix_specs[i].GetStartingIndexInRows() == 0); + } + + } + + SECTION("Slowest Dimension Decomposer, RM, 2 sub-matrices") { + /** + * __________________________ + * |tile_0 | tile_1 | tile_2| + * ========================== + * |tile_3 | tile_4 | tile_5| + * -------------------------- + */ + size_t number_of_sub_matrices = 2; + StorageLayout storage_layout = StorageLayout::HicmaRM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + + for (int i = 0; i < number_of_sub_matrices; i++) { + REQUIRE(matrix_specs[i].GetNumOfTilesInCol() == 3); + REQUIRE(matrix_specs[i].GetNumOfTilesInRow() == 1); + REQUIRE(matrix_specs[i].GetStartingIndexInCols() == 0); + REQUIRE(matrix_specs[i].GetStartingIndexInRows() == i); + } + + } + + SECTION("Slowest Dimension Decomposer, RM, 2 sub-matrices, sub-matrices > number of tiles") { + /** + * 
__________________________ + * |tile_0 | tile_1 | tile_2| + * ========================== + * |tile_3 | tile_4 | tile_5| + * -------------------------- + */ + size_t number_of_sub_matrices = 3; + StorageLayout storage_layout = StorageLayout::HicmaRM; + size_t global_matrix_tiles_in_rows = 2; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + REQUIRE_THROWS(slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, global_matrix_tiles_in_cols)); + + } + + SECTION("Slowest Dimension Decomposer, RM, 2 sub-matrices, non-divisible") { + /** + * __________________________ + * |tile_0 | tile_1 | tile_2| + * ========================== + * |tile_3 | tile_4 | tile_5| + * ========================== + * |tile_3 | tile_4 | tile_5| + * -------------------------- + * |tile_3 | tile_4 | tile_5| + */ + + size_t number_of_sub_matrices = 3; + StorageLayout storage_layout = StorageLayout::HicmaRM; + size_t global_matrix_tiles_in_rows = 4; + size_t global_matrix_tiles_in_cols = 3; + hicmapp::runtime::HicmaContext context; + + SlowestDimDecomposer slowest_dim_decomposer(number_of_sub_matrices, storage_layout); + std::vector matrix_specs = slowest_dim_decomposer.Decompose(global_matrix_tiles_in_rows, + global_matrix_tiles_in_cols); + + REQUIRE(matrix_specs.size() == number_of_sub_matrices); + int i = 0; + for (; i < number_of_sub_matrices - 1; i++) { + REQUIRE(matrix_specs[i].GetNumOfTilesInCol() == 3); + REQUIRE(matrix_specs[i].GetNumOfTilesInRow() == 1); + REQUIRE(matrix_specs[i].GetStartingIndexInCols() == 0); + REQUIRE(matrix_specs[i].GetStartingIndexInRows() == i); + } + REQUIRE(matrix_specs[i].GetNumOfTilesInCol() == 3); + REQUIRE(matrix_specs[i].GetNumOfTilesInRow() == 2); + REQUIRE(matrix_specs[i].GetStartingIndexInCols() == 0); + REQUIRE(matrix_specs[i].GetStartingIndexInRows() == i); + + } + + /** These Test Cases need to be revised. 
A more general approach regardless of number of processes should be taken here */ +// SECTION("Two Dimension Decomposer, CM, 6 sub-matrices") { +// /** +// * __________________________ +// * |tile_0 || tile_2 || tile_4| +// * |tile_1 || tile_3 || tile_5| +// * -------------------------- +// */ +// StorageLayout storage_layout = StorageLayout::HicmaCM; +// hicmapp::runtime::HicmaContext context; +// +// size_t global_matrix_tiles_in_rows = 2; +// size_t global_matrix_tiles_in_cols = 3; +// +// +// int id = 0; +// int size = 1; +//#ifdef HICMAPP_USE_MPI +// MPI_Comm_rank(MPI_COMM_WORLD, &id); +// MPI_Comm_size(MPI_COMM_WORLD, &size); +//#endif +// +// size_t number_of_row_processes = std::max(size/2, 1); +// size_t number_of_col_processes = std::max(size/2, 1); +// TwoDimCyclicDecomposer two_dim_cyclic_decomposer(number_of_row_processes, number_of_col_processes); +// std::vector matrix_specs = two_dim_cyclic_decomposer.Decompose( +// global_matrix_tiles_in_rows, +// global_matrix_tiles_in_cols); +// +// size_t number_of_sub_matrices = size; +// size_t matrix_of_row_indexes[] = {0, 0, 0}; +// size_t matrix_of_col_indexes[] = {0, 1, 2,}; +// +// if (size == 1) { +// number_of_sub_matrices = 1; +// size_t matrix_of_row_indexes_one_proc[] = {0, 0, 0, 1, 1, 1}; +// size_t matrix_of_col_indexes_one_proc[] = {0, 1, 2, 0, 1, 2}; +// for (int i = 0; i < number_of_sub_matrices; i++) { +// REQUIRE(matrix_specs[i].GetStartingIndexInRows() == matrix_of_row_indexes_one_proc[i]); +// REQUIRE(matrix_specs[i].GetStartingIndexInCols() == matrix_of_col_indexes_one_proc[i]); +// } +// } else { +// for (int i = 0; i < number_of_sub_matrices; i++) { +// REQUIRE(matrix_specs[i].GetStartingIndexInRows() == matrix_of_row_indexes[i]); +// REQUIRE(matrix_specs[i].GetStartingIndexInCols() == matrix_of_col_indexes[i]); +// } +// } +// REQUIRE(number_of_sub_matrices == matrix_specs.size()); +// +// } +// +// SECTION("Two Dimension Decomposer, RM, 12 sub-matrices, non divisible by col") { +// /** 
+// * __________________________ +// * |tile_0 | tile_1 || tile_4| +// * |tile_2 | tile_3 || tile_5| +// * --------------------------- +// * |tile_6 | tile_7 || tile_10| +// * |tile_8 | tile_9 || tile_11| +// * -------------------------- +// */ +// size_t matrix_of_row_indexes[] = {0, 0, 2, 2}; +// size_t matrix_of_col_indexes[] = {0, 2, 0, 2}; +// +// StorageLayout storage_layout = StorageLayout::HicmaCM; +// size_t number_of_row_processes = 2; +// size_t number_of_col_processes = 2; +// +// size_t global_matrix_tiles_in_rows = 4; +// size_t global_matrix_tiles_in_cols = 3; +// +// int id = 0; +// int size = 1; +//#ifdef HICMAPP_USE_MPI +// MPI_Comm_rank(MPI_COMM_WORLD, &id); +// MPI_Comm_size(MPI_COMM_WORLD, &size); +//#endif +// hicmapp::runtime::HicmaContext context; +// +// TwoDimCyclicDecomposer two_dim_cyclic_decomposer(number_of_row_processes, number_of_col_processes); +// std::vector matrix_specs = two_dim_cyclic_decomposer.Decompose( +// global_matrix_tiles_in_rows, +// global_matrix_tiles_in_cols); +// +// size_t number_of_sub_matrices = 4; +// if (size == 1) { +// number_of_sub_matrices = global_matrix_tiles_in_rows * global_matrix_tiles_in_cols; +// size_t matrix_of_row_indexes_one_proc[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; +// size_t matrix_of_col_indexes_one_proc[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; +// for (int i = 0; i < number_of_sub_matrices; i++) { +// REQUIRE(matrix_specs[i].GetStartingIndexInRows() == matrix_of_row_indexes_one_proc[i]); +// REQUIRE(matrix_specs[i].GetStartingIndexInCols() == matrix_of_col_indexes_one_proc[i]); +// } +// } else { +// for (int i = 0; i < number_of_sub_matrices; i++) { +// REQUIRE(matrix_specs[i].GetStartingIndexInRows() == matrix_of_row_indexes[i]); +// REQUIRE(matrix_specs[i].GetStartingIndexInCols() == matrix_of_col_indexes[i]); +// } +// } +// REQUIRE(number_of_sub_matrices == matrix_specs.size()); +// +// } +// +// SECTION("Two Dimension Decomposer, RM, 12 sub-matrices, non divisible by both col and 
row") { +// /** +// * __________________________ +// * |tile_0 | tile_1 || tile_6| +// * |tile_2 | tile_3 || tile_7| +// * |tile_4 | tile_5 || tile_8| +// * --------------------------- +// * |tile_9 | tile_10 || tile_11| +// * -------------------------- +// */ +// +// size_t matrix_of_row_indexes[] = {0, 0, 3, 3}; +// size_t matrix_of_col_indexes[] = {0, 2, 0, 2}; +// +// StorageLayout storage_layout = StorageLayout::HicmaCM; +// size_t number_of_row_processes = 3; +// size_t number_of_col_processes = 2; +// +// size_t global_matrix_tiles_in_rows = 4; +// size_t global_matrix_tiles_in_cols = 3; +// hicmapp::runtime::HicmaContext context; +// +// TwoDimCyclicDecomposer two_dim_cyclic_decomposer(number_of_row_processes, number_of_col_processes); +// std::vector matrix_specs = two_dim_cyclic_decomposer.Decompose( +// global_matrix_tiles_in_rows, +// global_matrix_tiles_in_cols); +// +// int id = 0; +// int size = 1; +//#ifdef HICMAPP_USE_MPI +// MPI_Comm_rank(MPI_COMM_WORLD, &id); +// MPI_Comm_size(MPI_COMM_WORLD, &size); +//#endif +// size_t number_of_sub_matrices = 4; +// if (size == 1) { +// number_of_sub_matrices = global_matrix_tiles_in_rows * global_matrix_tiles_in_cols; +// size_t matrix_of_row_indexes_one_proc[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; +// size_t matrix_of_col_indexes_one_proc[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; +// for (int i = 0; i < number_of_sub_matrices; i++) { +// REQUIRE(matrix_specs[i].GetStartingIndexInRows() == matrix_of_row_indexes_one_proc[i]); +// REQUIRE(matrix_specs[i].GetStartingIndexInCols() == matrix_of_col_indexes_one_proc[i]); +// } +// } else { +// for (int i = 0; i < number_of_sub_matrices; i++) { +// REQUIRE(matrix_specs[i].GetStartingIndexInRows() == matrix_of_row_indexes[i]); +// REQUIRE(matrix_specs[i].GetStartingIndexInCols() == matrix_of_col_indexes[i]); +// } +// } +// REQUIRE(number_of_sub_matrices == matrix_specs.size()); +// } + +} + +TEMPLATE_TEST_CASE("MatrixDecomposerTest", "[MatrixDecomposer]", float, 
double) { + TEST_MATRIX_DECOMPOSER(); +} diff --git a/tests/primitives/TestMatrix.cpp b/tests/primitives/TestMatrix.cpp new file mode 100644 index 0000000..f91308e --- /dev/null +++ b/tests/primitives/TestMatrix.cpp @@ -0,0 +1,944 @@ +#include +#include + +#include +#include +#include +#include + +using namespace hicmapp::common; +using namespace hicmapp::primitives; + +template +void TEST_MATRIX() { + + SECTION("Test CM, slowestdimdecomposer 2 submatrices") { + int id = 0, size = 1; + hicmapp::runtime::HicmaCommunicator communicator; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &id); + MPI_Comm_size(MPI_COMM_WORLD, &size); + communicator.SetMPICommunicator(MPI_COMM_WORLD); +#endif + hicmapp::runtime::HicmaContext context(communicator); + printf(" SECTION 1 PROCESS # %d\n", id); + + /** + * matrix + * 0 3 || 6 9 + * ------ || ------- + * 1 4 || 7 10 + * ------ || ------- + * 2 5 || 8 11 + */ + T matrix_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + T matrix_data_expected[] = {0, 3, 1, 4, 2, 5, 6, 9, 7, 10, 8, 11}; + size_t matrix_rows = 3; + size_t matrix_cols = 4; + size_t tile_rows = 1; + size_t tile_cols = 2; + float eps = 1e-6; + StorageLayout storage_layout = StorageLayout::HicmaCM; + size_t rank = 0; + size_t number_of_sub_matrices = size; + size_t number_of_tile_per_sub_matrix = 3; + + SlowestDimDecomposer matrix_decomposer(number_of_sub_matrices, storage_layout); + Matrix matrix(matrix_data, matrix_rows, matrix_cols, tile_rows, tile_cols, storage_layout, + matrix_decomposer, context, + rank); + /* TEST GET NUMBER OF SUB MATRICES */ + if (id < number_of_sub_matrices) { + + ///for MPI support +// number_of_sub_matrices = number_of_sub_matrices / size == 0 ? 
1 : number_of_sub_matrices / size; + + REQUIRE(matrix.GetNumOfSubMatrices() == number_of_sub_matrices); + + /* TEST GET NUMBER OF TILES IN ROW */ + REQUIRE(matrix.GetNumOfGlobalTilesInRows() == 3); + + /* TEST GET NUMBER OF TILES IN ROW */ + REQUIRE(matrix.GetNumOfGlobalTilesInCols() == 2); + + //TODO: test contains tile when fixed. + + /* TEST GET STORAGE LAYOUT */ + REQUIRE(matrix.GetStorageLayout() == storage_layout); + + /* TEST IS MATRIX VALID */ + REQUIRE(matrix.IsMatrixValid() == true); + + /* TEST NUMBER OF ROWS IN TILE */ + REQUIRE(matrix.GetNumOfRowsInTile() == tile_rows); + + /* TEST NUMBER OF COLS IN TILE */ + REQUIRE(matrix.GetNumOfColsInTile() == tile_cols); + + /* TEST NUMBER OF ROWS IN MATRIX */ + REQUIRE(matrix.GetGlobalNumOfRowsInMatrix() == matrix_rows); + + /* TEST NUMBER OF COLS IN MATRIX */ + REQUIRE(matrix.GetGlobalNumOfColsInMatrix() == matrix_cols); + + /* TEST GET SUBMATRICES */ + auto sub_matrices = matrix.GetSubMatrices(); + size_t offset = 0; + auto *host_mem = new T[tile_rows * tile_cols]; + for (int i = id; i < number_of_sub_matrices; i++) { + size_t number_of_tiles_in_sub_matrix = sub_matrices[i]->GetNumberofTiles(); + for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { + hcorepp::memory::Memcpy(host_mem, + sub_matrices[i]->GetTiles()[j]->GetTileSubMatrix(0), + tile_rows * tile_cols, context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_cols * tile_rows; k++) { + REQUIRE(std::abs(host_mem[k] - matrix_data_expected[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + } + /* TEST GET SUBMATRIX */ + offset = 0; + for (int i = id; i < number_of_sub_matrices; i += size) { + auto sub_matrix = &matrix.GetSubMatrix(i); + size_t number_of_tiles_in_sub_matrix = sub_matrix->GetNumberofTiles(); + for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { + hcorepp::memory::Memcpy(host_mem, sub_matrix->GetTiles()[j]->GetTileSubMatrix(0), + 
tile_rows * tile_cols, context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_cols * tile_rows; k++) { + REQUIRE(std::abs(host_mem[k] - matrix_data_expected[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + } + /* TEST GET TILE POINTER */ + offset = id * tile_cols * tile_rows * number_of_tile_per_sub_matrix; + for (int i = 0; i < number_of_sub_matrices; i++) { + size_t col_idx = id + i; + for (int j = 0; j < number_of_tile_per_sub_matrix; j++) { + size_t row_idx = j; + auto tile = matrix.GetTilePointer(row_idx, col_idx); + hcorepp::memory::Memcpy(host_mem, tile->GetTileSubMatrix(0), tile_rows * tile_cols, + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_cols * tile_rows; k++) { + REQUIRE(std::abs(host_mem[k] - matrix_data_expected[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + + } + } + delete[] host_mem; + } else { + REQUIRE(matrix.GetNumOfSubMatrices() == 0); + + } + + } + + SECTION("Test CM, default decomposer") { + /** + * matrix + * 0 3 | 6 9 + * ------ | ------- + * 1 4 | 7 10 + * ------ | ------- + * 2 5 | 8 11 + */ + int id = 0, size = 1; + hicmapp::runtime::HicmaCommunicator communicator; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &id); + MPI_Comm_size(MPI_COMM_WORLD, &size); + communicator.SetMPICommunicator(MPI_COMM_WORLD); +#endif + + hicmapp::runtime::HicmaContext context(communicator); + + T matrix_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + T matrix_data_expected[] = {0, 3, 1, 4, 2, 5, 6, 9, 7, 10, 8, 11}; + size_t matrix_rows = 3; + size_t matrix_cols = 4; + size_t global_number_of_tiles_in_col = 2; + size_t global_number_of_tiles_in_row = 3; + size_t tile_rows = 1; + size_t tile_cols = 2; + float eps = 1e-6; + StorageLayout storage_layout = StorageLayout::HicmaCM; + size_t rank = 0; + size_t number_of_sub_matrices 
= 1; + size_t global_number_of_tiles = 6; + size_t number_of_tiles_per_sub_matrix = 3; /**Slowest Dim Decomposer in the case where total number of processes <= global_number_of_tiles_in_col number of submatrices = 2*/ + Matrix matrix(matrix_data, matrix_rows, matrix_cols, tile_rows, tile_cols, storage_layout, context, + rank); + + if (global_number_of_tiles_in_col >= size || id == size - 1) { + + ///for MPI support +// number_of_sub_matrices = number_of_sub_matrices / size == 0 ? 1 : number_of_sub_matrices / size; + /** TEST GET NUMBER OF SUB MATRICES **/ + REQUIRE(matrix.GetNumOfSubMatrices() == number_of_sub_matrices); + + /** TEST GET NUMBER OF TILES IN ROW **/ + REQUIRE(matrix.GetNumOfGlobalTilesInRows() == global_number_of_tiles_in_row); + + /** TEST GET NUMBER OF TILES IN ROW **/ + REQUIRE(matrix.GetNumOfGlobalTilesInCols() == global_number_of_tiles_in_col); + + ///TODO: test contains tile when fixed. + + /** TEST GET STORAGE LAYOUT **/ + REQUIRE(matrix.GetStorageLayout() == storage_layout); + + /** TEST IS MATRIX VALID **/ + REQUIRE(matrix.IsMatrixValid() == true); + + /** TEST NUMBER OF ROWS IN TILE **/ + REQUIRE(matrix.GetNumOfRowsInTile() == tile_rows); + + /** TEST NUMBER OF COLS IN TILE **/ + REQUIRE(matrix.GetNumOfColsInTile() == tile_cols); + + /** TEST NUMBER OF ROWS IN MATRIX **/ + REQUIRE(matrix.GetGlobalNumOfRowsInMatrix() == matrix_rows); + + /** TEST NUMBER OF COLS IN MATRIX **/ + REQUIRE(matrix.GetGlobalNumOfColsInMatrix() == matrix_cols); + + /** TEST GET SUBMATRICES **/ + auto sub_matrices = matrix.GetSubMatrices(); + size_t offset = (id - size + 1) * tile_cols * tile_rows * global_number_of_tiles; + if (id == 0) { + offset = 0; + } else if (id == 1) { + offset = 6; + } + auto *host_mem = new T[tile_rows * tile_cols]; + size_t number_of_tiles_in_sub_matrix; + for (int i = 0; i < number_of_sub_matrices; i++) { + number_of_tiles_in_sub_matrix = sub_matrices[i]->GetNumberofTiles(); + for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { + 
hcorepp::memory::Memcpy(host_mem, + sub_matrices[i]->GetTiles()[j]->GetTileSubMatrix(0), + tile_rows * tile_cols, context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_cols * tile_rows; k++) { + REQUIRE(std::abs(host_mem[k] - matrix_data_expected[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + } + + /** TEST GET SUBMATRIX **/ + offset = (id - size + 1) * tile_cols * tile_rows * global_number_of_tiles; + if (id == 0) { + offset = 0; + } else if (id == 1) { + offset = 6; + } + for (int i = 0; i < number_of_sub_matrices; i++) { + auto sub_matrix = &matrix.GetSubMatrix(i); + number_of_tiles_in_sub_matrix = sub_matrix->GetNumberofTiles(); + for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { + hcorepp::memory::Memcpy(host_mem, sub_matrix->GetTiles()[j]->GetTileSubMatrix(0), + tile_rows * tile_cols, context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_cols * tile_rows; k++) { + REQUIRE(std::abs(host_mem[k] - matrix_data_expected[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + } + + + offset = (id - size + 1) * tile_cols * tile_rows * global_number_of_tiles; + if (id == 0) { + offset = 0; + } else if (id == 1) { + offset = 6; + } + number_of_sub_matrices = global_number_of_tiles_in_col >= size ? 
1 : 2; + + for (int i = 0; i < number_of_sub_matrices; i++) { + size_t col_idx = id - size + 1 + i; + number_of_tiles_in_sub_matrix = 3; + + if (id == 0) { + col_idx = 0; + } else if (id == 1) { + col_idx = 1; + } + for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { + size_t row_idx = j; + auto tile = matrix.GetTilePointer(row_idx, col_idx); + hcorepp::memory::Memcpy(host_mem, tile->GetTileSubMatrix(0), tile_rows * tile_cols, + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_cols * tile_rows; k++) { + + REQUIRE(std::abs(host_mem[k] - matrix_data_expected[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + } + delete[] host_mem; + + } else { + REQUIRE(matrix.GetNumOfSubMatrices() == 0); + } + } + + SECTION("Test RM, slowestdimdecomposer 2 submatrices") { + int id = 0, size = 1; + /** + * matrix + * 0 1 | 2 3 + * ------ | ------- + * ================ + * 4 5 | 6 7 + * ------ | ------- + * 8 9 | 10 11 + */ + hicmapp::runtime::HicmaCommunicator communicator; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &id); + MPI_Comm_size(MPI_COMM_WORLD, &size); + communicator.SetMPICommunicator(MPI_COMM_WORLD); +#endif + + hicmapp::runtime::HicmaContext context; + T matrix_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + T matrix_data_expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + T tiles_data_expected[][6] = {{0, 1}, + {2, 3}, + {4, 5}, + {6, 7}, + {8, 9}, + {10, 11}}; + size_t matrix_rows = 3; + size_t matrix_cols = 4; + size_t tile_rows = 1; + size_t tile_cols = 2; + float eps = 1e-6; + StorageLayout storage_layout = StorageLayout::HicmaRM; + size_t rank = 0; + size_t total_number_of_sub_matrices = size; + size_t number_of_sub_matrices = size; + size_t number_of_tile_per_sub_matrix[] = {2, 4}; + SlowestDimDecomposer matrix_decomposer(number_of_sub_matrices, storage_layout); + + Matrix matrix(matrix_data, matrix_rows, matrix_cols, tile_rows, 
tile_cols, storage_layout, + matrix_decomposer, context, + rank); + context.SyncMainContext(); + /** TEST GET NUMBER OF SUB MATRICES **/ + if (id < number_of_sub_matrices) { + + ///for MPI support +// number_of_sub_matrices = number_of_sub_matrices / size == 0 ? 1 : number_of_sub_matrices / size; + + REQUIRE(matrix.GetNumOfSubMatrices() == number_of_sub_matrices); + + /** TEST GET NUMBER OF TILES IN ROW **/ + REQUIRE(matrix.GetNumOfGlobalTilesInRows() == 3); + + /** TEST GET NUMBER OF TILES IN ROW **/ + REQUIRE(matrix.GetNumOfGlobalTilesInCols() == 2); + + //TODO: test contains tile when fixed. + + /** TEST GET STORAGE LAYOUT **/ + REQUIRE(matrix.GetStorageLayout() == storage_layout); + + /** TEST IS MATRIX VALID **/ + REQUIRE(matrix.IsMatrixValid() == true); + + /** TEST NUMBER OF ROWS IN TILE **/ + REQUIRE(matrix.GetNumOfRowsInTile() == tile_rows); + + /** TEST NUMBER OF COLS IN TILE **/ + REQUIRE(matrix.GetNumOfColsInTile() == tile_cols); + + /** TEST NUMBER OF ROWS IN MATRIX **/ + REQUIRE(matrix.GetGlobalNumOfRowsInMatrix() == matrix_rows); + + /** TEST NUMBER OF COLS IN MATRIX **/ + REQUIRE(matrix.GetGlobalNumOfColsInMatrix() == matrix_cols); + + /** TEST GET SUBMATRICES **/ + auto sub_matrices = matrix.GetSubMatrices(); + + size_t offset = 0; + if (id == 1) { + offset = +id * tile_cols * tile_rows * 2; + } + + auto *host_mem = new T[tile_rows * tile_cols]; + for (int i = 0; i < number_of_sub_matrices; i++) { + size_t number_of_tiles_in_sub_matrix = sub_matrices[i]->GetNumberofTiles(); + for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { + hcorepp::memory::Memcpy(host_mem, + sub_matrices[i]->GetTiles()[j]->GetTileSubMatrix(0), + tile_rows * tile_cols, context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_cols * tile_rows; k++) { + REQUIRE(std::abs(host_mem[k] - matrix_data_expected[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + } + + /** TEST 
GET SUBMATRIX **/ + offset = 0; + if (id == 1) { + offset = +id * tile_cols * tile_rows * 2; + } + + for (int i = 0; i < number_of_sub_matrices; i++) { + auto sub_matrix = &matrix.GetSubMatrix(i); + size_t number_of_tiles_in_sub_matrix = sub_matrix->GetNumberofTiles(); + for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { + hcorepp::memory::Memcpy(host_mem, sub_matrix->GetTiles()[j]->GetTileSubMatrix(0), + tile_rows * tile_cols, context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_cols * tile_rows; k++) { + REQUIRE(std::abs(host_mem[k] - matrix_data_expected[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + } + + offset = id * 2; + auto expected_offset = offset; + for (int i = 0, idx = id; i < number_of_sub_matrices, idx < total_number_of_sub_matrices; i++, idx += size) { + for (int j = 0; j < number_of_tile_per_sub_matrix[idx]; j++) { + size_t row = (offset + j) / 2; + size_t col = (offset + j) % 2; + + auto tile = matrix.GetTilePointer(row, col); + hcorepp::memory::Memcpy(host_mem, tile->GetTileSubMatrix(0), tile_rows * tile_cols, + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_cols * tile_rows; k++) { + REQUIRE(std::abs(host_mem[k] - matrix_data_expected[k + expected_offset]) <= std::abs(eps)); + } + expected_offset += size * 2; + } + offset += size * 2; + } + delete[] host_mem; + + } else { + REQUIRE(matrix.GetNumOfSubMatrices() == 0); + } + } + + SECTION("Test RM, default decomposer") { + int id = 0, size = 1; + + /** + * matrix + * 0 1 | 2 3 + * ------ | ------- + * 4 5 | 6 7 + * ------ | ------- + * 8 9 | 10 11 + */ + + hicmapp::runtime::HicmaCommunicator communicator; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &id); + MPI_Comm_size(MPI_COMM_WORLD, &size); + communicator.SetMPICommunicator(MPI_COMM_WORLD); +#endif + + 
hicmapp::runtime::HicmaContext context; + T matrix_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + T matrix_data_expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + T tiles_data_expected[][6] = {{0, 1}, + {2, 3}, + {4, 5}, + {6, 7}, + {8, 9}, + {11, 10}}; + size_t matrix_rows = 3; + size_t matrix_cols = 4; + size_t tile_rows = 1; + size_t tile_cols = 2; + float eps = 1e-6; + size_t global_number_of_tiles = 6; + size_t global_number_of_tiles_in_rows = 3; + StorageLayout storage_layout = StorageLayout::HicmaRM; + size_t rank = 0; + size_t total_number_of_sub_matrices = 1; + size_t number_of_sub_matrices = 1; + size_t number_of_tile_per_sub_matrix = 6; + + Matrix matrix(matrix_data, matrix_rows, matrix_cols, tile_rows, tile_cols, storage_layout, context, rank); + if (global_number_of_tiles_in_rows >= size || id == size - 1) { + + ///for MPI support +// number_of_sub_matrices = number_of_sub_matrices / size == 0 ? 1 : number_of_sub_matrices / size; + + /** TEST GET NUMBER OF SUB MATRICES **/ + REQUIRE(matrix.GetNumOfSubMatrices() == number_of_sub_matrices); + + /** TEST GET NUMBER OF TILES IN ROW **/ + REQUIRE(matrix.GetNumOfGlobalTilesInRows() == 3); + + /** TEST GET NUMBER OF TILES IN ROW **/ + REQUIRE(matrix.GetNumOfGlobalTilesInCols() == 2); + + ///TODO: test contains tile when fixed. 
+ + /** TEST GET STORAGE LAYOUT **/ + REQUIRE(matrix.GetStorageLayout() == storage_layout); + + /** TEST IS MATRIX VALID **/ + REQUIRE(matrix.IsMatrixValid() == true); + + /** TEST NUMBER OF ROWS IN TILE **/ + REQUIRE(matrix.GetNumOfRowsInTile() == tile_rows); + + /** TEST NUMBER OF COLS IN TILE **/ + REQUIRE(matrix.GetNumOfColsInTile() == tile_cols); + + /** TEST NUMBER OF ROWS IN MATRIX **/ + REQUIRE(matrix.GetGlobalNumOfRowsInMatrix() == matrix_rows); + + /** TEST NUMBER OF COLS IN MATRIX **/ + REQUIRE(matrix.GetGlobalNumOfColsInMatrix() == matrix_cols); + + /** TEST GET SUBMATRICES **/ + auto sub_matrices = matrix.GetSubMatrices(); + size_t offset = (id - size + 1) * tile_cols * tile_rows * global_number_of_tiles; + if (size <= global_number_of_tiles_in_rows) { + offset = 4 * id; + } + size_t number_of_tiles_in_sub_matrix; + auto *host_mem = new T[tile_rows * tile_cols]; + for (int i = 0; i < number_of_sub_matrices; i++) { + number_of_tiles_in_sub_matrix = sub_matrices[i]->GetNumberofTiles(); + for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { + hcorepp::memory::Memcpy(host_mem, + sub_matrices[i]->GetTiles()[j]->GetTileSubMatrix(0), + tile_rows * tile_cols, context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_cols * tile_rows; k++) { + REQUIRE(std::abs(host_mem[k] - matrix_data_expected[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + } + /** TEST GET SUBMATRIX **/ + offset = (id - size + 1) * tile_cols * tile_rows * global_number_of_tiles; + if (size <= global_number_of_tiles_in_rows) { + offset = 4 * id; + } + for (int i = 0; i < number_of_sub_matrices; i++) { + auto sub_matrix = &matrix.GetSubMatrix(i); + number_of_tiles_in_sub_matrix = sub_matrix->GetNumberofTiles(); + for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { + hcorepp::memory::Memcpy(host_mem, sub_matrix->GetTiles()[j]->GetTileSubMatrix(0), + tile_rows * tile_cols, 
context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_cols * tile_rows; k++) { + REQUIRE(std::abs(host_mem[k] - matrix_data_expected[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + } + + offset = (id - size + 1) * tile_cols * tile_rows * global_number_of_tiles; + if (size <= global_number_of_tiles_in_rows) { + offset = 4 * id; + } + + int idx = 0; + if (size == 2 && id == 1) { + idx = 1; + } + int starting = id; + int until = 3 / size + id + idx; + if (size > global_number_of_tiles_in_rows) { + starting = 0; + until = 3; + } + for (int k = starting; k < until; k++) { + size_t row_idx = k; + for (int j = 0; j < matrix.GetNumOfGlobalTilesInCols(); j++) { + size_t col_idx = j; + auto tile = matrix.GetTilePointer(row_idx, col_idx); + hcorepp::memory::Memcpy(host_mem, tile->GetTileSubMatrix(0), tile_rows * tile_cols, + context.GetMainContext(), + hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int i = 0; i < tile_rows * tile_cols; i++) { + REQUIRE(std::abs(host_mem[i] - matrix_data_expected[i + offset]) <= std::abs(eps)); + } + offset += tile_cols * tile_rows; + } + } + + /** TEST GET TILE **/ +// offset = id * 2; +// for (int i = 0, idx = id; +// i < number_of_sub_matrices, idx < total_number_of_sub_matrices; i++, idx += size) { +// for (int j = 0; j < number_of_tile_per_sub_matrix; j++) { +// size_t row = (offset + j) / 2; +// size_t col = (offset + j) % 2; +// +// auto tile = matrix.GetTilePointer(row, col); +// for (int k = 0; k < tile_cols * tile_rows; k++) { +// tiles_data_expected[j + offset][k]); +// } +// } +// offset += size * 2; +// } + delete[] host_mem; + + } else { + REQUIRE(matrix.GetNumOfSubMatrices() == 0); + } + } + + /** Decomposer Tests need to be Revised. 
Assumptions to be corrected */ +// SECTION("Test RM, twodimcyclicdecomposer 4 submatrices") { +// int id = 0, size = 1; +// /** +// * ______________ +// * | 0 | 1 || 6| +// * | 2 | 3 || 7| +// * | 4 | 5 || 8| +// * -------------- +// * | 9 |10 || 11| +// * -------------- +// */ +// hicmapp::runtime::HicmaCommunicator communicator; +//#ifdef HICMAPP_USE_MPI +// MPI_Comm_rank(MPI_COMM_WORLD, &id); +// MPI_Comm_size(MPI_COMM_WORLD, &size); +// communicator.SetMPICommunicator(MPI_COMM_WORLD); +//#endif +// +// hicmapp::runtime::HicmaContext context; +// T matrix_data[] = {0, 1, 6, 2, 3, 7, 4, 5, 8, 9, 10, 11}; +// T matrix_data_expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; +// T sub_matrix_1_data[] = {0, 1, 2, 3, 4, 5}; +// T sub_matrix_2_data[] = {6, 7, 8}; +// T sub_matrix_3_data[] = {9, 10}; +// T sub_matrix_4_data[] = {11}; +// T *sub_matrix_data[] = {sub_matrix_1_data, sub_matrix_2_data, sub_matrix_3_data, sub_matrix_4_data}; +// size_t sub_matrix_global_starting_row_index[] = {0, 0, 3, 3}; +// size_t sub_matrix_global_starting_col_index[] = {0, 2, 0, 2}; +// size_t matrix_rows = 4; +// size_t matrix_cols = 3; +// size_t tile_rows = 1; +// size_t tile_cols = 1; +// float eps = 1e-6; +// StorageLayout storage_layout = StorageLayout::HicmaRM; +// size_t rank = 0; +// size_t number_of_processes_in_row = 3; +// size_t number_of_processes_in_col = 2; +// size_t total_number_of_sub_matrices = 4; +// size_t number_of_sub_matrices = 4; +// +// TwoDimCyclicDecomposer matrix_decomposer(number_of_processes_in_row, number_of_processes_in_col); +// +// Matrix matrix(matrix_data, matrix_rows, matrix_cols, tile_rows, tile_cols, storage_layout, +// matrix_decomposer, context, +// rank); +// size_t num_sub = matrix.GetNumOfSubMatrices(); +// if (id < number_of_sub_matrices) { +// +// if (size == 1) { +// // assuming 1 * 1 tiles. 
+// number_of_sub_matrices = matrix_rows * matrix_cols; +// } else { +// ///for MPI support +// number_of_sub_matrices = number_of_sub_matrices / size == 0 ? 1 : number_of_sub_matrices / size; +// number_of_sub_matrices = (size == 3 && id == 0) ? 2 : number_of_sub_matrices; +// } +// +// REQUIRE(matrix.GetNumOfSubMatrices() == number_of_sub_matrices); +// +// /** TEST GET NUMBER OF TILES IN ROW **/ +// REQUIRE(matrix.GetNumOfGlobalTilesInRows() == 4); +// +// /** TEST GET NUMBER OF TILES IN ROW **/ +// REQUIRE(matrix.GetNumOfGlobalTilesInCols() == 3); +// +// /** TEST GET STORAGE LAYOUT **/ +// REQUIRE(matrix.GetStorageLayout() == storage_layout); +// +// /** TEST IS MATRIX VALID **/ +// REQUIRE(matrix.IsMatrixValid() == true); +// +// /** TEST NUMBER OF ROWS IN TILE **/ +// REQUIRE(matrix.GetNumOfRowsInTile() == tile_rows); +// +// /** TEST NUMBER OF COLS IN TILE **/ +// REQUIRE(matrix.GetNumOfColsInTile() == tile_cols); +// +// /** TEST NUMBER OF ROWS IN MATRIX **/ +// REQUIRE(matrix.GetGlobalNumOfRowsInMatrix() == matrix_rows); +// +// /** TEST NUMBER OF COLS IN MATRIX **/ +// REQUIRE(matrix.GetGlobalNumOfColsInMatrix() == matrix_cols); +// +// /** TEST GET SUBMATRICES **/ +// size_t expected_index = id; +// auto *host_mem = new T; +// auto sub_matrices = matrix.GetSubMatrices(); +// for (int i = 0; i < number_of_sub_matrices; i++) { +// size_t number_of_tiles_in_sub_matrix = sub_matrices[i]->GetNumberofTiles(); +// for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { +// hcorepp::memory::Memcpy(host_mem, +// sub_matrices[i]->GetTiles()[j]->GetTileSubMatrix(0), 1, +// context.GetMainContext(), +// hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); +// context.SyncMainContext(); +// if (size == 1) { +// REQUIRE(std::abs(*host_mem - matrix_data[i]) <= std::abs(eps)); +// } else { +// REQUIRE(std::abs(*host_mem - sub_matrix_data[expected_index][j]) <= std::abs(eps)); +// } +// } +// expected_index += size; +// } +// +// /** TEST GET SUBMATRIX **/ +// 
expected_index = id; +// for (int i = 0; i < number_of_sub_matrices; i++) { +// auto sub_matrix = &matrix.GetSubMatrix(i); +// size_t number_of_tiles_in_sub_matrix = sub_matrix->GetNumberofTiles(); +// for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { +// hcorepp::memory::Memcpy(host_mem, sub_matrix->GetTiles()[j]->GetTileSubMatrix(0), 1, +// context.GetMainContext(), +// hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); +// context.SyncMainContext(); +// if (size == 1) { +// REQUIRE(std::abs(*host_mem - matrix_data[i]) <= std::abs(eps)); +// } else { +// REQUIRE(std::abs(*host_mem - sub_matrix_data[expected_index][j]) <= std::abs(eps)); +// } +// } +// expected_index += size; +// } +// +// /** TEST GET TILE POINTER **/ +// for (size_t idx_i = id; idx_i < total_number_of_sub_matrices; idx_i += size) { +// +// size_t i = sub_matrix_global_starting_row_index[idx_i]; +// auto num_of_rows = std::min(number_of_processes_in_row, matrix_rows - i); +// size_t j = sub_matrix_global_starting_col_index[idx_i]; +// auto num_of_cols = std::min(number_of_processes_in_col, matrix_cols - j); +// +// for (int r = 0; r < num_of_rows; r++) { +// for (int c = 0; c < num_of_cols; c++) { +// auto global_idx_row = r + i; +// auto global_idx_col = c + j; +// auto tile = matrix.GetTilePointer(global_idx_row, global_idx_col); +// hcorepp::memory::Memcpy(host_mem, tile->GetTileSubMatrix(0), 1, +// context.GetMainContext(), +// hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); +// context.SyncMainContext(); +// REQUIRE(std::abs(*host_mem - matrix_data[global_idx_row * matrix_cols + global_idx_col]) <= +// std::abs(eps)); +// } +// } +// } +// delete host_mem; +// +// } else { +// REQUIRE(matrix.GetNumOfSubMatrices() == 0); +// +// } +// } + + SECTION("Test RM, non-divisible Matrix") { + /** + * 0 1 || 2 3 + * ================== + * 4 5 || 6 7 + * ================== + * 8 9 || 10 11 + * ================== + * 12 13 || 14 15 + * --------||-------- + * 16 17 || 18 19 + * + */ + int id = 
0, size = 1; + + hicmapp::runtime::HicmaCommunicator communicator; +#ifdef HICMAPP_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &id); + MPI_Comm_size(MPI_COMM_WORLD, &size); + communicator.SetMPICommunicator(MPI_COMM_WORLD); +#endif + + hicmapp::runtime::HicmaContext context; + T matrix_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; + T matrix_data_expected[][4] = {{0, 1, 2, 3}, + {4, 5, 6, 7}, + {8, 9, 10, 11}, + {12, 13, 14, 15}, + {16, 17, 18, 19}}; + size_t matrix_rows = 5; + T tiles_data_expected[][2] = {{0, 1}, + {2, 3}, + {4, 5}, + {6, 7}, + {8, 9}, + {10, 11}, + {12, 13}, + {14, 15}, + {16, 17}, + {18, 19}}; + size_t matrix_cols = 4; + size_t tile_rows = 1; + size_t tile_cols = 2; + StorageLayout storage_layout = StorageLayout::HicmaRM; + size_t rank = 0; + size_t total_number_of_tiles = 10; + float eps = 1e-6; + size_t total_number_of_sub_matrices = size; + size_t number_of_sub_matrices = size; + SlowestDimDecomposer matrix_decomposer(number_of_sub_matrices, storage_layout); + + Matrix matrix(matrix_data, matrix_rows, matrix_cols, tile_rows, tile_cols, storage_layout, + matrix_decomposer, context, + rank); + if (id < number_of_sub_matrices) { + + ///for MPI support +// number_of_sub_matrices = number_of_sub_matrices / size == 0 ? 1 : number_of_sub_matrices / size; + size_t remainder_sub_matrices = total_number_of_sub_matrices - (number_of_sub_matrices * size); + + if (id == 0 && size < total_number_of_sub_matrices) { + number_of_sub_matrices += remainder_sub_matrices; + } + /** TEST GET NUMBER OF SUB MATRICES **/ + REQUIRE(matrix.GetNumOfSubMatrices() == number_of_sub_matrices); + + /** TEST GET NUMBER OF SUB MATRICES **/ + REQUIRE(matrix.GetNumOfSubMatrices() == number_of_sub_matrices); + + /** TEST GET NUMBER OF TILES IN ROW **/ + REQUIRE(matrix.GetNumOfGlobalTilesInRows() == 5); + + /** TEST GET NUMBER OF TILES IN ROW **/ + REQUIRE(matrix.GetNumOfGlobalTilesInCols() == 2); + + ///TODO: test contains tile when fixed. 
+ + /** TEST GET STORAGE LAYOUT **/ + REQUIRE(matrix.GetStorageLayout() == storage_layout); + + /** TEST IS MATRIX VALID **/ + REQUIRE(matrix.IsMatrixValid() == true); + + /** TEST NUMBER OF ROWS IN TILE **/ + REQUIRE(matrix.GetNumOfRowsInTile() == tile_rows); + + /** TEST NUMBER OF COLS IN TILE **/ + REQUIRE(matrix.GetNumOfColsInTile() == tile_cols); + + /** TEST NUMBER OF ROWS IN MATRIX **/ + REQUIRE(matrix.GetGlobalNumOfRowsInMatrix() == matrix_rows); + + /** TEST NUMBER OF COLS IN MATRIX **/ + REQUIRE(matrix.GetGlobalNumOfColsInMatrix() == matrix_cols); + } + } + + /** These tests need to be revised as well */ + +//// for (int i = 0, j = id; i < number_of_sub_matrices && j < total_number_of_sub_matrices; i++, j += size) { +//// REQUIRE(matrix.GetSubMatrix(i).GetNumberofTiles() == number_of_tiles_per_sub_matrix[j]); +//// } +// +// +// /** TEST GET SUB-MATRICES*/ +// auto sub_matrices = matrix.GetSubMatrices(); +// size_t offset = id * 2; +// auto *host_mem = new T[tile_rows * tile_cols]; +// for (int i = 0; i < number_of_sub_matrices; i++) { +// size_t number_of_tiles_in_sub_matrix = sub_matrices[i]->GetNumberofTiles(); +// for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { +// hcorepp::memory::Memcpy(host_mem, +// sub_matrices[i]->GetTiles()[j]->GetTileSubMatrix(0), +// tile_rows * tile_cols, context.GetMainContext(), +// hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); +// context.SyncMainContext(); +// for (int k = 0; k < tile_cols * tile_rows; k++) { +// REQUIRE(std::abs(host_mem[k] - tiles_data_expected[j + offset][k]) <= std::abs(eps)); +// } +// } +// offset += size * 2; +// } +// +// /** TEST GET SUBMATRIX **/ +// offset = id * 2; +// +// for (int i = 0; i < number_of_sub_matrices; i++) { +// auto sub_matrix = &matrix.GetSubMatrix(i); +// size_t number_of_tiles_in_sub_matrix = sub_matrix->GetNumberofTiles(); +// for (int j = 0; j < number_of_tiles_in_sub_matrix; j++) { +// hcorepp::memory::Memcpy(host_mem, 
sub_matrix->GetTiles()[j]->GetTileSubMatrix(0), +// tile_rows * tile_cols, context.GetMainContext(), +// hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); +// context.SyncMainContext(); +// for (int k = 0; k < tile_cols * tile_rows; k++) { +// REQUIRE(std::abs(host_mem[k] - tiles_data_expected[j + offset][k]) <= std::abs(eps)); +// } +// } +// offset += size * 2; +// } +//// +//// /** TEST GET TILE POINTER **/ +// offset = id * 2; +// for (int i = 0, idx = id; +// i < number_of_sub_matrices, idx < total_number_of_sub_matrices; i++, idx += size) { +// for (int j = 0; j < number_of_tiles_per_sub_matrix[idx]; j++) { +// size_t row = (offset + j) / 2; +// size_t col = (offset + j) % 2; +// +// auto tile = matrix.GetTilePointer(row, col); +// hcorepp::memory::Memcpy(host_mem, tile->GetTileSubMatrix(0), tile_rows * tile_cols, +// context.GetMainContext(), +// hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); +// context.SyncMainContext(); +// for (int k = 0; k < tile_cols * tile_rows; k++) { +// REQUIRE(std::abs(host_mem[k] - tiles_data_expected[j + offset][k]) <= std::abs(eps)); +// } +// } +// offset += size * 2; +// } +// delete[] host_mem; +// +// } else { +// REQUIRE(matrix.GetNumOfSubMatrices() == 0); +// } +// } +} + + +TEMPLATE_TEST_CASE("MatrixTest", "[Matrix]", double, float) { + TEST_MATRIX(); +} + diff --git a/tests/primitives/TestSubMatrix.cpp b/tests/primitives/TestSubMatrix.cpp new file mode 100644 index 0000000..e2746bd --- /dev/null +++ b/tests/primitives/TestSubMatrix.cpp @@ -0,0 +1,355 @@ +#include +#include +#include + +using namespace hicmapp::common; +using namespace hicmapp::primitives; + +/** + * Catch2 test body for SubMatrix; it is invoked from the TEMPLATE_TEST_CASE at the end of this file, + * which lists float and double as element types. + * Each SECTION builds one SubMatrix from a small host array, checks its tile bookkeeping + * (tile counts, tile dimensions, ContainsTile, owner id, validity) and, where host data is supplied, + * copies every tile back with hcorepp::memory::Memcpy(DEVICE_TO_HOST) and compares it element-wise. + * NOTE(review): `template` is followed directly by `void` although T is used throughout the body - + * the template parameter list appears to be missing here; confirm against the original file. + * NOTE(review): sub_matrix and host_mem are managed with raw new/delete; presumably a failing REQUIRE + * leaves the SECTION before the deletes run - confirm and consider RAII owners. + */ +template +void TEST_SUBMATRIX() { + SECTION("Test CM, 1 sub_matrix matrix") { + /** assuming a matrix composed of + * 1 submatrix and 2 tiles: + * + 0 6 + 1 7 + 2 8 + ----- + 3 9 + 4 10 + 5 11 + * + **/ + hicmapp::runtime::HicmaContext context; + size_t global_matrix_cols = 2; + size_t global_matrix_rows = 6; + size_t sub_matrix_cols = 2; + size_t sub_matrix_rows 
= 6; + size_t tile_cols = 2; + size_t tile_rows = 3; + /* absolute tolerance used by the element-wise comparisons below */ + float eps = 1e-6; + size_t tile_global_start_index_in_rows = 0; + size_t tile_global_start_index_in_cols = 0; + size_t owner_id = 0; + T data_array[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + /* the same values regrouped tile by tile, in the order the checks below read the tiles back */ + T tiles_data[] = {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11}; + + StorageLayout storage_layout = hicmapp::common::StorageLayout::HicmaCM; + + auto sub_matrix = new SubMatrix(data_array, tile_rows, tile_cols, global_matrix_rows, global_matrix_cols, + sub_matrix_rows, sub_matrix_cols, tile_global_start_index_in_rows, + tile_global_start_index_in_cols, storage_layout, owner_id, context); + + /** TEST GET NUMBER OF TILES **/ + size_t number_of_tiles = sub_matrix->GetNumberofTiles(); + REQUIRE(number_of_tiles == 2); + + /** TEST GET NUMBER OF TILES IN ROW **/ + size_t number_of_tiles_in_row = sub_matrix->GetNumOfTilesinRows(); + REQUIRE(number_of_tiles_in_row == 2); + + /** TEST GET NUMBER OF TILES IN COL **/ + size_t number_of_tiles_in_col = sub_matrix->GetNumOfTilesinCols(); + REQUIRE(number_of_tiles_in_col == 1); + + /** TEST GET NUMBER OF ROWS PER TILE **/ + REQUIRE(sub_matrix->GetTileRows() == tile_rows); + + /** TEST GET NUMBER OF COLS PER TILE **/ + REQUIRE(sub_matrix->GetTileCols() == tile_cols); + + /** TEST GET TILES: copy each tile to host_mem and compare it with the next slice of tiles_data **/ + size_t offset = 0; + auto tiles = sub_matrix->GetTiles(); + auto* host_mem = new T[tile_cols * tile_rows]; + for (int i = 0; i < number_of_tiles; i++) { + hcorepp::memory::Memcpy(host_mem, tiles[i]->GetTileSubMatrix(0), tile_rows * tile_cols, context.GetMainContext(), hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_rows * tile_cols; k++) { + REQUIRE(std::abs(host_mem[k] - tiles_data[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + + /** TEST GET TILE POINTER: tiles addressed by (i, j) are compared against tiles_data in the same order as above **/ + offset = 0; + for (int i = 0; i < number_of_tiles_in_row; i++) { + for (int j = 0; j < number_of_tiles_in_col; j++) { + auto tile = sub_matrix->GetTilePointer(i, 
j); + hcorepp::memory::Memcpy(host_mem, tile->GetTileSubMatrix(0), tile_rows * tile_cols, context.GetMainContext(), hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_rows * tile_cols; k++) { + REQUIRE(std::abs(host_mem[k] - tiles_data[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + } + + /** TEST CONTAINS TILE **/ + REQUIRE(sub_matrix->ContainsTile(0, 0) == true); + REQUIRE(sub_matrix->ContainsTile(0, 1) == false); + REQUIRE(sub_matrix->ContainsTile(1, 0) == true); + REQUIRE(sub_matrix->ContainsTile(1, 1) == false); + REQUIRE(sub_matrix->ContainsTile(0, 2) == false); + + /** TEST GetSubMatrixOwnerId **/ + REQUIRE(sub_matrix->GetSubMatrixOwnerId() == 0); + + /** TEST IS VALID **/ + REQUIRE(sub_matrix->IsValid() == true); + delete [] host_mem; + delete sub_matrix; + } + + SECTION("Test CM, 1 sub_matrix matrix, null-pointer data") { + + /** assuming a matrix composed of + * 1 submatrix and 4 tiles: + * + * + * n n | n n + * n n | n n + * -----|------ + * n n | n n + * n n | n n + * + **/ + hicmapp::runtime::HicmaContext context; + size_t global_matrix_cols = 4; + size_t global_matrix_rows = 4; + size_t sub_matrix_cols = 4; + size_t sub_matrix_rows = 4; + size_t tile_cols = 2; + size_t tile_rows = 2; + size_t tile_global_start_index_in_rows = 0; + size_t tile_global_start_index_in_cols = 0; + size_t owner_id = 0; + /* no host data is supplied: this section only checks tile membership, owner id and validity */ + T *data_array = nullptr; + + StorageLayout storage_layout = hicmapp::common::StorageLayout::HicmaCM; + + auto sub_matrix = new SubMatrix(data_array, tile_rows, tile_cols, global_matrix_rows, global_matrix_cols, + sub_matrix_rows, sub_matrix_cols, tile_global_start_index_in_rows, + tile_global_start_index_in_cols, storage_layout, owner_id, context); + + /** TEST CONTAINS TILE **/ + REQUIRE(sub_matrix->ContainsTile(0, 0) == true); + REQUIRE(sub_matrix->ContainsTile(0, 1) == true); + REQUIRE(sub_matrix->ContainsTile(1, 0) == true); + REQUIRE(sub_matrix->ContainsTile(1, 1) == 
true); + REQUIRE(sub_matrix->ContainsTile(0, 2) == false); + + /** TEST GetSubMatrixOwnerId **/ + REQUIRE(sub_matrix->GetSubMatrixOwnerId() == 0); + + /** TEST IS VALID **/ + REQUIRE(sub_matrix->IsValid() == true); + delete sub_matrix; + } + + SECTION("Test RM, 1 sub_matrix matrix") { + /** assuming a matrix composed of + * 1 submatrix and 2 tiles: + * + 0 1 + 2 3 + 4 5 + ------- + 6 7 + 8 9 + 10 11 + * + **/ + hicmapp::runtime::HicmaContext context; + size_t global_matrix_cols = 2; + size_t global_matrix_rows = 6; + size_t sub_matrix_cols = 2; + size_t sub_matrix_rows = 6; + float eps = 1e-6; + size_t tile_cols = 2; + size_t tile_rows = 3; + size_t tile_global_start_index_in_rows = 0; + size_t tile_global_start_index_in_cols = 0; + size_t owner_id = 0; + T data_array[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + /* expected tile contents, tile after tile; identical to data_array for this layout and tiling */ + T tiles_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + + StorageLayout storage_layout = hicmapp::common::StorageLayout::HicmaRM; + + auto sub_matrix = new SubMatrix(data_array, tile_rows, tile_cols, global_matrix_rows, global_matrix_cols, + sub_matrix_rows, sub_matrix_cols, tile_global_start_index_in_rows, + tile_global_start_index_in_cols, storage_layout, owner_id, context); + + /** TEST GET NUMBER OF TILES **/ + size_t number_of_tiles = sub_matrix->GetNumberofTiles(); + REQUIRE(number_of_tiles == 2); + + /** TEST GET NUMBER OF TILES IN ROW **/ + size_t number_of_tiles_in_row = sub_matrix->GetNumOfTilesinRows(); + REQUIRE(number_of_tiles_in_row == 2); + + /** TEST GET NUMBER OF TILES IN COL **/ + size_t number_of_tiles_in_col = sub_matrix->GetNumOfTilesinCols(); + REQUIRE(number_of_tiles_in_col == 1); + + /** TEST GET NUMBER OF ROWS PER TILE **/ + REQUIRE(sub_matrix->GetTileRows() == tile_rows); + + /** TEST GET NUMBER OF COLS PER TILE **/ + REQUIRE(sub_matrix->GetTileCols() == tile_cols); + + /** TEST GET TILES **/ + size_t offset = 0; + auto tiles = sub_matrix->GetTiles(); + auto* host_mem = new T[tile_cols * tile_rows]; + for (int i = 0; i < 
number_of_tiles; i++) { + hcorepp::memory::Memcpy(host_mem, tiles[i]->GetTileSubMatrix(0), tile_rows * tile_cols, context.GetMainContext(), hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_rows * tile_cols; k++) { + REQUIRE(std::abs(host_mem[k] - tiles_data[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + + /** TEST GET TILE POINTER **/ + offset = 0; + for (int i = 0; i < number_of_tiles_in_row; i++) { + for (int j = 0; j < number_of_tiles_in_col; j++) { + auto tile = sub_matrix->GetTilePointer(i, j); + hcorepp::memory::Memcpy(host_mem, tile->GetTileSubMatrix(0), tile_rows * tile_cols, context.GetMainContext(), hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_rows * tile_cols; k++) { + REQUIRE(std::abs(host_mem[k] - tiles_data[k + offset]) <= std::abs(eps)); + } + offset += tile_rows * tile_cols; + } + } + + /** TEST CONTAINS TILE **/ + REQUIRE(sub_matrix->ContainsTile(0, 0) == true); + REQUIRE(sub_matrix->ContainsTile(0, 1) == false); + REQUIRE(sub_matrix->ContainsTile(1, 0) == true); + REQUIRE(sub_matrix->ContainsTile(1, 1) == false); + REQUIRE(sub_matrix->ContainsTile(0, 2) == false); + + /** TEST GetSubMatrixOwnerId **/ + REQUIRE(sub_matrix->GetSubMatrixOwnerId() == 0); + + /** TEST IS VALID **/ + REQUIRE(sub_matrix->IsValid() == true); + delete [] host_mem; + delete sub_matrix; + } + + SECTION("Test RM, 1 sub_matrix matrix, non-divisible") { + /** assuming a matrix composed of + * 1 submatrix and 9 tiles (3 x 3; the tiles in the last tile row / tile column are partial): + * + 0 1 | 2 3 | 4 + 5 6 | 7 8 | 9 + ---------------------------- + 10 11 | 12 13 | 14 + 15 16 | 17 18 | 19 + ---------------------------- + 20 21 | 22 23 | 24 + * + **/ + hicmapp::runtime::HicmaContext context; + size_t global_matrix_cols = 5; + size_t global_matrix_rows = 5; + size_t sub_matrix_cols = 5; + size_t sub_matrix_rows = 5; + size_t tile_cols = 2; + size_t tile_rows = 2; + float eps = 1e-6; + size_t 
tile_global_start_index_in_rows = 0; + size_t tile_global_start_index_in_cols = 0; + size_t owner_id = 0; + T data_array[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; + /* expected tile contents, tile after tile; partial edge tiles contribute fewer elements (e.g. {4, 9}) */ + T tiles_data[] = {0, 1, 5, 6, 2, 3, 7, 8, 4, 9, 10, 11, 15, 16, 12, 13, 17, 18, 14, 19, 20, 21, 22, 23, 24}; + + StorageLayout storage_layout = hicmapp::common::StorageLayout::HicmaRM; + + auto sub_matrix = new SubMatrix(data_array, tile_rows, tile_cols, global_matrix_rows, global_matrix_cols, + sub_matrix_rows, sub_matrix_cols, tile_global_start_index_in_rows, + tile_global_start_index_in_cols, storage_layout, owner_id, context); + + /** TEST GET NUMBER OF TILES **/ + size_t number_of_tiles = sub_matrix->GetNumberofTiles(); + REQUIRE(number_of_tiles == 9); + + /** TEST GET NUMBER OF TILES IN ROW **/ + size_t number_of_tiles_in_row = sub_matrix->GetNumOfTilesinRows(); + REQUIRE(number_of_tiles_in_row == 3); + + /** TEST GET NUMBER OF TILES IN COL **/ + size_t number_of_tiles_in_col = sub_matrix->GetNumOfTilesinCols(); + REQUIRE(number_of_tiles_in_col == 3); + + /** TEST GET NUMBER OF ROWS PER TILE **/ + REQUIRE(sub_matrix->GetTileRows() == tile_rows); + + /** TEST GET NUMBER OF COLS PER TILE **/ + REQUIRE(sub_matrix->GetTileCols() == tile_cols); + + /** TEST GET TILES: each tile reports its own dimensions, so the host buffer is sized per tile **/ + size_t offset = 0; + auto tiles = sub_matrix->GetTiles(); + for (int i = 0; i < number_of_tiles; i++) { + size_t tile_i_rows = tiles[i]->GetNumOfRows(); + size_t tile_i_cols = tiles[i]->GetNumOfCols(); + auto* host_mem = new T[tile_i_cols * tile_i_rows]; + hcorepp::memory::Memcpy(host_mem, tiles[i]->GetTileSubMatrix(0), tile_i_rows * tile_i_cols, context.GetMainContext(), hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_i_rows * tile_i_cols; k++) { + REQUIRE(std::abs(host_mem[k] - tiles_data[k + offset]) <= std::abs(eps)); + } + delete [] host_mem; + offset += tile_i_rows * tile_i_cols; + } + + /** TEST GET TILE POINTER **/ + 
offset = 0; + for (int i = 0; i < number_of_tiles_in_row; i++) { + for (int j = 0; j < number_of_tiles_in_col; j++) { + auto tile = sub_matrix->GetTilePointer(i, j); + size_t tile_i_rows = tile->GetNumOfRows(); + size_t tile_i_cols = tile->GetNumOfCols(); + auto* host_mem = new T[tile_i_cols * tile_i_rows]; + hcorepp::memory::Memcpy(host_mem, tile->GetTileSubMatrix(0), tile_i_rows * tile_i_cols, context.GetMainContext(), hcorepp::memory::MemoryTransfer::DEVICE_TO_HOST); + context.SyncMainContext(); + for (int k = 0; k < tile_i_rows * tile_i_cols; k++) { + REQUIRE(std::abs(host_mem[k] - tiles_data[k + offset]) <= std::abs(eps)); + } + delete [] host_mem; + offset += tile_i_rows * tile_i_cols; + } + } + + /** TEST CONTAINS TILE: indices 0..2 are inside the 3 x 3 tile grid, index 3 is outside **/ + REQUIRE(sub_matrix->ContainsTile(0, 0) == true); + REQUIRE(sub_matrix->ContainsTile(0, 1) == true); + REQUIRE(sub_matrix->ContainsTile(0, 2) == true); + REQUIRE(sub_matrix->ContainsTile(0, 3) == false); + REQUIRE(sub_matrix->ContainsTile(1, 0) == true); + REQUIRE(sub_matrix->ContainsTile(1, 1) == true); + REQUIRE(sub_matrix->ContainsTile(1, 2) == true); + REQUIRE(sub_matrix->ContainsTile(1, 3) == false); + REQUIRE(sub_matrix->ContainsTile(2, 0) == true); + REQUIRE(sub_matrix->ContainsTile(2, 1) == true); + REQUIRE(sub_matrix->ContainsTile(2, 2) == true); + REQUIRE(sub_matrix->ContainsTile(2, 3) == false); + REQUIRE(sub_matrix->ContainsTile(3, 0) == false); + + + + /** TEST GetSubMatrixOwnerId **/ + REQUIRE(sub_matrix->GetSubMatrixOwnerId() == 0); + + /** TEST IS VALID **/ + REQUIRE(sub_matrix->IsValid() == true); + delete sub_matrix;} +} + +/** Catch2 template test case listing float and double; its body calls TEST_SUBMATRIX. */ +TEMPLATE_TEST_CASE("SubMatrixTest", "[SubMatrix]", float, double) { + TEST_SUBMATRIX(); +} \ No newline at end of file diff --git a/tests/test_main.cpp b/tests/test_main.cpp new file mode 100644 index 0000000..a887290 --- /dev/null +++ b/tests/test_main.cpp @@ -0,0 +1,33 @@ +/** + * @copyright (c) 2022 King Abdullah University of Science and Technology (KAUST). + * All rights reserved. 
+ */
+
+#ifndef HICMAPP_USE_MPI
+// Serial build: ask Catch2 to provide main() for the test binary.
+#define CATCH_CONFIG_MAIN
+#include // NOTE(review): header name is missing in this view of the patch (presumably the Catch2 header, lost in extraction) - restore before applying.
+#endif
+
+
+#ifdef HICMAPP_USE_MPI
+// MPI build: main() is defined below so that MPI is started before the
+// Catch2 session runs and shut down after it returns.
+#define CATCH_CONFIG_RUNNER
+
+#include // NOTE(review): header name is missing in this view of the patch (presumably the Catch2 or MPI header) - restore before applying.
+
+#include // NOTE(review): header name is missing in this view of the patch (presumably the Catch2 or MPI header) - restore before applying.
+
+#include <cstdio>
+
+/** Set to true once MPI is known to be initialized for this test binary. */
+bool is_init = false;
+
+/**
+ * @brief Entry point of the MPI-enabled test binary.
+ *
+ * Initializes MPI (requesting MPI_THREAD_MULTIPLE) unless it is already
+ * initialized, runs the Catch2 session with the given command line, then
+ * finalizes MPI.
+ *
+ * @param[in] argc Number of command-line arguments, forwarded to Catch2.
+ * @param[in] argv Command-line arguments, forwarded to Catch2.
+ * @return The value returned by Catch::Session::run().
+ */
+int main(int argc, char *argv[]) {
+
+    // Seed the guard from the MPI library itself: initializing MPI a second
+    // time is erroneous, and a plain global flag cannot detect that case.
+    int mpi_running = 0;
+    MPI_Initialized(&mpi_running);
+    if (mpi_running != 0) {
+        is_init = true;
+    }
+
+    if (!is_init) {
+        // The last argument of MPI_Init_thread is an OUTPUT: the thread
+        // support level the library actually grants, which may be lower
+        // than the level requested.
+        int provided = MPI_THREAD_SINGLE;
+        MPI_Init_thread(nullptr, nullptr, MPI_THREAD_MULTIPLE, &provided);
+        is_init = true;
+
+        // Warn (without aborting, so existing setups keep running) when the
+        // requested level is not available; report once, from rank 0.
+        if (provided < MPI_THREAD_MULTIPLE) {
+            int rank = 0;
+            MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+            if (rank == 0) {
+                std::fprintf(stderr,
+                             "warning: MPI_THREAD_MULTIPLE was requested but the MPI library provides thread level %d\n",
+                             provided);
+            }
+        }
+    }
+    int result = Catch::Session().run(argc, argv);
+    MPI_Finalize();
+    return result;
+}
+
+#endif