diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 05db2ef6bb..f398087664 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -15,7 +15,7 @@ _Delete the items that do not apply_ - Refactoring (no functional changes, no api changes) - Build related changes - Testing changes (e.g. new unit/integration/performance tests) -- Documentation changes +- Documentation or build script changes - Other (please describe): ### Does this introduce a breaking change? diff --git a/.github/workflows/ci-github-actions-self-hosted.yaml b/.github/workflows/ci-github-actions-self-hosted.yaml index e71db54b3b..8df083833f 100644 --- a/.github/workflows/ci-github-actions-self-hosted.yaml +++ b/.github/workflows/ci-github-actions-self-hosted.yaml @@ -52,7 +52,7 @@ jobs: # just like any other third-party service - name: Create PR status if: steps.check.outputs.triggered == 'true' - uses: Sibz/github-status-action@v1.1.6 + uses: guibranco/github-status-action-v2@v1.1.7 with: authToken: ${{secrets.GITHUB_TOKEN}} context: "ornl-sulfur CI ${{ matrix.jobname }}" @@ -91,7 +91,7 @@ jobs: - name: Report PR status if: always() && steps.check.outputs.triggered == 'true' - uses: Sibz/github-status-action@v1.1.6 + uses: guibranco/github-status-action-v2@v1.1.7 with: authToken: ${{secrets.GITHUB_TOKEN}} context: "ornl-sulfur CI ${{matrix.jobname}}" @@ -151,7 +151,7 @@ jobs: # just like any other third-party service - name: Create PR status if: steps.check.outputs.triggered == 'true' - uses: Sibz/github-status-action@v1.1.6 + uses: guibranco/github-status-action-v2@v1.1.7 with: authToken: ${{secrets.GITHUB_TOKEN}} context: "ornl-sulfur CI ${{ matrix.jobname }}" @@ -190,7 +190,7 @@ jobs: - name: Report PR status if: always() && steps.check.outputs.triggered == 'true' - uses: Sibz/github-status-action@v1.1.6 + uses: guibranco/github-status-action-v2@v1.1.7 with: authToken: ${{secrets.GITHUB_TOKEN}} context: "ornl-sulfur CI 
${{matrix.jobname}}" @@ -247,7 +247,7 @@ jobs: # just like any other third-party service - name: Create PR status if: steps.check.outputs.triggered == 'true' - uses: Sibz/github-status-action@v1.1.6 + uses: guibranco/github-status-action-v2@v1.1.7 with: authToken: ${{secrets.GITHUB_TOKEN}} context: "ornl-nitrogen CI ${{matrix.jobname}}" @@ -286,7 +286,7 @@ jobs: - name: Report PR status if: always() && steps.check.outputs.triggered == 'true' - uses: Sibz/github-status-action@v1.1.6 + uses: guibranco/github-status-action-v2@v1.1.7 with: authToken: ${{secrets.GITHUB_TOKEN}} context: "ornl-nitrogen CI ${{matrix.jobname}}" diff --git a/.github/workflows/ci-github-actions.yaml b/.github/workflows/ci-github-actions.yaml index bdd90a687c..361605c136 100644 --- a/.github/workflows/ci-github-actions.yaml +++ b/.github/workflows/ci-github-actions.yaml @@ -25,17 +25,19 @@ jobs: GCC9-NoMPI-Debug-Real, GCC9-NoMPI-NoOMP-Real, GCC9-NoMPI-NoOMP-Complex, - GCC9-NoMPI-Sandbox-Real, + GCC9-MPI-Sandbox-Real, + GCC9-NoMPI-Sandbox-Complex, GCC9-MPI-Gcov-Real, GCC9-MPI-Gcov-Complex, - GCC11-NoMPI-Werror-Real, - GCC11-NoMPI-Werror-Complex, - GCC11-NoMPI-Werror-Real-Mixed, - GCC11-NoMPI-Werror-Complex-Mixed, + GCC12-NoMPI-Werror-Real, + GCC12-NoMPI-Werror-Complex, + GCC12-NoMPI-Werror-Real-Mixed, + GCC12-NoMPI-Werror-Complex-Mixed, Clang14-NoMPI-ASan-Real, Clang14-NoMPI-ASan-Complex, Clang14-NoMPI-UBSan-Real, Clang16-NoMPI-Offload-Real, + Clang16-NoMPI-Offload-Complex, ] include: - jobname: GCC9-NoMPI-Debug-Real @@ -53,11 +55,16 @@ jobs: image: ghcr.io/qmcpack/ubuntu22-openmpi:latest options: -u 1001 - - jobname: GCC9-NoMPI-Sandbox-Real + - jobname: GCC9-MPI-Sandbox-Real container: image: ghcr.io/qmcpack/ubuntu22-openmpi:latest options: -u 1001 + - jobname: GCC9-NoMPI-Sandbox-Complex + container: + image: ghcr.io/qmcpack/ubuntu22-serial:latest + options: -u 1001 + - jobname: GCC9-MPI-Gcov-Real container: image: ghcr.io/qmcpack/ubuntu22-openmpi:latest @@ -68,24 +75,24 @@ jobs: image: 
ghcr.io/qmcpack/ubuntu22-openmpi:latest options: -u 1001 - - jobname: GCC11-NoMPI-Werror-Real + - jobname: GCC12-NoMPI-Werror-Real container: - image: ghcr.io/qmcpack/ubuntu2110-serial:latest + image: ghcr.io/qmcpack/ubuntu22-serial:latest options: -u 1001 - - jobname: GCC11-NoMPI-Werror-Complex + - jobname: GCC12-NoMPI-Werror-Complex container: - image: ghcr.io/qmcpack/ubuntu2110-serial:latest + image: ghcr.io/qmcpack/ubuntu22-serial:latest options: -u 1001 - - jobname: GCC11-NoMPI-Werror-Real-Mixed + - jobname: GCC12-NoMPI-Werror-Real-Mixed container: - image: ghcr.io/qmcpack/ubuntu2110-serial:latest + image: ghcr.io/qmcpack/ubuntu22-serial:latest options: -u 1001 - - jobname: GCC11-NoMPI-Werror-Complex-Mixed + - jobname: GCC12-NoMPI-Werror-Complex-Mixed container: - image: ghcr.io/qmcpack/ubuntu2110-serial:latest + image: ghcr.io/qmcpack/ubuntu22-serial:latest options: -u 1001 - jobname: Clang14-NoMPI-ASan-Real @@ -108,6 +115,11 @@ jobs: image: ghcr.io/qmcpack/ubuntu22-clang:latest options: -u 1001 + - jobname: Clang16-NoMPI-Offload-Complex + container: + image: ghcr.io/qmcpack/ubuntu22-clang:latest + options: -u 1001 + steps: - name: Checkout Action uses: actions/checkout@v4 @@ -127,15 +139,17 @@ jobs: - name: Upload Coverage if: contains(matrix.jobname, 'Gcov') && github.repository_owner == 'QMCPACK' - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 with: files: ../qmcpack-build/coverage.xml flags: tests-deterministic # optional name: codecov-QMCPACK # optional fail_ci_if_error: true # optional (default = false) + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} macos: - runs-on: macos-11 + runs-on: macos-14 env: GH_JOBNAME: ${{ matrix.jobname }} GH_OS: macOS @@ -143,7 +157,7 @@ jobs: strategy: fail-fast: false matrix: - jobname: [macOS-GCC11-NoMPI-Real] + jobname: [macOS-GCC14-NoMPI-Real] steps: - name: Checkout Action @@ -152,12 +166,13 @@ jobs: - name: Set Python Version uses: actions/setup-python@v4 with: - python-version: "3.10" + 
python-version: "3.12" - name: Setup Dependencies run: | - brew install ninja hdf5 fftw boost - pip3 install numpy h5py pandas + brew upgrade || brew link --overwrite python@3.12 + brew install gcc@14 ninja hdf5 fftw boost + python3 -m pip install numpy==1.26.4 h5py pandas - name: Configure run: tests/test_automation/github-actions/ci/run_step.sh configure diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake index 99c9b0b9e3..053be0c474 100644 --- a/CMake/ClangCompilers.cmake +++ b/CMake/ClangCompilers.cmake @@ -1,7 +1,7 @@ # Check compiler version if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0) message(STATUS "Compiler Version ${CMAKE_CXX_COMPILER_VERSION}") - message(FATAL_ERROR "Requires clang 7.0 or higher ") + message(FATAL_ERROR "Requires Clang 7.0 or higher.") endif() if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 11.0.0 @@ -16,6 +16,10 @@ if(QMC_OMP) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") if(ENABLE_OFFLOAD) + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16.0) + message(FATAL_ERROR "Requires Clang 16.0 or higher for OpenMP offload") + endif() + if(DEFINED OFFLOAD_TARGET) set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=${OFFLOAD_TARGET}") if(DEFINED OFFLOAD_ARCH) diff --git a/CMake/DetermineDeviceArchitectures.cmake b/CMake/DetermineDeviceArchitectures.cmake index 5c5c573b47..1b95d8604c 100644 --- a/CMake/DetermineDeviceArchitectures.cmake +++ b/CMake/DetermineDeviceArchitectures.cmake @@ -63,6 +63,7 @@ function(verifyNVIDIAGPUconsistency) endfunction() # auto detect QMC_GPU_ARCHS if not set by user and GPU features are enabled. 
+# CMAKE_CUDA/HIP_ARCHITECTURES are used as hints if(NOT QMC_GPU_ARCHS AND ENABLE_CUDA) if(QMC_CUDA2HIP) detectAMDGPU() @@ -93,3 +94,13 @@ endif() set(QMC_GPU_ARCHS ${QMC_GPU_ARCHS} CACHE STRING "Accelerator device architectures" FORCE) + +# QMC_GPU_ARCHS is the single source of truth and thus overwrites CMAKE_CUDA/HIP_ARCHITECTURES +if(ENABLE_CUDA) + if(QMC_CUDA2HIP) + set(CMAKE_HIP_ARCHITECTURES ${QMC_GPU_ARCHS} CACHE STRING "HIP architectures" FORCE) + else() + string(REPLACE "sm_" "" CUDA_ARCH_NUMBERS "${QMC_GPU_ARCHS}") + set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_NUMBERS} CACHE STRING "CUDA architectures" FORCE) + endif() +endif() diff --git a/CMake/FindMKL.cmake b/CMake/FindMKL.cmake index 76e5b66624..a52b0b683c 100644 --- a/CMake/FindMKL.cmake +++ b/CMake/FindMKL.cmake @@ -85,7 +85,7 @@ endif(HAVE_MKL) if(HAVE_MKL AND ENABLE_SYCL) find_library(MKL_SYCL mkl_sycl HINTS ${MKL_ROOT} $ENV{MKLROOT} $ENV{MKL_ROOT} $ENV{MKL_HOME} - PATH_SUFFIXES lib/intel64 + PATH_SUFFIXES lib/intel64 lib REQUIRED ) diff --git a/CMake/IntelCompilers.cmake b/CMake/IntelCompilers.cmake index ea06679655..36eb3dbfc0 100644 --- a/CMake/IntelCompilers.cmake +++ b/CMake/IntelCompilers.cmake @@ -17,10 +17,20 @@ endif() if(QMC_OMP) if(CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM") if(ENABLE_OFFLOAD) - set(OFFLOAD_TARGET - "spir64" - CACHE STRING "Offload target architecture") - set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=${OFFLOAD_TARGET}") + if(DEFINED OFFLOAD_ARCH OR QMC_GPU_ARCHS) + # for ahead-of-time compilation and linking + set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=spir64_gen") + if(DEFINED OFFLOAD_ARCH) + set(OpenMP_OFFLOAD_LINKER_FLAGS "-Xs \"-device ${OFFLOAD_ARCH}\"") + else() + set(OpenMP_OFFLOAD_LINKER_FLAGS "-Xs \"-device ${QMC_GPU_ARCHS}\"") + endif() + else() + set(OFFLOAD_TARGET + "spir64" + CACHE STRING "Offload target architecture") + set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=${OFFLOAD_TARGET}") + endif() endif(ENABLE_OFFLOAD) set(CMAKE_C_FLAGS 
"${CMAKE_C_FLAGS} -fiopenmp") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fiopenmp") diff --git a/CMake/IntelDPCPPConfig-modified.cmake b/CMake/IntelDPCPPConfig-modified.cmake index 6ebe675491..4dd5b82510 100644 --- a/CMake/IntelDPCPPConfig-modified.cmake +++ b/CMake/IntelDPCPPConfig-modified.cmake @@ -239,7 +239,7 @@ if(WIN32) list(APPEND SYCL_FLAGS "/EHsc") endif() -set(SYCL_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SYCL_FLAGS}") +set(SYCL_CXX_FLAGS "${SYCL_FLAGS}") # And now test the assumptions. diff --git a/CMake/TestCXXMainCompiles.cmake b/CMake/TestCXXMainCompiles.cmake new file mode 100644 index 0000000000..da6ffe388a --- /dev/null +++ b/CMake/TestCXXMainCompiles.cmake @@ -0,0 +1,18 @@ +# Check that the configured compiler works on a C++ main function +# Note: whitespaces not allowed in STAGE_NAME +function(TestCXXMainCompiles STAGE_NAME) + if(STAGE_NAME MATCHES " ") + message(FATAL_ERROR "TestCXXMainCompiles whitespaces not allowed in the stage name. The given value is '${STAGE_NAME}'.") + endif() + set(TEST_CXX_COMPILE_MAIN_DIR ${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp) + file(WRITE ${TEST_CXX_COMPILE_MAIN_DIR}/try_cxx_main.cpp "int main(){}") + set(TEST_RESULT_VAR_NAME TEST_RESULT_${STAGE_NAME}) + try_compile( + ${TEST_RESULT_VAR_NAME} + ${TEST_CXX_COMPILE_MAIN_DIR} + SOURCES ${TEST_CXX_COMPILE_MAIN_DIR}/try_cxx_main.cpp + OUTPUT_VARIABLE COMPILE_OUTPUT) + if(NOT ${TEST_RESULT_VAR_NAME}) + message(FATAL_ERROR "Failed in compiling a main() function in stage ${STAGE_NAME}. 
Output:\n${COMPILE_OUTPUT}") + endif() +endfunction() diff --git a/CMake/TestCxx17Library.cmake b/CMake/TestCxx17Library.cmake index 4f6a2b7772..6cdf6cfe5d 100644 --- a/CMake/TestCxx17Library.cmake +++ b/CMake/TestCxx17Library.cmake @@ -41,7 +41,7 @@ if(NOT CXX17_LIBRARY_OKAY) elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM") message( "Compiler detected is namely clang++ or a vendor variant (icpx, amdclang++, armclang++).\n If not using libcxx, ensure a GCC toolchain version equal or greater " - "than 9.0 gets picked up. Check with ' -v'. Or use the --gcc-toolchain compiler option " + "than 9.0 gets picked up. Check with ' -v'. Or use the --gcc-install-dir (--gcc-toolchain deprecated) compiler option " "(added to both CMAKE_C_FLAGS and CMAKE_CXX_FLAGS) to point to a newer GCC installation." ) elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") diff --git a/CMake/ctest_script.cmake b/CMake/ctest_script.cmake index e7c302cd28..2458ab4971 100644 --- a/CMake/ctest_script.cmake +++ b/CMake/ctest_script.cmake @@ -263,7 +263,7 @@ ctest_build() # Submit the results to oblivion set(CTEST_DROP_METHOD "https") set(CTEST_DROP_SITE "cdash.qmcpack.org") -set(CTEST_DROP_LOCATION "/CDash/submit.php?project=QMCPACK") +set(CTEST_DROP_LOCATION "/submit.php?project=QMCPACK") set(CTEST_DROP_SITE_CDASH TRUE) set(DROP_SITE_CDASH TRUE) ctest_submit(PARTS Configure Build) diff --git a/CMake/inspectCompiler.cmake b/CMake/inspectCompiler.cmake index 6430d255ea..c17e5b5eab 100644 --- a/CMake/inspectCompiler.cmake +++ b/CMake/inspectCompiler.cmake @@ -4,20 +4,6 @@ # Note: vendor compilers can be just rebranded customized Clang compiler. # It requires more recent CMake to handle it properly. We need to handle such cases for older CMake. 
-execute_process( - COMMAND ${CMAKE_CXX_COMPILER} --version - RESULT_VARIABLE VERSION_QUERY_RETURN - OUTPUT_VARIABLE VERSION_QUERY_OUTPUT) - -if(VERSION_QUERY_RETURN EQUAL 0) - if(CMAKE_VERSION VERSION_LESS 3.20 - AND VERSION_QUERY_OUTPUT MATCHES "Intel" - AND VERSION_QUERY_OUTPUT MATCHES "oneAPI") - # require 3.20 to recognize IntelLLVM compiler ID and check accurate version numbers. - message(FATAL_ERROR "Using Intel OneAPI compilers requires CMake 3.20.0 or later.") - endif() -endif() - if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") set(COMPILER GNU) elseif(CMAKE_CXX_COMPILER_ID MATCHES "XL") diff --git a/CMake/macros.cmake b/CMake/macros.cmake index 3596e010c0..471d32039a 100644 --- a/CMake/macros.cmake +++ b/CMake/macros.cmake @@ -147,7 +147,7 @@ function( set_tests_properties( ${TESTNAME} PROPERTIES FAIL_REGULAR_EXPRESSION - "ERROR" + "QMCPACK ERROR" PASS_REGULAR_EXPRESSION "QMCPACK execution completed successfully" PROCESSORS @@ -166,7 +166,7 @@ function( set_tests_properties( ${TESTNAME} PROPERTIES FAIL_REGULAR_EXPRESSION - "ERROR" + "QMCPACK ERROR" PASS_REGULAR_EXPRESSION "QMCPACK execution completed successfully" PROCESSORS diff --git a/CMakeLists.txt b/CMakeLists.txt index 271631d012..e2e0bfc237 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ ###################################################################### # CMake version and policies ###################################################################### -cmake_minimum_required(VERSION 3.17.0) +cmake_minimum_required(VERSION 3.21.0) # Note that cmake_minimum_required affects policy defaults. 
# All policies known to the running version of CMake and introduced in # cmake_minimum_required version or earlier will be set to use NEW behavior @@ -9,6 +9,9 @@ cmake_minimum_required(VERSION 3.17.0) if(POLICY CMP0127) cmake_policy(SET CMP0127 NEW) # Condition evaluation v3.22+ https://cmake.org/cmake/help/latest/policy/CMP0127.html endif() +if(POLICY CMP0144) + cmake_policy(SET CMP0144 NEW) # v3.27+ find_package() uses upper-case _ROOT variables. +endif() ###################################################################### # QMCPACK project @@ -82,14 +85,8 @@ endif(ENABLE_CUDA AND ENABLE_SYCL) if(ENABLE_CUDA) if(QMC_CUDA2HIP) message(STATUS "ENABLE_CUDA enabled, QMC_CUDA2HIP enabled") - if(CMAKE_VERSION VERSION_LESS 3.21.0) - message(FATAL_ERROR "ENABLE_ROCM or QMC_CUDA2HIP require CMake 3.21.0 or later") - endif() else(QMC_CUDA2HIP) message(STATUS "ENABLE_CUDA enabled, QMC_CUDA2HIP disabled") - if(CMAKE_VERSION VERSION_LESS 3.18.0) - message(FATAL_ERROR "ENABLE_CUDA require CMake 3.18.0 or later") - endif() endif() else(ENABLE_CUDA) if(QMC_CUDA2HIP) @@ -182,7 +179,6 @@ option(BUILD_AFQMC_WITH_NCCL "Build AFQMC with NCCL library." OFF) if(BUILD_AFQMC AND NOT QMC_MPI) message(FATAL_ERROR "AFQMC requires building with MPI (QMC_MPI=1). Set BUILD_AFQMC=0 or configure MPI.") endif() -option(BUILD_FCIQMC "Build with FCIQMC" OFF) option(QMC_BUILD_STATIC "Link to static libraries" OFF) option(ENABLE_TIMERS "Enable internal timers" ON) option(ENABLE_STACKTRACE "Enable use of boost::stacktrace" OFF) @@ -239,11 +235,25 @@ if(QMC_OMP) FATAL_ERROR "No compiler support for OpenMP found. Switching to a compiler with OpenMP support is recommended." "Alternatively, you will need to run CMake configure with -DQMC_OMP=OFF") endif() + + # explicitly set OpenMP runtime library rpath like other normal libraries. 
+ list(GET OpenMP_CXX_LIBRARIES 0 OpenMP_LIBRARY_FILE) + if(OpenMP_LIBRARY_FILE) + cmake_path(GET OpenMP_LIBRARY_FILE PARENT_PATH OpenMP_LIBRARY_DIR) + list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES ${OpenMP_LIBRARY_DIR} isSystemDir) + if("${isSystemDir}" STREQUAL "-1") + message(STATUS "Append OpenMP runtime library path ${OpenMP_LIBRARY_DIR}") + list(APPEND CMAKE_BUILD_RPATH ${OpenMP_LIBRARY_DIR}) + list(APPEND CMAKE_INSTALL_RPATH ${OpenMP_LIBRARY_DIR}) + endif("${isSystemDir}" STREQUAL "-1") + endif() endif(QMC_OMP) #------------------------------------------------------------------------------- # Set vendor specific compiler options #------------------------------------------------------------------------------- +include(TestCXXMainCompiles) +TestCXXMainCompiles("Before_Customization") if(CMAKE_TOOLCHAIN_FILE) message(STATUS "Using ${CMAKE_TOOLCHAIN_FILE} toolchain ") else(CMAKE_TOOLCHAIN_FILE) @@ -284,7 +294,7 @@ else(CMAKE_TOOLCHAIN_FILE) endif() include(inspectCompiler) - + TestCXXMainCompiles("After_Customization") endif(CMAKE_TOOLCHAIN_FILE) if(NOT CMAKE_CXX_COMPILER_ID STREQUAL CMAKE_C_COMPILER_ID) @@ -332,7 +342,7 @@ if(NOT "${ENABLE_SANITIZER}" STREQUAL "none") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_SAN}") - + TestCXXMainCompiles("With_Sanitizers") endif() #----------------------------------------------------------------------- @@ -368,32 +378,27 @@ if(alignment_remainder) endif() message(STATUS "QMC_SIMD_ALIGNMENT is set to ${QMC_SIMD_ALIGNMENT}") -#--------------------------------------------------------- -# Determine if OpenMP taskloop works with the CXX compiler -#--------------------------------------------------------- -include(TestOpenMPtaskloop) -option(ENABLE_OMP_TASKLOOP "Enable OpenMP taskloop" ${OMP_TASKLOOP_OKAY}) -message(STATUS "ENABLE_OMP_TASKLOOP is set to ${ENABLE_OMP_TASKLOOP}") - -#--------------------------------------------------------- -# Set up OpenMP offload compile options 
-#--------------------------------------------------------- -set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT OFF) -if(ENABLE_OFFLOAD AND DEFINED OPENMP_OFFLOAD_COMPILE_OPTIONS) - message(STATUS "OpenMP offload CXX flags: ${OPENMP_OFFLOAD_COMPILE_OPTIONS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_OFFLOAD_COMPILE_OPTIONS}") - if(${COMPILER} MATCHES "Clang" - AND OPENMP_OFFLOAD_COMPILE_OPTIONS MATCHES "gfx" - AND QMC_CUDA2HIP) - # As of 11/2021, QMC_OFFLOAD_MEM_ASSOCIATED=ON is needed for AMD and mainline LLVM compilers - # when using OpenMP offload to AMD GPU together with HIP. - set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT ON) +if(QMC_OMP) + #--------------------------------------------------------- + # Determine if OpenMP taskloop works with the CXX compiler + #--------------------------------------------------------- + include(TestOpenMPtaskloop) + option(ENABLE_OMP_TASKLOOP "Enable OpenMP taskloop" ${OMP_TASKLOOP_OKAY}) + message(STATUS "ENABLE_OMP_TASKLOOP is set to ${ENABLE_OMP_TASKLOOP}") + + #--------------------------------------------------------- + # Set up OpenMP offload compile options + #--------------------------------------------------------- + if(ENABLE_OFFLOAD AND DEFINED OPENMP_OFFLOAD_COMPILE_OPTIONS) + message(STATUS "OpenMP offload CXX flags: ${OPENMP_OFFLOAD_COMPILE_OPTIONS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_OFFLOAD_COMPILE_OPTIONS}") + if(DEFINED OpenMP_OFFLOAD_LINKER_FLAGS) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_OFFLOAD_LINKER_FLAGS}") + endif() endif() + + TestCXXMainCompiles("With_OpenMP") endif() -# Some OpenMP offload runtime libraries have composibility issue with a vendor native runtime. -# A workaround is making the vendor native runtime responsible for memory allocations and OpenMP associate/disassocate them. 
-cmake_dependent_option(QMC_OFFLOAD_MEM_ASSOCIATED "Manage OpenMP memory allocations via the vendor runtime" - ${QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT} "ENABLE_OFFLOAD;ENABLE_CUDA" OFF) #------------------------------------------------------------------------------------- # consider making this always on if OpenMP is no longer UB with Thread Support Library @@ -408,8 +413,12 @@ if(QMC_EXP_THREADING) add_definitions(-DQMC_EXP_THREADING) endif(QMC_EXP_THREADING) +###################################################################### +# Check external libraries. +###################################################################### + #------------------------------------------------------------------- -# Check MPI installation. MPI is treated as a part of compiler +# Check MPI installation. MPI is treated as a library #------------------------------------------------------------------- if(QMC_MPI) # for backward compatibility with MPIEXEC @@ -488,36 +497,6 @@ else(QMC_MPI) message(STATUS "MPI is disabled") endif(QMC_MPI) -#-------------------------------------------------------------- -# final test for compile options before searching for libraries -#-------------------------------------------------------------- -include(CheckCXXSourceCompiles) -check_cxx_source_compiles("int main(){}" TEST_CXX_COMPILE_MAIN) -if(NOT TEST_CXX_COMPILE_MAIN) - unset(TEST_CXX_COMPILE_MAIN CACHE) - message(FATAL_ERROR "Failed in compiling a main() function likely due to incorrect compile options. " - "Check error in \"${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/CMakeError.log\".") -endif() - -#-------------------------------------------------------------- -# Workaround for breakage of OMP Target kernels at -O0 by Clang -#-------------------------------------------------------------- -# When the build type is Debug, default -O0 compilation is broken by -# Clang 15 as of 5d2ce7663b10c107328a4ae0c678165209e64619. -# Previous compilers are not suggested for clang offload builds. 
-# -# You can set this option to false on the command line to check -# if this problem has been fixed. - -if((${COMPILER} MATCHES "Clang") AND ENABLE_OFFLOAD) - option(ENABLE_OFFLOAD_CLANG_DEBUG_O3 "build OMP target kernels with -O3 in the build type Debug" ON) -endif() -mark_as_advanced(ENABLE_OFFLOAD_CLANG_DEBUG_O3) - -###################################################################### -# Check external libraries. -###################################################################### - #------------------------------------------------------------------- # check OS related libraries #------------------------------------------------------------------- @@ -779,7 +758,7 @@ if(ENABLE_CUDA AND NOT QMC_CUDA2HIP) message("Project CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}") endif() -if(USE_NVTX_API) +if(USE_NVTX_API AND NOT QMC_CUDA2HIP) message(STATUS "Enabling use of CUDA NVTX APIs") find_library( NVTX_API_LIB NAME nvToolsExt @@ -792,7 +771,7 @@ if(USE_NVTX_API) link_libraries(${NVTX_API_LIB}) else() message(STATUS "CUDA NVTX APIs disabled") -endif(USE_NVTX_API) +endif(USE_NVTX_API AND NOT QMC_CUDA2HIP) #------------------------------------------------------------------- # set up ROCM compiler options and libraries @@ -851,6 +830,20 @@ if(ENABLE_ROCM) option(QMC_DISABLE_HIP_HOST_REGISTER "Disable hipHostRegister for pinning host memory" ON) endif(ENABLE_ROCM) +if(USE_NVTX_API AND QMC_CUDA2HIP) + message(STATUS "Enabling use of ROCm rocTX APIs") + find_library( + ROCTX_API_LIB NAME libroctx64.so + PATH_SUFFIXES lib lib64) + if(NOT ROCTX_API_LIB) + message(FATAL_ERROR "USE_ROCTX_API set but ROCTX_API_LIB not found") + endif(NOT ROCTX_API_LIB) + message("ROCm rocTX library: ${ROCTX_API_LIB}") + link_libraries(${ROCTX_API_LIB}) + else() + message(STATUS "ROCm rocTX APIs disabled") +endif(USE_NVTX_API AND QMC_CUDA2HIP) + #------------------------------------------------------------------- # set up HIP compiler options 
#------------------------------------------------------------------- @@ -903,6 +896,23 @@ if(ENABLE_SYCL) endif() endif(ENABLE_SYCL) +#-------------------------------------------------------------------- +# Resolve Vendor(CUDA/HIP/SYCL) and OpenMP runtime incompatibilities +#-------------------------------------------------------------------- +# Some OpenMP offload runtime libraries have composability issues with vendor native ones. +# A workaround is making the vendor native runtime responsible for memory allocations and OpenMP associate/disassociate them. +set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT OFF) +if(ENABLE_OFFLOAD) + # Known issue HIP<5.5 https://github.com/ROCm/aomp/issues/253 + message("check ${COMPILER} ${QMC_CUDA2HIP} ${hip_VERSION}") + if(${COMPILER} MATCHES "Clang" AND QMC_CUDA2HIP AND hip_VERSION VERSION_LESS "5.5") + set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT ON) + endif() +endif() +cmake_dependent_option(QMC_OFFLOAD_MEM_ASSOCIATED "Manage OpenMP memory allocations via the vendor runtime" + ${QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT} "ENABLE_OFFLOAD;ENABLE_CUDA" OFF) + + #------------------------------------------------------------------- # set up VTune ittnotify library #------------------------------------------------------------------- @@ -1145,6 +1155,7 @@ message(STATUS "Ready to parse QMCPACK source tree") add_subdirectory(external_codes) add_subdirectory(src) +add_subdirectory(doxygen) if(NOT QMC_BUILD_SANDBOX_ONLY) if(NOT QMC_NO_SLOW_CUSTOM_TESTING_COMMANDS) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..b74cd5d0fd --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,134 @@ + +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, 
education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. 
+Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +kentpr at ornl dot gov and brenda_rubenstein at brown dot edu. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. 
No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations + diff --git a/README.md b/README.md index e982d005dc..1a6bed6c88 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ P. Kent _et al._ J. Chem. Phys. **152** 174105 (2020), https://doi.org/10.1063/5 # Installation Prerequisites * C++ 17 and C99 capable compilers. - * CMake v3.17.0 or later, build utility, http://www.cmake.org + * CMake v3.21.0 or later, build utility, http://www.cmake.org * BLAS/LAPACK, numerical library. Use vendor and platform-optimized libraries. 
* LibXml2, XML parser, http://xmlsoft.org/ * HDF5 v1.10.0 or later, portable I/O library, http://www.hdfgroup.org/HDF5/ @@ -53,7 +53,8 @@ P. Kent _et al._ J. Chem. Phys. **152** 174105 (2020), https://doi.org/10.1063/5 * FFTW, FFT library, http://www.fftw.org/ * MPI, parallel library. Optional, but a near requirement for production calculations. * Python3. Older versions are not supported as of January 2020. - * CUDA v11.0 or later. Optional, but required for builds with NVIDIA GPU support. + * CUDA v11.0 or later. Optional, but required for builds with NVIDIA GPU support. Use 12.3 or newer if possible. 11.3-12.2 have + a bug affecting multideterminant calculations. Single determinant calculations are OK. We aim to support open source compilers and libraries released within two years of each QMCPACK release. Use of software versions over two years old may work but is discouraged and untested. Proprietary compilers (Intel, NVHPC) are generally supported over the @@ -65,14 +66,14 @@ Nightly testing currently includes at least the following software versions: * Compilers * GCC 13.2.0, 11.4.0 - * Clang/LLVM 16.0.6 + * Clang/LLVM 17.0.4 * Boost 1.83.0, 1.77.0 -* HDF5 1.14.2 +* HDF5 1.14.3 * FFTW 3.3.10, 3.3.8 -* CMake 3.27.4, 3.21.3 +* CMake 3.27.9, 3.21.4 * MPI - * OpenMPI 4.1.5 -* CUDA 11.2 + * OpenMPI 4.1.6 +* CUDA 12.3 GitHub Actions-based tests include additional version combinations from within our two year support window. On a developmental basis we also check the latest Clang and GCC development versions, AMD Clang and Intel OneAPI compilers. @@ -90,28 +91,34 @@ sphinx from the sources in docs/. A PDF version is still available at https://qm ## Quick build - If you are feeling lucky and are on a standard UNIX-like system such - as a Linux workstation: +On a standard UNIX-like system such as a Linux workstation: - * Safest quick build option is to specify the C and C++ compilers - through their MPI wrappers. Here we use Intel MPI and Intel - compilers. 
Move to the build directory, run CMake and make +* Safest quick build option is to specify the C and C++ compilers + through their MPI wrappers. Here we use Intel MPI and Intel + compilers. Move to the build directory, run CMake and make ``` cd build cmake -DCMAKE_C_COMPILER=mpiicc -DCMAKE_CXX_COMPILER=mpiicpc .. make -j 8 ``` - * Substitute mpicc and mpicxx or other wrapped compiler names to suit - your system. e.g. With OpenMPI use +* Substitute mpicc and mpicxx or other wrapped compiler names to suit + your system. e.g. With OpenMPI use ``` cd build cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx .. make -j 8 ``` +* Non-MPI build: +``` +cd build +cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DQMC_MPI=0 .. +make -j 8 +``` + * If you are feeling particularly lucky, you can skip the compiler - specification: + specification: ``` cd build cmake .. @@ -319,33 +326,34 @@ performance tests are provided to aid in monitoring performance. From the build directory, invoke ctest specifying only the unit tests ``` -ctest -R unit +ctest -j 16 -R unit --output-on-failure ``` -All of these tests should pass. +All of these tests should pass within a few minutes. Modify the parallelization setting (-j 16) to suit the core count of your system. ## Run the deterministic tests From the build directory, invoke ctest specifying only tests that are deterministic and known to be reliable. ``` -ctest -R deterministic -LE unstable +ctest -j 16 -R deterministic -LE unstable --output-on-failure ``` -These tests currently take a few seconds to run, and include all the unit tests. All tests should pass. Failing tests likely +These tests currently take a few minutes to run, and include all the unit tests. All tests should pass. Failing tests likely indicate a significant problem that should be solved before using QMCPACK further. This ctest invocation can be used as part of an -automated installation verification process. +automated installation verification process.
Many of the tests use a multiple of 16 processes, so on large core count machines +a significant speedup can be obtained with -j 64 etc. ## Run the short (quick) tests - From the build directory, invoke ctest specifying only tests - including "short" to run that are known to be stable. +From the build directory, invoke ctest specifying only tests +including "short" to run that are known to be stable. ``` -ctest -R short -LE unstable +ctest -j 16 -R short -LE unstable --output-on-failure ``` - These tests currently take up to around one hour. On average, all - tests should pass at a three sigma level of reliability. Any - initially failing test should pass when rerun. +These tests currently take up to around one hour. On average, all +tests should pass at a three sigma level of reliability. Any +initially failing test should pass when rerun. ## Run individual tests @@ -356,7 +364,7 @@ ctest -R name-of-test-to-run # Contributing -Contributions of any size are very welcome. Guidance for contributing to QMCPACK is included in Chapter 1 of the manual +Contributions of any size are very welcome. Guidance for contributing to QMCPACK is included in the manual https://qmcpack.readthedocs.io/en/develop/introduction.html#contributing-to-qmcpack. We use a git flow model including pull request reviews. A continuous integration system runs on pull requests. See https://github.com/QMCPACK/qmcpack/wiki for details. 
For an extensive contribution, it can be helpful to discuss on the [Google QMCPACK diff --git a/codecov.yaml b/codecov.yml similarity index 89% rename from codecov.yaml rename to codecov.yml index a8fcc37a80..301c7047b7 100644 --- a/codecov.yaml +++ b/codecov.yml @@ -14,8 +14,9 @@ comment: off ignore: - "external_codes" - "tests" - - "src/*/tests" - - "src/*/*/tests" + - "src/**/tests/*.cpp" + - "src/**/tests/*.h" + - "src/**/tests/*.hpp" # Fixes report prefix paths from CI dynamic coverage action # from https://docs.codecov.io/docs/fixing-paths diff --git a/config/blue_waters.sh b/config/blue_waters.sh deleted file mode 100755 index 4a18fb7ff0..0000000000 --- a/config/blue_waters.sh +++ /dev/null @@ -1,136 +0,0 @@ -#!/bin/bash - -################################################################ -## This script builds available configurations of QMCPACK ## -## on Blue Waters, University of Illinois Urbana-Champaign ## -## highly similar to OLCF Titan build script ## -## ## -## Last modified: 19 May 2020 ## -################################################################ - -# Load required modules (assuming default settings have not been modified) -source $MODULESHOME/init/bash -if (echo $LOADEDMODULES | grep -q pgi) -then -module unload PrgEnv-pgi -fi -if (echo $LOADEDMODULES | grep -q cray) -then -module unload PrgEnv-cray -fi -if (echo $LOADEDMODULES | grep -q cuda) -then -module unload cudatoolkit -fi -if (echo $LOADEDMODULES | grep -q hdf5) -then -module unload cray-hdf5 -fi -# bwpy conflicts with boost and cmake? -if (echo $LOADEDMODULES | grep -q bwpy) -then -module unload bwpy -fi - -# for C++14 support (gcc/6.3.0) -module load PrgEnv-gnu -# disable user statistics reporting (otherwise crash) -module unload darshan -# use parallel HDF5 to speed up I/O of large jobs -module load cray-hdf5-parallel - -## FFT library is important for orbital splining -#module load fftw -# 2020-01-30: fftw breaks mpi? 
set FFTW_HOME instead of load module -## 2019-10-25: bwpy conflicts with cmake and boost -#module load boost/1.63.0 -# 2020-01-30: python3 is required, set BOOST_ROOT instead of load module - -# miscellaneous -#module load bwpy # numpy, h5py libraries are used in ctest -module load bwpy -module load libxml2 -module load cmake/3.9.4 - -# always use dynamic linking -export CRAYPE_LINK_TYPE=dynamic - -# use AMD optimized math libraries (performance critical!) -XT_FLAGS="-DHAVE_AMDLIBM=1" - -# Set cmake variables, shared for cpu builds; not used in gpu builds -AMD_LIB_HOME=/projects/sciteam/bbak/soft/amdlibm-3-0-2 -CMAKE_FLAGS="-D CMAKE_C_FLAGS=$XT_FLAGS \ - -D CMAKE_CXX_FLAGS=$XT_FLAGS \ - -D QMC_INCLUDE=$AMD_LIB_HOME/include \ - -D QMC_EXTRA_LIBS=$AMD_LIB_HOME/lib/static/libamdlibm.a -" - -# Set environment variables -export FFTW_HOME=/opt/cray/fftw/3.3.4.10/interlagos -export BOOST_ROOT=/sw/xe/boost/1.63.0/sles11.3_gnu5.3.0 - -export CC=cc -export CXX=CC - -################################################################ -## CPU Binaries ## -################################################################ - -target=qmcpack - -# Configure and build cpu real -suffix=_cpu_real -echo "" -echo "" -echo "building qmcpack for cpu real" -mkdir build$suffix -cd build$suffix -cmake $CMAKE_FLAGS .. -make -j 32 $target -cd .. -ln -s ./build$suffix/bin/qmcpack ./qmcpack$suffix - -# Configure and build cpu complex -suffix=_cpu_comp -echo "" -echo "" -echo "building qmcpack for cpu complex" -mkdir build$suffix -cd build$suffix -cmake $CMAKE_FLAGS -D QMC_COMPLEX=1 .. -make -j 32 $target -cd .. 
-ln -s ./build$suffix/bin/qmcpack_complex ./qmcpack$suffix - -################################################################ -## GPU Binaries ## -################################################################ - -module load cudatoolkit - -# Configure and build gpu real -suffix=_gpu_real -echo "" -echo "" -echo "building qmcpack for gpu real" -mkdir build$suffix -cd build$suffix -cmake -D QMC_CUDA=1 -DCUDA_HOST_COMPILER=$(which CC) .. -cmake -D QMC_CUDA=1 -DCUDA_HOST_COMPILER=$(which CC) .. -make -j 32 $target -cd .. -ln -s ./build$suffix/bin/qmcpack ./qmcpack$suffix - -# Configure and build gpu complex -suffix=_gpu_comp -echo "" -echo "" -echo "building qmcpack for gpu complex" -mkdir build$suffix -cd build$suffix -cmake -D QMC_COMPLEX=1 -D QMC_CUDA=1 -DCUDA_HOST_COMPILER=$(which CC) .. -cmake -D QMC_COMPLEX=1 -D QMC_CUDA=1 -DCUDA_HOST_COMPILER=$(which CC) .. -make -j 32 $target -cd .. -ln -s ./build$suffix/bin/qmcpack_complex ./qmcpack$suffix diff --git a/config/build_alcf_aurora_icpx.sh b/config/build_alcf_aurora_icpx.sh new file mode 100755 index 0000000000..fdac26d39c --- /dev/null +++ b/config/build_alcf_aurora_icpx.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# This recipe is intended for ALCF Aurora https://www.alcf.anl.gov/support-center/aurora-sunspot +# last revision: Sep 23rd 2024 +# +# How to invoke this script? +# build_alcf_aurora_icpx.sh # build all the variants assuming the current directory is the source directory.
+# build_alcf_aurora_icpx.sh SOURCE_DIR # build all the variants with a given source directory SOURCE_DIR +# build_alcf_aurora_icpx.sh SOURCE_DIR INSTALL_DIR # build all the variants with a given source directory SOURCE_DIR and install to INSTALL_DIR + +for module_name in oneapi/release oneapi/eng-compiler +do + if module is-loaded $module_name ; then module unload $module_name; fi +done + +module load oneapi/eng-compiler/2024.07.30.002 +module load cmake hdf5/1.14.3 boost/1.84.0 +module list >& module_list.txt + +echo "**********************************" +echo '$ icpx -v' +icpx -v +echo "**********************************" + +TYPE=Release +Machine=aurora +Compiler=icpx20240629 + +if [[ $# -eq 0 ]]; then + source_folder=`pwd` +elif [[ $# -eq 1 ]]; then + source_folder=$1 +else + source_folder=$1 + install_folder=$2 +fi + +if [[ -f $source_folder/CMakeLists.txt ]]; then + echo Using QMCPACK source directory $source_folder +else + echo "Source directory $source_folder doesn't contain CMakeLists.txt. Pass QMCPACK source directory as the first argument." + exit +fi + +for name in offload_sycl_real_MP offload_sycl_real offload_sycl_cplx_MP offload_sycl_cplx \ + cpu_real_MP cpu_real cpu_cplx_MP cpu_cplx +do + +CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=$TYPE -DMPIEXEC_PREFLAGS='--cpu-bind;depth;-d;8'" +unset CMAKE_CXX_FLAGS + +if [[ $name == *"cplx"* ]]; then + CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_COMPLEX=ON" +fi + +if [[ $name == *"_MP"* ]]; then + CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_MIXED_PRECISION=ON" +fi + +if [[ $name == *"offload"* ]]; then + CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON -DQMC_GPU_ARCHS=pvc" + CMAKE_CXX_FLAGS="-mllvm -vpo-paropt-atomic-free-reduction-slm=true" +fi + +if [[ $name == *"sycl"* ]]; then + CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_SYCL=ON" +fi + +folder=build_${Machine}_${Compiler}_${name} + +if [[ -v install_folder ]]; then + CMAKE_FLAGS="$CMAKE_FLAGS -DCMAKE_INSTALL_PREFIX=$install_folder/$folder" +fi + +echo "**********************************" +echo "folder $folder" +echo "CMAKE_FLAGS: $CMAKE_FLAGS" +echo "CMAKE_CXX_FLAGS:
$CMAKE_CXX_FLAGS" +echo "**********************************" + +mkdir $folder +cd $folder + +if [ ! -f CMakeCache.txt ] ; then +cmake $CMAKE_FLAGS -DCMAKE_CXX_FLAGS="$CMAKE_CXX_FLAGS" \ + -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx $source_folder +fi + +if [[ -v install_folder ]]; then + make -j16 install && chmod -R -w $install_folder/$folder +else + make -j16 +fi + +cd .. + +echo +done diff --git a/config/build_alcf_polaris_Clang.sh b/config/build_alcf_polaris_Clang.sh index 947e1c6732..7d65906b77 100755 --- a/config/build_alcf_polaris_Clang.sh +++ b/config/build_alcf_polaris_Clang.sh @@ -1,23 +1,23 @@ #!/bin/bash # This recipe is intended for ALCF Polaris https://www.alcf.anl.gov/polaris # It builds all the varaints of QMCPACK in the current directory -# last revision: Sep 5th 2023 +# last revision: April 25th 2024 # # How to invoke this script? # build_alcf_polaris_Clang.sh # build all the variants assuming the current directory is the source directory. # build_alcf_polaris_Clang.sh # build all the variants with a given source directory # build_alcf_polaris_Clang.sh # build all the variants with a given source directory and install to +module use /soft/modulefiles +module load spack-pe-base cmake module load mpiwrappers/cray-mpich-llvm llvm/release-17.0.0 -module load cudatoolkit-standalone/11.2.2 -module load cray-fftw/3.3.8.13 -module load cray-hdf5-parallel/1.12.1.3 -module load cmake/3.23.2 +module load cudatoolkit-standalone/12.3.2 +module load craype-x86-milan cray-fftw cray-hdf5-parallel export BOOST_ROOT=/soft/applications/qmcpack/boost_1_79_0 export CMAKE_PREFIX_PATH=/soft/libraries/openblas/0.3.20-omp:$CMAKE_PREFIX_PATH -module list >& module_list.txt +module list 2>&1 | tee module_list.txt echo "**********************************" echo '$ clang -v' diff --git a/config/build_alcf_sunspot_icpx.sh b/config/build_alcf_sunspot_icpx.sh index 7fd8458f57..55e07597bf 100755 --- a/config/build_alcf_sunspot_icpx.sh +++ 
b/config/build_alcf_sunspot_icpx.sh @@ -1,21 +1,22 @@ #!/bin/bash # This recipe is intended for ALCF Sunspot https://www.alcf.anl.gov/support-center/aurora-sunspot -# last revision: Jan 8th 2023 +# last revision: June 11th 2024 # # How to invoke this script? # build_alcf_sunspot_icpx.sh # build all the variants assuming the current directory is the source directory. # build_alcf_sunspot_icpx.sh # build all the variants with a given source directory # build_alcf_sunspot_icpx.sh # build all the variants with a given source directory and install to -module load spack libxml2 cmake -module load cray-hdf5 -module load oneapi/eng-compiler/2023.05.15.007 +for module_name in oneapi/release oneapi/eng-compiler +do + if module is-loaded $module_name ; then module unload $module_name; fi +done +module load spack-pe-gcc cmake +module load oneapi/eng-compiler/2024.04.15.002 +module load hdf5/1.14.3 boost/1.84.0 module list >& module_list.txt -# edit this line for your own boost header files. -export BOOST_ROOT=/home/yeluo/opt/boost_1_80_0 - echo "**********************************" echo '$ icpx -v' icpx -v @@ -23,7 +24,7 @@ echo "**********************************" TYPE=Release Machine=sunspot -Compiler=icpx20230613 +Compiler=icpx20240227 if [[ $# -eq 0 ]]; then source_folder=`pwd` @@ -57,7 +58,7 @@ if [[ $name == *"_MP"* ]]; then fi if [[ $name == *"offload"* ]]; then - CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON" + CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON -DQMC_GPU_ARCHS=pvc" CMAKE_CXX_FLAGS="-mllvm -vpo-paropt-atomic-free-reduction-slm=true" fi diff --git a/config/build_alcf_theta.sh b/config/build_alcf_theta.sh deleted file mode 100644 index 91c93bc645..0000000000 --- a/config/build_alcf_theta.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash -# This recipe is intended for ALCF Theta https://www.alcf.anl.gov/theta -# It builds all the varaints of QMCPACK in the current directory -# last revision: Sep 12, 2023 -# -# How to invoke this script? 
-# build_alcf_theta.sh # build all the variants assuming the current directory is the source directory. -# build_alcf_theta.sh # build all the variants with a given source directory -# build_alcf_theta.sh # build all the variants with a given source directory and install to - -#Note: the Intel classic compiler on Theta was too old, use GCC with MKL. - -module unload PrgEnv-intel -module load PrgEnv-gnu -module unload cray-libsci -module load cray-hdf5-parallel -module load cmake/3.20.4 -module load intel/19.1.2.254 - -module list >& module_list.txt - -export CC=cc -export CXX=CC -export BOOST_ROOT=/soft/libraries/boost/1.64.0/gnu -export CRAYPE_LINK_TYPE=dynamic - -#TYPE=RelWithDebInfo -TYPE=Release -Compiler=GCC - -if [[ $# -eq 0 ]]; then - source_folder=`pwd` -elif [[ $# -eq 1 ]]; then - source_folder=$1 -else - source_folder=$1 - install_folder=$2 -fi - - -CURRENT_FOLDER=`pwd` - -for name in real real_MP cplx cplx_MP -do - -CMAKE_FLAGS="-D CMAKE_SYSTEM_NAME=CrayLinuxEnvironment -D CMAKE_BUILD_TYPE=$TYPE -D MPIEXEC_EXECUTABLE=/bin/sh -D MPIEXEC_NUMPROC_FLAG=$source_folder/tests/scripts/aprunhelper.sh" - -if [[ $name == *"cplx"* ]]; then - CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_COMPLEX=ON" -fi - -if [[ $name == *"_MP"* ]]; then - CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_MIXED_PRECISION=ON" -fi - -folder=build_KNL_${Compiler}_${name} - -if [[ -v install_folder ]]; then - CMAKE_FLAGS="$CMAKE_FLAGS -DCMAKE_INSTALL_PREFIX=$install_folder/$folder" -fi - -echo "**********************************" -echo "$folder" -echo "$CMAKE_FLAGS" -echo "**********************************" -mkdir $folder -cd $folder -if [ ! -f CMakeCache.txt ] ; then - cmake $CMAKE_FLAGS $source_folder -fi -if [[ -v install_folder ]]; then - make -j16 install && chmod -R -w $install_folder/$folder -else - make -j16 -fi -cd .. 
- -echo -done diff --git a/config/build_anl_lcrc_improv.sh b/config/build_anl_lcrc_improv.sh new file mode 100755 index 0000000000..3c4736785e --- /dev/null +++ b/config/build_anl_lcrc_improv.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +################################################################ +## * This script builds available configurations of QMCPACK ## +## on Improv, at Argonne National Lab. ## +## ## +## * Execute this script in the QMCPACK top-level directory ## +## ./config/build_anl_lcrc_improv.sh ## +## ## +## Last verified: May 15, 2024 ## +################################################################ + +# module files resulting from module imports below: +# Currently Loaded Modulefiles: +# 1) xalt/3.0.1 (S) 2) cmake/3.27.4 3) .binutils/2.41 (H) +# 4) gcc/13.2.0 5) openmpi/5.0.2-gcc-13.2.0 + +source $MODULESHOME/init/bash +module purge +module load cmake/3.27.4 +module load gcc/13.2.0 +module load openmpi/5.0.2-gcc-13.2.0 +module list + +SPACK_ROOT=/gpfs/fs1/soft/improv/software/spack-built/linux-rhel8-zen3/gcc-13.2.0 +export CMAKE_PREFIX_PATH=$SPACK_ROOT/fftw-3.3.10-x5237xr:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=$SPACK_ROOT/libxml2-2.10.3-xkoaaap:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=$SPACK_ROOT/hdf5-1.14.3-6qo7t6e:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=$SPACK_ROOT/openblas-0.3.26-blohgyt:$CMAKE_PREFIX_PATH + +CMAKE_FLAGS="-DENABLE_PPCONVERT=0 \ + -DBOOST_ROOT=/gpfs/fs1/soft/improv/software/custom-built/boost/1.84.0 \ + -DCMAKE_C_COMPILER=mpicc \ + -DCMAKE_CXX_COMPILER=mpicxx" + +# Configure and build cpu real. +echo "" +echo "" +echo "building QMCPACK for cpu real for Improv" +mkdir -p build_improv_cpu_real +cd build_improv_cpu_real +cmake $CMAKE_FLAGS .. +make -j 32 +cd .. +ln -sf ./build_improv_cpu_real/bin/qmcpack ./qmcpack_improv_cpu_real + +# Configure and build cpu complex. +echo "" +echo "" +echo "building QMCPACK for cpu complex for Improv" +mkdir -p build_improv_cpu_complex +cd build_improv_cpu_complex +cmake -DQMC_COMPLEX=1 $CMAKE_FLAGS ..
+make -j 32 +cd .. +ln -sf ./build_improv_cpu_complex/bin/qmcpack_complex ./qmcpack_improv_cpu_complex + diff --git a/config/build_nersc_cori.sh b/config/build_nersc_cori.sh deleted file mode 100755 index b7fc0344fd..0000000000 --- a/config/build_nersc_cori.sh +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env bash - -################################################################ -## * This script builds available configurations of QMCPACK ## -## on NERSC Cori. ## -## ## -## * Execute this script in trunk/ ## -## ./config/build_nersc_cori_hsw.sh ## -## ## -## Last verified: Apr 19, 2021 ## -################################################################ - - - -export CRAYPE_LINK_TYPE=dynamic - -module unload cray-libsci -module load boost/1.70.0 -module load cray-hdf5-parallel -module load cmake -module load gcc/7.3.0 # Make C++ 14 standard library available to the Intel compiler -module list - -# module files resulting from module imports above: -#Currently Loaded Modulefiles: -# 1) modules/3.2.11.4 10) dmapp/7.1.1-7.0.1.1_4.72__g38cf134.ari 19) craype-haswell -# 2) altd/2.0 11) gni-headers/5.0.12.0-7.0.1.1_6.46__g3b1768f.ari 20) cray-mpich/7.7.10 -# 3) darshan/3.2.1 12) xpmem/2.2.20-7.0.1.1_4.28__g0475745.ari 21) craype-hugepages2M -# 4) craype-network-aries 13) job/2.2.4-7.0.1.1_3.55__g36b56f4.ari 22) boost/1.70.0 -# 5) intel/19.0.3.199 14) dvs/2.12_2.2.167-7.0.1.1_17.11__ge473d3a2 23) cray-hdf5-parallel/1.10.5.2 -# 6) craype/2.6.2 15) alps/6.6.58-7.0.1.1_6.30__g437d88db.ari 24) cmake/3.21.3 -# 7) udreg/2.3.2-7.0.1.1_3.61__g8175d3d.ari 16) rca/2.2.20-7.0.1.1_4.74__g8e3fb5b.ari 25) gcc/7.3.0 -# 8) ugni/6.0.14.0-7.0.1.1_7.63__ge78e5b0.ari 17) atp/2.1.3 -# 9) pmi/5.0.14 18) PrgEnv-intel/6.0.5 - - -# Haswell CPU Complex -mkdir build_nersc_cori_hsw_cmplx -cd build_nersc_cori_hsw_cmplx -cmake -DQMC_SYMLINK_TEST_FILES=0 -DCMAKE_SYSTEM_NAME=CrayLinuxEnvironment -DQMC_COMPLEX=1 .. -nice make -j 8 -cd .. 
-appendage= -if [ -f "build_nersc_cori_hsw_cmplx/bin/qmcpack_complex" ]; then - appendage=_complex -fi -ls -l build_nersc_cori_hsw_cmplx/bin/qmcpack${appendage} -ln -sf ./build_nersc_cori_hsw_cmplx/bin/qmcpack${appendage} ./qmcpack_nersc_cori_cpu_hsw_comp - - -# Haswell CPU Real -mkdir build_nersc_cori_hsw -cd build_nersc_cori_hsw -cmake -DQMC_SYMLINK_TEST_FILES=0 -DCMAKE_SYSTEM_NAME=CrayLinuxEnvironment -DQMC_COMPLEX=0 .. -nice make -j 8 -ls -l bin/qmcpack -cd .. -ln -sf ./build_nersc_cori_hsw/bin/qmcpack ./qmcpack_nersc_cori_cpu_hsw - -# Swap modules for KNL recipe -module swap craype-haswell craype-mic-knl -module list - -# module files resulting from module imports above: -#Currently Loaded Modulefiles: -# 1) modules/3.2.11.4 6) craype/2.6.2 11) gni-headers/5.0.12.0-7.0.1.1_6.43__g3b1768f.ari 16) rca/2.2.20-7.0.1.1_4.65__g8e3fb5b.ari 21) craype-hugepages2M -# 2) altd/2.0 7) udreg/2.3.2-7.0.1.1_3.52__g8175d3d.ari 12) xpmem/2.2.20-7.0.1.1_4.23__g0475745.ari 17) atp/2.1.3 22) boost/1.70.0 -# 3) darshan/3.2.1 8) ugni/6.0.14.0-7.0.1.1_7.54__ge78e5b0.ari 13) job/2.2.4-7.0.1.1_3.50__g36b56f4.ari 18) PrgEnv-intel/6.0.5 23) cray-hdf5-parallel/1.10.5.2 -# 4) craype-network-aries 9) pmi/5.0.14 14) dvs/2.12_2.2.167-7.0.1.1_17.6__ge473d3a2 19) craype-mic-knl 24) cmake/3.14.4 -# 5) intel/19.0.3.199 10) dmapp/7.1.1-7.0.1.1_4.64__g38cf134.ari 15) alps/6.6.58-7.0.1.1_6.22__g437d88db.ari 20) cray-mpich/7.7.10 25) gcc/7.3.0 - -# KNL CPU Complex -mkdir build_nersc_cori_knl_cmplx -cd build_nersc_cori_knl_cmplx -cmake -DQMC_SYMLINK_TEST_FILES=0 -DCMAKE_SYSTEM_NAME=CrayLinuxEnvironment -DQMC_COMPLEX=1 .. -nice make -j 8 -cd .. 
-appendage= -if [ -f "build_nersc_cori_knl_cmplx/bin/qmcpack_complex" ]; then - appendage=_complex -fi -ls -l build_nersc_cori_knl_cmplx/bin/qmcpack${appendage} -ln -sf ./build_nersc_cori_knl_cmplx/bin/qmcpack${appendage} ./qmcpack_nersc_cori_cpu_knl_comp - -# KNL CPU Real -mkdir build_nersc_cori_knl -cd build_nersc_cori_knl -cmake -DQMC_SYMLINK_TEST_FILES=0 -DCMAKE_SYSTEM_NAME=CrayLinuxEnvironment -DQMC_COMPLEX=0 .. -nice make -j 8 -ls -l bin/qmcpack -cd .. -ln -sf ./build_nersc_cori_knl/bin/qmcpack ./qmcpack_nersc_cori_cpu_knl diff --git a/config/build_nersc_perlmutter_Clang.sh b/config/build_nersc_perlmutter_Clang.sh index cea2d37c12..ca82baedf6 100755 --- a/config/build_nersc_perlmutter_Clang.sh +++ b/config/build_nersc_perlmutter_Clang.sh @@ -1,7 +1,7 @@ #!/bin/bash # This recipe is intended for NERSC Perlmutter https://docs.nersc.gov/systems/perlmutter # It builds all the varaints of QMCPACK in the current directory -# last revision: Aug 12th 2023 +# last revision: Mar 18th 2024 # # How to invoke this script? # build_nersc_perlmutter_Clang.sh # build all the variants assuming the current directory is the source directory. 
@@ -10,13 +10,17 @@ module load PrgEnv-gnu module load cray-libsci -CRAY_LIBSCI_LIB=$CRAY_LIBSCI_PREFIX_DIR/lib/libsci_gnu_mp.so - -module load PrgEnv-llvm/0.1 llvm/16 -module load cray-fftw/3.3.10.3 -module load cray-hdf5-parallel/1.12.2.3 +CRAY_LIBSCI_LIB=$CRAY_PE_LIBSCI_PREFIX_DIR/lib/libsci_gnu_mp.so +module unload PrgEnv-gnu +module load craype cray-mpich +module load cray-fftw +module load cray-hdf5-parallel module load cmake/3.24.3 +module use /global/common/software/nersc/n9/llvm/modules +module load llvm/17.0.6-gpu +export MPICH_CC=clang +export MPICH_CXX=clang++ echo "**********************************" echo '$ clang -v' @@ -25,7 +29,7 @@ echo "**********************************" TYPE=Release Machine=perlmutter -Compiler=Clang16 +Compiler=Clang17 if [[ $# -eq 0 ]]; then source_folder=`pwd` diff --git a/config/build_nrel_kestrel.sh b/config/build_nrel_kestrel.sh new file mode 100755 index 0000000000..7fd5489e91 --- /dev/null +++ b/config/build_nrel_kestrel.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +##################################################################################### +## * This script builds available configurations of QMCPACK ## +## on kestrel at NREL ## +## ## +## * Execute this script in qmcpack git top level directory ## +## ./config/build_nrel_kestrel.sh ## +## ## +## Last verified: July 23, 2024 ## +##################################################################################### + +# module files resulting from module imports below: +# Currently Loaded Modules: +# 1) intel-oneapi-compilers/2023.2.0 2) intel-oneapi-mpi/2021.11.0-intel +# 3) gcc/13.1.0 4) intel-oneapi-tbb/2021.10.0-intel +# 5) intel-oneapi-mkl/2023.2.0-intel 6) boost/1.84.0-intel-oneapi-mpi-intel +# 7) fftw/3.3.10-intel-oneapi-mpi-intel 8) hdf5/1.14.3-intel-oneapi-mpi-intel +# 9) curl/8.6.0 10) cmake/3.27.9 + +source $MODULESHOME/init/bash +module purge +module load intel-oneapi-compilers/2023.2.0 +module load intel-oneapi-mpi/2021.11.0-intel +module load gcc/13.1.0 
+module load intel-oneapi-mkl/2023.2.0-intel +module load boost/1.84.0-intel-oneapi-mpi-intel +module load fftw/3.3.10-intel-oneapi-mpi-intel +module load hdf5/1.14.3-intel-oneapi-mpi-intel +module load cmake/3.27.9 + +module list + +CMAKE_FLAGS="-DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx" + +# Configure and build cpu real. +echo "" +echo "" +echo "building QMCPACK for cpu real for Kestrel" +mkdir -p build_cpu_real +cd build_cpu_real +cmake $CMAKE_FLAGS .. +make -j 32 +cd .. +ln -sf ./build_cpu_real/bin/qmcpack ./qmcpack_cpu_real + +# Configure and build cpu complex. +echo "" +echo "" +echo "building QMCPACK for cpu complex for Kestrel" +mkdir -p build_cpu_complex +cd build_cpu_complex +cmake -DQMC_COMPLEX=1 $CMAKE_FLAGS .. +make -j 32 +cd .. +ln -sf ./build_cpu_complex/bin/qmcpack_complex ./qmcpack_cpu_complex + diff --git a/config/build_olcf_frontier_ROCm.sh b/config/build_olcf_frontier_ROCm.sh index 5b2bb39f5c..a4e4f11b43 100755 --- a/config/build_olcf_frontier_ROCm.sh +++ b/config/build_olcf_frontier_ROCm.sh @@ -1,18 +1,19 @@ #!/bin/bash -# Build script for Frontier and its test and development system Crusher at OLCF -# See https://github.com/QMCPACK/qmcpack/pull/4123 for more details on the module file if needed +# Build script for Frontier +# It builds all the variants of QMCPACK in the current directory +# last revision: Aug 19th 2024 -echo "Loading QMCPACK dependency modules for crusher" +echo "Loading QMCPACK dependency modules for frontier" for module_name in PrgEnv-gnu PrgEnv-cray PrgEnv-amd PrgEnv-gnu-amd PrgEnv-cray-amd \ - amd amd-mixed gcc gcc-mixed cce cce-mixed + amd amd-mixed gcc gcc-mixed gcc-native cce cce-mixed rocm do if module is-loaded $module_name ; then module unload $module_name; fi done -module load PrgEnv-amd amd/5.7.0 +module load PrgEnv-amd amd/6.0.0 +module unload darshan-runtime unset HIP_PATH # it messed up clang as a HIP compiler. -module load craype/2.7.16 # hard-coded version. 2.7.19/20/21 cause CC segfault.
module unload cray-libsci module load cmake/3.22.2 module load cray-fftw @@ -25,7 +26,7 @@ export BOOST_ROOT=/ccs/proj/mat151/opt/boost/boost_1_81_0 module list >& module_list.txt TYPE=Release -Compiler=rocm570 +Compiler=rocm600 if [[ $# -eq 0 ]]; then source_folder=`pwd` @@ -79,7 +80,7 @@ mkdir $folder cd $folder if [ ! -f CMakeCache.txt ] ; then cmake $CMAKE_FLAGS -DCMAKE_C_COMPILER=cc -DCMAKE_CXX_COMPILER=CC -DCMAKE_SYSTEM_NAME=CrayLinuxEnvironment \ - -DCMAKE_C_FLAGS=--gcc-toolchain=/opt/cray/pe/gcc/11.2.0/snos -DCMAKE_CXX_FLAGS=--gcc-toolchain=/opt/cray/pe/gcc/11.2.0/snos \ + -DCMAKE_CXX_FLAGS="-add-runpath" \ $source_folder fi diff --git a/config/build_olcf_summit_Clang.sh b/config/build_olcf_summit_Clang.sh index ddfe86cd8f..800235fe58 100755 --- a/config/build_olcf_summit_Clang.sh +++ b/config/build_olcf_summit_Clang.sh @@ -1,7 +1,7 @@ #!/bin/bash # This recipe is intended for OLCF Summit https://www.olcf.ornl.gov/summit/ # It builds all the varaints of QMCPACK in the current directory -# last revision: Aug 29th 2022 +# last revision: Jun 25th 2024 # # How to invoke this script? # build_olcf_summit_Clang.sh # build all the variants assuming the current directory is the source directory. @@ -10,31 +10,29 @@ echo "Purging current module set" module purge +module use /sw/summit/modulefiles/ums/stf010/Core echo "Loading QMCPACK dependency modules for summit" -module load gcc/9.3.0 +module load gcc/12.2.0 module load spectrum-mpi module load cmake module load git -module load cuda/11.0.3 +module load cuda/12.2.0 module load essl module load netlib-lapack -module load hdf5/1.10.7 +module load hdf5/1.14.3 module load fftw -module load boost/1.76.0 -module load python/3.8-anaconda3 -# private module until OLCF provides a new llvm build -if [[ ! -d /gpfs/alpine/mat151/world-shared/opt/modules ]] ; then - echo "Required module folder /gpfs/alpine/mat151/world-shared/opt/modules not found!" 
- exit 1 -fi -module use /gpfs/alpine/mat151/world-shared/opt/modules -module load llvm/release-15.0.0-cuda11.0 +module load boost/1.83.0 +module load python/3.11.6 +module load llvm/18.1.6-latest module list >& module_list.txt +export OMPI_CC=clang +export OMPI_CXX=clang++ + TYPE=Release Machine=summit -Compiler=Clang15 +Compiler=Clang18 if [[ $# -eq 0 ]]; then source_folder=`pwd` @@ -56,7 +54,7 @@ for name in offload_cuda_real_MP offload_cuda_real offload_cuda_cplx_MP offload_ cpu_real_MP cpu_real cpu_cplx_MP cpu_cplx do -CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=$TYPE -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1 -DMPIEXEC_EXECUTABLE=`which jsrun` -DMPIEXEC_NUMPROC_FLAG='-n' -DMPIEXEC_PREFLAGS='-c;16;-g;1;-b;packed:16;--smpiargs=off'" +CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=$TYPE -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1 -DMPIEXEC_EXECUTABLE=`which jsrun` -DMPIEXEC_NUMPROC_FLAG='-n' -DMPIEXEC_PREFLAGS='-c;16;-g;1;-b;packed:16;--smpiargs=-disable_gpu_hooks'" if [[ $name == *"cplx"* ]]; then CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_COMPLEX=ON" @@ -67,11 +65,15 @@ if [[ $name == *"_MP"* ]]; then fi if [[ $name == *"offload"* ]]; then - CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON -DOFFLOAD_ARCH=sm_70" + CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON" fi if [[ $name == *"cuda"* ]]; then - CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70" + CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_CUDA=ON" +fi + +if [[ $name == *"offload"* || $name == *"cuda"* ]]; then + CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_GPU_ARCHS=sm_70" fi folder=build_${Machine}_${Compiler}_${name} diff --git a/config/build_ornl_cades_baseline.sh b/config/build_ornl_cades_baseline.sh new file mode 100755 index 0000000000..087d8528c1 --- /dev/null +++ b/config/build_ornl_cades_baseline.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +##################################################################################### +## * This script builds available configurations of 
QMCPACK ## +## on baseline at ORNL ## +## https://docs.cades.olcf.ornl.gov/baseline_user_guide/baseline_user_guide.html ## +## ## +## * Execute this script in qmcpack git top level directory ## +## ./config/build_ornl_cades_baseline.sh ## +## ## +## Last verified: May 23, 2024 ## +##################################################################################### + +# module files resulting from module imports below: +# Currently Loaded Modules: +# 1) DefApps 2) gcc/12.2.0 3) openmpi/4.0.4 4) boost/1.83.0 5) fftw/3.3.10 +# 6) openblas/0.3.23 7) hdf5/1.14.3 8) cmake/3.26.3 + +source $MODULESHOME/init/bash +module purge +module load DefApps +module load gcc/12.2.0 +module load openmpi/4.0.4 +module load boost/1.83.0 +module load fftw/3.3.10 +module load openblas/0.3.23 +module load hdf5/1.14.3 +module load cmake/3.26.3 + +module list + +CMAKE_FLAGS="-DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpiCC \ + -DENABLE_TIMERS=1 -DBUILD_AFQMC=0 \ + -DMPIEXEC_EXECUTABLE=/usr/bin/srun -DMPIEXEC_NUMPROC_FLAG='-n' -DMPIEXEC_PREFLAGS='-c;16;--distribution=block:cyclic'" +# If QMC_DATA is available, consider adding e.g. +# -DQMC_DATA=/gpfs/wolf2/cades/mat269/world-shared/pk7/QMC_DATA_FULL -DQMC_PERFORMANCE_NIO_MAX_ATOMS=128 -DQMC_PERFORMANCE_C_MOLECULE_MAX_ATOMS=16 -DQMC_PERFORMANCE_C_GRAPHITE_MAX_ATOMS=16 + +# Configure and build cpu real. +echo "" +echo "" +echo "building QMCPACK for cpu real for Baseline" +mkdir -p build_baseline_cpu_real +cd build_baseline_cpu_real +cmake $CMAKE_FLAGS .. +nice make -j 32 +cd .. +ln -sf ./build_baseline_cpu_real/bin/qmcpack ./qmcpack_baseline_cpu_real + +# Configure and build cpu complex. +echo "" +echo "" +echo "building QMCPACK for cpu complex for Baseline" +mkdir -p build_baseline_cpu_complex +cd build_baseline_cpu_complex +cmake -DQMC_COMPLEX=1 $CMAKE_FLAGS .. +nice make -j 32 +cd .. 
+ln -sf ./build_baseline_cpu_complex/bin/qmcpack_complex ./qmcpack_baseline_cpu_complex + diff --git a/config/build_tulip.sh b/config/build_tulip.sh deleted file mode 100644 index 324c93bac3..0000000000 --- a/config/build_tulip.sh +++ /dev/null @@ -1,72 +0,0 @@ -# Note: this script needs an OpenMP enabled OpenBLAS which is not avaible from -# modules provided on the machine. For this reason, this script is for archiving purpose. -# last update July 28th, 2021 - -module load PrgEnv-cray -module swap gcc gcc/8.1.0 -module load hdf5/1.10.1 - -if [ `module list 2>&1 | grep "openblas/dynamic" | wc -l` -gt 0 ]; then - echo please module unload openblas/dynamic which is not thread-safe for OpenMP - exit -fi - -module use /home/users/coe0097/opt/privatemodules -module load openblas-omp - -export FFTW_HOME=/cm/shared/apps/fftw/openmpi/gcc/64/3.3.8 -export BOOST_ROOT=/cm/shared/opt/boost/1.72.0 - -for build in V100_Clang_offload_real_MP V100_Clang_offload_cuda_real_MP \ - V100_Cray_offload_real_MP V100_Cray_offload_cuda_real_MP \ - MI100_Cray_offload_real_MP MI100_Cray_offload_cplx_MP \ - MI60_Cray_offload_real_MP -do -echo -echo "###################################" -echo "Building $build" -echo "###################################" - -if [[ $build == *"V100_Clang"* ]]; then - module_hw=llvm/main-20210726 - module load cuda11.2 - CTEST_FLAGS="-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DQMC_MPI=0" - CTEST_FLAGS="$CTEST_FLAGS -DENABLE_OFFLOAD=ON -DOFFLOAD_ARCH=sm_70 -DUSE_OBJECT_TARGET=ON" -elif [[ $build == *"V100_Cray"* ]]; then - module_hw=craype-accel-nvidia70 - module load cuda11.2 - CTEST_FLAGS="-DCMAKE_C_COMPILER=cc -DCMAKE_CXX_COMPILER=CC -DCMAKE_SYSTEM_NAME=CrayLinuxEnvironment -DENABLE_OFFLOAD=ON" -elif [[ $build == *"MI100"* ]]; then - module_hw=craype-accel-amd-gfx908 - module load rocm - CTEST_FLAGS="-DCMAKE_C_COMPILER=cc -DCMAKE_CXX_COMPILER=CC -DCMAKE_SYSTEM_NAME=CrayLinuxEnvironment -DENABLE_OFFLOAD=ON" -elif [[ $build == *"MI60"* ]]; then - 
module_hw=craype-accel-amd-gfx906 - module load rocm - CTEST_FLAGS="-DCMAKE_C_COMPILER=cc -DCMAKE_CXX_COMPILER=CC -DCMAKE_SYSTEM_NAME=CrayLinuxEnvironment -DENABLE_OFFLOAD=ON" -fi - -if [[ $build == *"cuda"* ]]; then - CTEST_FLAGS="$CTEST_FLAGS -DENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCUDAToolkit_ROOT=$CUDA_ROOT -DCMAKE_CUDA_HOST_COMPILER=`which g++`" -fi - -if [[ $build == *"cplx"* ]]; then - CTEST_FLAGS="$CTEST_FLAGS -DQMC_COMPLEX=ON" -fi - -if [[ $build == *"_MP"* ]]; then - CTEST_FLAGS="$CTEST_FLAGS -DQMC_MIXED_PRECISION=ON" -fi - -echo "CTEST_FLAGS $CTEST_FLAGS" -module load $module_hw -module list - -folder=build_$build -mkdir $folder; cd $folder -cmake $CTEST_FLAGS -DBLA_VENDOR=OpenBLAS \ - -DQMC_DATA=/home/users/coe0097/opt/h5data \ - .. && make -j32 -cd .. -module unload $module_hw -done diff --git a/config/docker/dependencies/ubuntu/clang-latest/Dockerfile b/config/docker/dependencies/ubuntu/clang-latest/Dockerfile deleted file mode 100644 index 947aa37900..0000000000 --- a/config/docker/dependencies/ubuntu/clang-latest/Dockerfile +++ /dev/null @@ -1,47 +0,0 @@ -FROM ubuntu:20.04 -LABEL maintainer="williamfgc@yahoo.com" - -RUN export DEBIAN_FRONTEND=noninteractive &&\ - apt-get update -y &&\ - apt-get upgrade -y apt-utils &&\ - apt-get install -y gpg wget - -# Dependencies -RUN wget https://apt.kitware.com/kitware-archive.sh &&\ - sh kitware-archive.sh - -RUN export DEBIAN_FRONTEND=noninteractive &&\ - apt-get install gcc g++ \ - python3 \ - cmake \ - ninja-build \ - libboost-all-dev \ - git \ - libhdf5-serial-dev \ - hdf5-tools \ - libfftw3-dev \ - libopenblas-openmp-dev \ - libxml2-dev \ - sudo \ - curl \ - rsync \ - wget \ - software-properties-common \ - vim \ - -y - -# add the latest clang development -RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key| apt-key add - &&\ - apt-add-repository 'deb http://apt.llvm.org/focal/ llvm-toolchain-focal-12 main' -RUN apt-get update -y &&\ - apt-get install clang-12 clang-tools-12 libomp-12-dev -y 
- -# must add a user different from root -# to run MPI executables -RUN useradd -ms /bin/bash user -# allow in sudoers to install packages -RUN adduser user sudo -RUN echo "user:user" | chpasswd - -USER user -WORKDIR /home/user diff --git a/config/docker/dependencies/ubuntu/impish/Dockerfile b/config/docker/dependencies/ubuntu/impish/Dockerfile deleted file mode 100644 index c64d0c1dd0..0000000000 --- a/config/docker/dependencies/ubuntu/impish/Dockerfile +++ /dev/null @@ -1,48 +0,0 @@ -FROM ubuntu:impish-20210722 -LABEL maintainer="williamfgc@yahoo.com" - -RUN export DEBIAN_FRONTEND=noninteractive &&\ - apt-get update -y &&\ - apt-get upgrade -y apt-utils - -# Dependencies -RUN export DEBIAN_FRONTEND=noninteractive &&\ - apt-get install gcc g++ \ - clang \ - clang-format \ - clang-tidy \ - gcovr \ - python3 \ - cmake \ - ninja-build \ - libboost-all-dev \ - git \ - libhdf5-serial-dev \ - hdf5-tools \ - libfftw3-dev \ - libopenblas-openmp-dev \ - libxml2-dev \ - sudo \ - curl \ - rsync \ - wget \ - software-properties-common \ - vim \ - -y - -# Python packages for tests -RUN export DEBIAN_FRONTEND=noninteractive &&\ - apt-get install python3-numpy \ - python3-h5py \ - python3-pandas \ - -y - -# must add a user different from root -# to run MPI executables -RUN useradd -ms /bin/bash user -# allow in sudoers to install packages -RUN adduser user sudo -RUN echo "user:user" | chpasswd - -USER user -WORKDIR /home/user diff --git a/config/docker/dependencies/ubuntu22/Dockerfile b/config/docker/dependencies/ubuntu22/Dockerfile deleted file mode 100644 index e95db5efd9..0000000000 --- a/config/docker/dependencies/ubuntu22/Dockerfile +++ /dev/null @@ -1,57 +0,0 @@ -FROM ubuntu:22.04 -LABEL maintainer="williamfgc@yahoo.com" - -RUN export DEBIAN_FRONTEND=noninteractive &&\ - apt-get clean &&\ - apt-get update -y &&\ - apt-get upgrade -y apt-utils &&\ - apt-get install -y gpg wget - -RUN export DEBIAN_FRONTEND=noninteractive &&\ - apt-get install gcc g++ \ - clang \ - 
clang-format \ - clang-tidy \ - libomp-dev \ - gcovr \ - python3 \ - cmake \ - ninja-build \ - libboost-all-dev \ - git \ - libopenmpi-dev \ - libhdf5-openmpi-dev \ - libhdf5-serial-dev \ - hdf5-tools \ - libfftw3-dev \ - libopenblas-openmp-dev \ - libxml2-dev \ - sudo \ - curl \ - rsync \ - wget \ - software-properties-common \ - vim \ - numdiff \ - -y - -# Python packages for tests -RUN export DEBIAN_FRONTEND=noninteractive &&\ - apt-get install python3-numpy \ - python3-h5py \ - python3-pandas \ - python3-pip \ - -y - -RUN export DEBIAN_FRONTEND=noninteractive &&\ - pip3 install cif2cell - -# must add a user different from root -# to run MPI executables -RUN useradd -ms /bin/bash user -# allow in sudoers to install packages -RUN adduser user sudo -RUN echo "user:user" | chpasswd - -USER user -WORKDIR /home/user diff --git a/config/docker/dependencies/ubuntu/openmpi/Dockerfile b/config/docker/dependencies/ubuntu22/serial/Dockerfile similarity index 55% rename from config/docker/dependencies/ubuntu/openmpi/Dockerfile rename to config/docker/dependencies/ubuntu22/serial/Dockerfile index da8c4b9c7f..34c95a5bbb 100644 --- a/config/docker/dependencies/ubuntu/openmpi/Dockerfile +++ b/config/docker/dependencies/ubuntu22/serial/Dockerfile @@ -1,5 +1,4 @@ -FROM ubuntu:20.04 -LABEL maintainer="williamfgc@yahoo.com" +FROM ubuntu:22.04 RUN export DEBIAN_FRONTEND=noninteractive &&\ apt-get clean &&\ @@ -12,18 +11,17 @@ RUN wget https://apt.kitware.com/kitware-archive.sh &&\ sh kitware-archive.sh RUN export DEBIAN_FRONTEND=noninteractive &&\ - apt-get install gcc g++ \ - clang \ - clang-format \ - clang-tidy \ + apt-get install gcc-12 g++-12 \ + clang-14 \ + clang-format-14 \ + clang-tidy-14 \ + libomp-14-dev \ gcovr \ python3 \ cmake \ ninja-build \ libboost-all-dev \ git \ - libopenmpi-dev \ - libhdf5-openmpi-dev \ libhdf5-serial-dev \ hdf5-tools \ libfftw3-dev \ @@ -49,8 +47,20 @@ RUN export DEBIAN_FRONTEND=noninteractive &&\ RUN export DEBIAN_FRONTEND=noninteractive &&\ 
pip3 install cif2cell +# add gcc-12 as gcc +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 + +# add clang-14 as clang +RUN update-alternatives --install /usr/bin/clang clang /usr/bin/clang-14 100 && \ + update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-14 100 + +# add clang-format and clang-tidy as well as libomp +RUN update-alternatives --install /usr/bin/clang-format clang-format /usr/bin/clang-format-14 100 && \ + update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-14 100 && \ + update-alternatives --install /usr/bin/clang-tidy-diff.py clang-tidy-diff.py /usr/bin/clang-tidy-diff-14.py 100 + # must add a user different from root -# to run MPI executables RUN useradd -ms /bin/bash user # allow in sudoers to install packages RUN adduser user sudo diff --git a/docs/additional_tools.rst b/docs/additional_tools.rst index ff211d5925..273ecff613 100644 --- a/docs/additional_tools.rst +++ b/docs/additional_tools.rst @@ -544,9 +544,11 @@ Supported codes and CCSD, and for both isolated systems and periodic boundary conditions. PySCF can be downloaded from https://github.com/sunqm/pyscf. Many examples and tutorials can be found on the PySCF website, and all - types of single determinants calculations are compatible with , thanks - to active support from the authors of PySCF. A few additional steps are - necessary to generate an output readable by ``convert4qmc``. + types of single-determinant calculations are compatible with QMCPACK, + thanks to active support from the authors of PySCF. Additionally, + multideterminant trial wave functions generated through the CASSCF or + CASCI modules are also supported. A few additional steps are necessary to + generate an output readable by ``convert4qmc``.
This example shows how to run a Hartree-Fock calculation for the :math:`LiH` dimer molecule from PySCF and convert the wavefunction for QMCPACK. diff --git a/docs/bibs/methods.bib b/docs/bibs/methods.bib index 52177675b6..76293ba879 100644 --- a/docs/bibs/methods.bib +++ b/docs/bibs/methods.bib @@ -141,3 +141,69 @@ @article{Melton2016-2 Volume = {144}, Year = {2016} } + +@article{Sorella2001, + title = {Generalized Lanczos algorithm for variational quantum Monte Carlo}, + author = {Sorella, Sandro}, + journal = {Phys. Rev. B}, + volume = {64}, + issue = {2}, + pages = {024512}, + numpages = {16}, + year = {2001}, + month = {Jun}, + publisher = {American Physical Society}, + doi = {10.1103/PhysRevB.64.024512}, + url = {https://link.aps.org/doi/10.1103/PhysRevB.64.024512} +} + +@article{Casula2004, + author = {Casula, Michele and Attaccalite, Claudio and Sorella, Sandro}, + title = "{Correlated geminal wave function for molecules: An efficient resonating valence bond approach}", + journal = {The Journal of Chemical Physics}, + volume = {121}, + number = {15}, + pages = {7110-7126}, + year = {2004}, + month = {10}, + abstract = "{We show that a simple correlated wave function, obtained by applying a Jastrow correlation term to an antisymmetrized geminal power, based upon singlet pairs between electrons, is particularly suited for describing the electronic structure of molecules, yielding a large amount of the correlation energy. The remarkable feature of this approach is that, in principle, several resonating valence bonds can be dealt simultaneously with a single determinant, at a computational cost growing with the number of electrons similar to more conventional methods, such as Hartree-Fock or density functional theory. Moreover we describe an extension of the stochastic reconfiguration method, which was recently introduced for the energy minimization of simple atomic wave functions. 
Within this extension the atomic positions can be considered as further variational parameters, which can be optimized together with the remaining ones. The method is applied to several molecules from Li2 to benzene by obtaining total energies, bond lengths and binding energies comparable with much more demanding multiconfiguration schemes.}", + issn = {0021-9606}, + doi = {10.1063/1.1794632}, + url = {https://doi.org/10.1063/1.1794632}, + eprint = {https://pubs.aip.org/aip/jcp/article-pdf/121/15/7110/19183362/7110\_1\_online.pdf}, +} + +@article{Neuscamman2012, + title = {Optimizing large parameter sets in variational quantum Monte Carlo}, + author = {Neuscamman, Eric and Umrigar, C. J. and Chan, Garnet Kin-Lic}, + journal = {Phys. Rev. B}, + volume = {85}, + issue = {4}, + pages = {045103}, + numpages = {6}, + year = {2012}, + month = {Jan}, + publisher = {American Physical Society}, + doi = {10.1103/PhysRevB.85.045103}, + url = {https://link.aps.org/doi/10.1103/PhysRevB.85.045103} +} + +@article{Sorella2007, + author = {Sorella, Sandro and Casula, Michele and Rocca, Dario}, + title = "{Weak binding between two aromatic rings: Feeling the van der Waals attraction by quantum Monte Carlo methods}", + journal = {The Journal of Chemical Physics}, + volume = {127}, + number = {1}, + pages = {014105}, + year = {2007}, + month = {07}, + abstract = "{We report a systematic study of the weak chemical bond between two benzene molecules. We first show that it is possible to obtain a very good description of the C2 dimer and the benzene molecule by using pseudopotentials for the chemically inert 1s electrons and a resonating valence bond wave function as a variational ansatz, expanded on a relatively small Gaussian basis set. We employ an improved version of the stochastic reconfiguration technique to optimize the many-body wave function, which is the starting point for highly accurate simulations based on the lattice regularized diffusion Monte Carlo method. 
This projection technique provides a rigorous variational upper bound for the total energy, even in the presence of pseudopotentials, and substantially improves the accuracy of the trial wave function, which already yields a large fraction of the dynamical and nondynamical electron correlation. We show that the energy dispersion of two benzene molecules in the parallel displaced geometry is significantly deeper than the face-to-face configuration. However, contrary to previous studies based on post-Hartree-Fock methods, the binding energy remains weak (≃2kcal∕mol) also in this geometry, and its value is in agreement with the most accurate and recent experimental findings [H. Krause et al., Chem. Phys. Lett. 184, 411 (1991)].}", + issn = {0021-9606}, + doi = {10.1063/1.2746035}, + url = {https://doi.org/10.1063/1.2746035}, + eprint = {https://pubs.aip.org/aip/jcp/article-pdf/doi/10.1063/1.2746035/15399401/014105\_1\_online.pdf}, +} + + + + diff --git a/docs/developing.rst b/docs/developing.rst index ba6abd0507..825ea7b7e2 100644 --- a/docs/developing.rst +++ b/docs/developing.rst @@ -914,6 +914,49 @@ An example of the second approach is } } +Walker +------ +.. note:: Batched Version Documentation + The following documentation section describes the code design and behavior used when ``driver_version == batch`` at runtime. + +Lightweight representation of a markov chain walker's state. It is managed during each QMC driver section by ``MCPopulation``. Between sections it is stored in the WalkerConfigurations container class. + +Walker Identifiers (walker_id) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The ``Walker::walker_id_`` and ``Walker::parent_id_`` allow logging walkers for later constructing trajectories etc. The basic data relations involved are shown in :numref:`fig1`. In each QMC section each walker ID will be unique and is generated using the equation +.. 
math:: + :label: eq_walker_id + walker_id = num_walkers_created_++ * num_ranks_ + rank_ + 1 + +where ``num_walkers_created_`` is a member variable of the sole ``MCPopulation`` object on the rank and initially set to 0. Each walker's ``parent_id`` is set to 0 when it is constructed and is assigned to ``walker_id`` of the walker it is transferred or copied from. If that assignment is from previous section's or run's ``WalkerConfigurations`` object then the value of the ``Walker::getWalkerID()`` is multiplied by -1. If the Walker's initial configuration comes from the golden particle set the parent_id will be 0. + +.. _fig1: +.. figure:: /uml/WalkerID_chen.pdf + :width: 400 + :align: center + +During DMC branching and load-balancing, there are two distinct mechanisms by which new walkers appear on each rank. +One is when a walker is transferred from another rank, the other is when walkers of multiplicity >= 2 are split. +For a given walker, both of these mechanisms potentially occur in order within each step, transfer first and then split. + +During the branching stage multiplicity of a walker is derived from its weight. +Walker multiplicity summed over all walkers gives the population of walkers for the next step. +Based on the total multiplicity on each rank, the highest multiplicity walkers on overpopulated ranks are fully or partially sent to underpopulated ranks for optimal load balance. +When possible rank multiplicities are balanced by transferring fewer walkers with more than one unit of multiplicity for minimized transfer traffic. +This unit is unfortunately called 'copy' in the source code but it is simply the multiplicity that the walker will have after it is unpacked on the receiving rank. +That amount of multiplicity is removed from the walker on the sending rank. +When walker transfer happens, the receiving walker overwrites its ``parent_id`` with the received value of ``walker_id`` before assigning a new ID to its ``walker_id``.
+The multiplicity of the receiving walker is set to the multiplicity that the sending walker lost. +Walkers with multiplicity < 1 are removed before transfers for creating vacant receiving walkers and after transfers for removing fully displaced walkers. + +Each rank can still carry walkers with multiplicity >= 2 at this point. +Some of these may exist before the population balancing and some may have been received. +To achieve optimal sampling ergodicity, all the high multiplicity walkers will follow one trajectory for each unit of multiplicity they have which means they need to become independent replicas. +For each unit of multiplicity >= 2, a new walker is spawned. The ``parent_id`` of each spawned walker is assigned to the ``walker_id`` of the original high multiplicity walker. +At the end of this process all walkers have multiplicity == 1. They keep their ``walker_id``'s from spawn time. + +The overarching rule of ``parent_id`` is when a newly active walker is transferred or split from an older walker, the older walker's ``walker_id`` becomes the new walker's ``parent_id`` and the new walker's ``walker_id`` is the global unique ID that it was spawned with. + Wavefunction ------------ diff --git a/docs/external_tools.rst b/docs/external_tools.rst index 499b0b0b60..8940eb025f 100644 --- a/docs/external_tools.rst +++ b/docs/external_tools.rst @@ -25,6 +25,12 @@ In general: These set the basic flags required to build with either of these sanitizer libraries which are mutually exclusive. Depending on your system and linker, these may be incompatible with the "Release" build, so set ``-DCMAKE_BUILD_TYPE=Debug`` or ``-DCMAKE_BUILD_TYPE=RelWithDebInfo``. They are tested on GitHub Actions CI using deterministic tests ``ctest -L deterministic`` (currently ubsan). See the following links for additional information on use, run time, and build options of the sanitizers: https://clang.llvm.org/docs/AddressSanitizer.html & https://clang.llvm.org/docs/MemorySanitizer.html.
+Doxygen source documentation +---------------------------- + +If doxygen and optionally dot from graphviz are detected by CMake, a qmcpack_doxygen target will be defined. ``make qmcpack_doxygen`` will then generate html-based +documentation in the build directory. This target is not enabled by default because generation of the documentation may take several minutes. This automatically +generated documentation includes class diagrams and browsable and searchable lists of all functions, classes, and files. Intel VTune ----------- diff --git a/docs/hamiltonianobservable.rst b/docs/hamiltonianobservable.rst index 2a4bbe52db..2fd71310ab 100644 --- a/docs/hamiltonianobservable.rst +++ b/docs/hamiltonianobservable.rst @@ -305,31 +305,33 @@ the radial functions :math:`V_{\ell}^{\rm SO}` can be included in the pseudopote attributes: - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ - | **Name** | **Datatype** | **Values** | **Default** | **Description** | - +=============================+==============+=======================+========================+==================================================+ - | ``type``:math:`^r` | text | **pseudo** | | Must be pseudo | - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ - | ``name/id``:math:`^r` | text | *anything* | PseudoPot | *No current function* | - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ - | ``source``:math:`^r` | text | ``particleset.name`` | i | Ion ``particleset`` name | - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ - | ``target``:math:`^r` | text | ``particleset.name`` | ``hamiltonian.target`` | Electron ``particleset`` 
name | - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ - | ``pbc``:math:`^o` | boolean | yes/no | yes* | Use Ewald summation | - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ - | ``forces`` | boolean | yes/no | no | *Deprecated* | - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ - | ``wavefunction``:math:`^r` | text | ``wavefunction.name`` | invalid | Identify wavefunction | - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ - | ``format``:math:`^r` | text | xml/table | table | Select file format | - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ - | ``algorithm``:math:`^o` | text | batched/non-batched | batched | Choose NLPP algorithm | - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ - | ``DLA``:math:`^o` | text | yes/no | no | Use determinant localization approximation | - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ - | ``physicalSO``:math:`^o` | boolean | yes/no | yes | Include the SO contribution in the local energy | - +-----------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | **Name** | **Datatype** | **Values** | **Default** | 
**Description** | + +==============================+==============+=======================+========================+==================================================+ + | ``type``:math:`^r` | text | **pseudo** | | Must be pseudo | + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | ``name/id``:math:`^r` | text | *anything* | PseudoPot | *No current function* | + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | ``source``:math:`^r` | text | ``particleset.name`` | i | Ion ``particleset`` name | + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | ``target``:math:`^r` | text | ``particleset.name`` | ``hamiltonian.target`` | Electron ``particleset`` name | + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | ``pbc``:math:`^o` | boolean | yes/no | yes* | Use Ewald summation | + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | ``forces`` | boolean | yes/no | no | *Deprecated* | + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | ``wavefunction``:math:`^r` | text | ``wavefunction.name`` | invalid | Identify wavefunction | + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | ``format``:math:`^r` | text | xml/table | table | Select file format | + 
+------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | ``algorithm``:math:`^o` | text | batched/non-batched | batched | Choose NLPP algorithm | + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | ``DLA``:math:`^o` | text | yes/no | no | Use determinant localization approximation | + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | ``physicalSO``:math:`^o` | boolean | yes/no | yes | Include the SO contribution in the local energy | + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ + | ``spin_integrator``:math:`^o`| text | exact / simpson | exact | Choose which spin integration technique to use | + +------------------------------+--------------+-----------------------+------------------------+--------------------------------------------------+ Additional information: @@ -374,6 +376,12 @@ Additional information: ``.xml`` file, this flag allows control over whether the SO contribution is included in the local energy. +- **spin_integrator** Selects which spin integration technique to use. + ``simpson`` uses a numerical integration scheme + which can be inefficient but was previously the default. The ``exact`` method exploits + the structure of the Slater-Jastrow wave function in order to analytically + perform the spin integral. + .. code-block:: :caption: QMCPXML element for pseudopotential electron-ion interaction (psf files). 
:name: Listing 19 diff --git a/docs/installation.rst b/docs/installation.rst index d8ecf99773..d87ca9bb0a 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -186,7 +186,7 @@ Building with CMake The build system for QMCPACK is based on CMake. It will autoconfigure based on the detected compilers and libraries. The most recent version of CMake has the best detection for the greatest variety of systems. The -minimum required version of CMake is 3.17.0. Most +minimum required version of CMake is 3.21.0. Most computer installations have a sufficiently recent CMake, though it might not be the default. diff --git a/docs/intro_wavefunction.rst b/docs/intro_wavefunction.rst index 437e64951e..347b1c6d09 100644 --- a/docs/intro_wavefunction.rst +++ b/docs/intro_wavefunction.rst @@ -798,7 +798,7 @@ Additional information: The best ``delay_rank`` depends on the processor microarchitecture. GPU support is under development. -- ``gpu`` This option is only effective when GPU features are built. Use the implementation with GPU acceleration if ``yes``. +- ``gpu`` This option is only effective when GPU features are built. Default to using GPU. "omptarget", "cuda", "sycl", "cpu" can be set to target a specific implementation, "yes", "no" can be used to toggle on or off GPU acceleration. - ``batch`` The default value is ``yes`` if ``gpu=yes`` and ``no`` otherwise. @@ -924,9 +924,10 @@ Combining orbitals is complicated by the need to maintain the normalization of t orbitals. A rotation matrix will preserve the normalization of the vectors in linear combinations. However the entries in a rotation matrix are not independent. -A rotation matrix can alternatively be expressed as the matrix exponential of a skew-symmetric matrix. +A rotation matrix can alternatively be expressed as the matrix exponential of a skew-symmetric matrix: :math:`R = \exp(\kappa)`. The entries in that skew-symmetric matrix are independent and can form an independent set of optimizable parameters. 
+ Optimizable orbitals are given in the input file by enclosing an SPO in an `rotated_sposet` element. The `determinant` element `id` attribute should reference the name of the rotated sposet. @@ -945,11 +946,13 @@ The `rotated_sposet` element requires use of the updated `sposet_collection` sty Attribute: -+-----------------+----------+----------+---------+-------------------------+ -| Name | Datatype | Values | Default | Description | -+=================+==========+==========+=========+=========================+ -| ``name`` | Text | | | Name of rotated SPOSet | -+-----------------+----------+----------+---------+-------------------------+ ++-----------------+----------+----------------+---------+------------------------------------+ +| Name | Datatype | Values | Default | Description | ++=================+==========+================+=========+====================================+ +| ``name`` | Text | | | Name of rotated SPOSet | ++-----------------+----------+----------------+---------+------------------------------------+ +| ``method`` | Text | global/history | global | Rotation matrix composition method | ++-----------------+----------+----------------+---------+------------------------------------+ .. code-block:: :caption: Orbital Rotation XML element. @@ -974,6 +977,39 @@ The `opt_vars` element can be used to specify initial rotation parameters. The parameters are given as a space-separated list of numbers in the element text. The length of this list must match the expected number of rotation parameters. +Composing rotations +~~~~~~~~~~~~~~~~~~~ + +Rotation matrices do not commute, which consequently means the entries in the kappa matrix +do not simply add when combining rotations. +The parameters tracked for optimization are those for which the parameter derivatives are possibly non-zero. +Rotations from one occupied orbital to another, or from one unoccupied orbital to another, have +no effect on the energy, and hence have a zero parameter derivative.
+These parameters are a subset of the full number of parameters in the kappa matrix. +When rotations are combined, the entries corresponding to zero parameter derivatives can +take on a non-zero value (i.e. the kappa matrix gets 'filled-in'). + +There are two ways to handle this. +One way is to store a list of applied rotations. +This method applies a new rotation to the coefficient matrix, and updates the coefficient matrix at each optimization step. +This is the "history" method. + +.. math:: C' = \exp(\kappa_n) \dots \exp(\kappa_1) \exp(\kappa_0) C + +The other way is to track the full set of kappa values separately. +After the matrix multiplication to compose the rotations, the matrix log recovers the new kappa matrix entries. +This is the "global" method. +This method keeps a separate copy of the coefficient matrix and updates it using the global rotation matrix at each optimization step. + +.. math:: \kappa_{new} &= \ln( \exp(\kappa_{\Delta}) \exp(\kappa_{old}) ) \\ + C' &= \exp(\kappa_{new}) C + +Another consequence is the rotation parameters printed in the output are meaningless past the first rotation. +Internally, the rotation code deals only with the difference between parameters at each step. + +This also means that extra information needs to be stored with the results of the optimization. +The extra information is stored in the VP HDF file. + .. 
_backflow: Backflow Wavefunctions diff --git a/docs/methods.rst b/docs/methods.rst index fa4400a945..2f7a094c2b 100644 --- a/docs/methods.rst +++ b/docs/methods.rst @@ -306,39 +306,39 @@ Batched ``vmc`` driver (experimental) parameters: - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | **Name** | **Datatype** | **Values** | **Default** | **Description** | - +================================+==============+=========================+=============+=================================================+ - | ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``crowds`` | integer | :math:`> 0` | dep. 
| Number of desynchronized dwalker crowds | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``steps`` | integer | :math:`\geq 0` | 1 | Number of steps per block | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``substeps`` | integer | :math:`\geq 0` | 1 | Number of substeps per step | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``usedrift`` | text | yes,no | yes | Use the algorithm with drift | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``samples`` (not ready) | integer | :math:`\geq 0` | 0 | Number of walker samples for in this VMC run | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. 
| Wavefunction recompute frequency | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``crowd_serialize_walkers`` | integer | yes, no | no | Force use of single walker APIs (for testing) | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``measure_imbalance`` | text | yes,no | no | Measure load imbalance at the end of each block | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | **Name** | **Datatype** | **Values** | **Default** | **Description** | + +================================+==============+=========================+=============+======================================================+ + | ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | 
``crowds`` | integer | :math:`> 0` | dep. | Number of desynchronized walker crowds | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``steps`` | integer | :math:`\geq 0` | dep. | Number of steps per block | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``substeps`` | integer | :math:`\geq 0` | 1 | Number of substeps per step | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``usedrift`` | text | yes,no | yes | Use the algorithm with drift | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``samples`` | integer | :math:`\geq 0` | 0 | Total number of walker samples for this VMC run | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. 
| Wavefunction recompute frequency | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``crowd_serialize_walkers`` | integer | yes, no | no | Force use of single walker APIs (for testing) | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ + | ``measure_imbalance`` | text | yes,no | no | Measure load imbalance at the end of each block | + +--------------------------------+--------------+-------------------------+-------------+------------------------------------------------------+ Additional information: @@ -356,14 +356,14 @@ Additional information: If neither ``total_walkers`` nor ``walkers_per_rank`` is provided and there are no walker configurations carried over, ``walkers_per_rank`` is set equal to ``crowds``. -- ``total_walkers`` Total number of walkers summed over all MPI ranks, or equivalently the total number of walkers in the DMC +- ``total_walkers`` Total number of walkers summed over all MPI ranks, or equivalently the total number of walkers in the QMC calculation. If not provided, it is computed as ``walkers_per_rank`` times the number of MPI ranks. If both ``total_walkers`` and ``walkers_per_rank`` are provided, which is not recommended, ``total_walkers`` must be consistently set equal to ``walkers_per_rank`` times the number MPI ranks. 
- ``blocks`` This parameter is universal for all the QMC methods. The MC processes are divided into a number of - ``blocks``, each containing a number of steps. At the end of each block, the statistics accumulated in the block are dumped into files, - e.g., ``scalar.dat``. Typically, each block should have a sufficient number of steps that the I/O at the end of each block is negligible + ``blocks``, each containing an equal number of steps. At the end of each block, the statistics accumulated in the block are dumped into files, + e.g., ``scalar.dat``. Typically, blocks should have a sufficient number of steps that the I/O at the end of each block is negligible compared with the computational cost. Each block should not take so long that monitoring its progress is difficult. There should be a sufficient number of ``blocks`` to perform statistical analysis. @@ -371,7 +371,9 @@ Additional information: initial equilibration and do not count against the requested step or block count. Property measurements are not performed during warm-up steps. -- ``steps`` - ``steps`` are the number of energy and other property measurements to perform per block. +- ``steps`` - ``steps`` are the number of energy and other property measurements to perform per block. If ``samples`` is provided + in the input file but not ``steps``, its value is chosen based on ``samples`` (see below). If neither ``samples`` nor ``steps`` is + provided, ``steps`` is set to one. - ``substeps`` For each substep, an attempt is made to move each of the electrons once only by either particle-by-particle or an all-electron move. Because the local energy is evaluated only at @@ -391,13 +393,18 @@ Additional information: acceptance ratio should be close to 50% for an efficient simulation. -- ``samples`` (not ready) +- ``samples`` The intended total number of samples that will be made in the QMC section. This is primarily intended for VMC + wavefunction optimization.
The implementation always obtains at least the requested number but may obtain slightly more samples + than requested so as to map efficiently on to the MPI tasks and OpenMP threads. If ``samples`` and ``steps`` are both + provided, ``samples`` must be equal or smaller than the product of ``total_walkers``, ``steps`` and ``blocks``. If ``samples`` is + provided but ``steps`` is not, ``steps`` is automatically set to be the smallest integer that makes ``samples`` equal or smaller + than the product of ``total_walkers``, ``steps`` and ``blocks``. -- ``blocks_between_recompute`` Recompute the accuracy critical determinant part of the wavefunction from scratch: =1 by - default when using mixed precision. =10 by default when not using mixed precision. 0 can be set for no recomputation - and higher performance, but numerical errors will accumulate over time. Recomputing introduces a performance penalty - dependent on system size, but protects against the accumulation of numerical error, particularly in the inverses of - the Slater determinants. These have a cubic-scaling cost to recompute. +- ``blocks_between_recompute`` Recompute the accuracy critical determinant part of the wavefunction from scratch: =1 by default when + using mixed precision. =10 by default when not using mixed precision. 0 can be set for no recomputation and higher performance, + but numerical errors will accumulate over time. Recomputing the determinants introduces a performance penalty dependent on system + size, but protects against the accumulation of numerical error, particularly in the inverses of the Slater determinants. These + have a cubic-scaling cost to recompute. - ``debug_checks`` valid values are 'no', 'all', 'checkGL_after_load', 'checkGL_after_moves', 'checkGL_after_tmove'. If the build type is `debug`, the default value is 'all'. Otherwise, the default value is 'no'. 
@@ -591,7 +598,7 @@ Optimizers QMCPACK implements a number of different optimizers each with different priorities for accuracy, convergence, memory usage, and stability. The optimizers can be switched among “OneShiftOnly” (default), “adaptive,” -“descent,” “hybrid,” and “quartic” (old) using the following line in the +“descent,” “hybrid,” "sr_cg," and “quartic” (old) using the following line in the optimization block: :: @@ -599,7 +606,7 @@ optimization block: THE METHOD YOU LIKE OneShiftOnly Optimizer -~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^ The OneShiftOnly optimizer targets a fast optimization by moving parameters more aggressively. It works with OpenMP and GPU and can be considered for large systems. This method relies on the effective weight of correlated sampling rather than the cost function value to justify a new set of parameters. @@ -611,13 +618,15 @@ and adjustment on ``minwalkers`` can be made if needed. parameters: - +--------------+--------------+-------------+-------------+---------------------------------------------------+ - | **Name** | **Datatype** | **Values** | **Default** | **Description** | - +==============+==============+=============+=============+===================================================+ - | ``shift_i`` | real | :math:`> 0` | 0.01 | Direct stabilizer added to the Hamiltonian matrix | - +--------------+--------------+-------------+-------------+---------------------------------------------------+ - | ``shift_s`` | real | :math:`> 0` | 1.00 | Initial stabilizer based on the overlap matrix | - +--------------+--------------+-------------+-------------+---------------------------------------------------+ + +------------------+--------------+------------------+-------------+---------------------------------------------------+ + | **Name** | **Datatype** | **Values** | **Default** | **Description** | + +==================+==============+==================+=============+===================================================+ + | 
``shift_i`` | real | :math:`> 0` | 0.01 | Direct stabilizer added to the Hamiltonian matrix | + +------------------+--------------+------------------+-------------+---------------------------------------------------+ + | ``shift_s`` | real | :math:`> 0` | 1.00 | Initial stabilizer based on the overlap matrix | + +------------------+--------------+------------------+-------------+---------------------------------------------------+ + | ``eigensolver`` | string | inverse, general | inverse | Eigensolver for linear method | + +------------------+--------------+------------------+-------------+---------------------------------------------------+ Additional information: @@ -630,6 +639,11 @@ Additional information: slower optimization with a large value. The used value is auto-adjusted by the optimizer. +- ``eigensolver`` Choice of eigensolver for the linear method. Option only implemented for the batched optimizer driver. + + - ``general`` Use solver specific to the generalized eigenvalue problem (LAPACK dggev). + - ``inverse`` Compute :math:`S^{-1} H` and use regular eigenvalue solver (LAPACK dgeev). This is faster than the general eigensolver option. + Recommendations: - Default ``shift_i``, ``shift_s`` should be fine. @@ -638,13 +652,18 @@ Recommendations: - If the VMC energy of the last optimization iterations grows significantly, increase ``minwalkers`` closer to 1 and make the optimization stable. -- If the first iterations of optimization are rejected on a reasonable initial wavefunction, - lower the ``minwalkers`` value based on the measured value printed in the standard output to accept the move. +- If the first iterations of optimization are rejected despite a reasonable initial wavefunction, lower the ``minwalkers`` value + based on the measured value printed in the standard output to accept the move. -We recommended using this optimizer in two sections with a very small ``minwalkers`` in the first and a large value in the second, such as the following. 
-In the very beginning, parameters are far away from optimal values and large changes are proposed by the optimizer. -Having a small ``minwalkers`` makes it much easier to accept these changes. -When the energy gradually converges, we can have a large ``minwalkers`` to avoid risky parameter sets. +When optimizing parameters from scratch, we recommend using this optimizer in two sections with a very small ``minwalkers`` in the +first and a large value in the second, e.g., 1e-4 and 0.5, as illustrated below. In the very beginning, parameters are far away from +optimal values and large changes are proposed by the optimizer. Having a small ``minwalkers`` makes it much easier to accept these +changes. If optimization becomes unstable, increase ``minwalkers``. If optimization gets stuck with proposed parameter sets being +constantly rejected, decrease ``minwalkers``. When the energy gradually converges, keeping a large ``minwalkers`` is necessary to +prevent accepting risky parameter sets. Continuing optimization with more parameters from a partially converged wavefunction should +also use large ``minwalkers``, for example adding a three-body Jastrow factor to converged one-body and two-body Jastrow factors. When +developing a reliable optimization recipe for a new system, one should check convergence of the process with significantly increased +samples, e.g. 4x, and repeat the check each time the flexibility in the wavefunction and number of parameters is increased. :: @@ -702,7 +721,7 @@ command ``qmca -q ev *.scalar.dat`` to look at the VMC energy and variance for each optimization step. Adaptive Optimizer -~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^ The default setting of the adaptive optimizer is to construct the linear method Hamiltonian and overlap matrices explicitly and add different @@ -915,7 +934,7 @@ Excited state recommendations: favor of the ground state.
Descent Optimizer -~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^ Gradient descent algorithms are an alternative set of optimization methods to the OneShiftOnly and adaptive optimizers based on the linear method. These methods use only first derivatives to optimize trial wave functions and convergence can be accelerated by retaining a memory of previous derivative values. @@ -1056,7 +1075,7 @@ Additional information and recommendations: Hybrid Optimizer -~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^ Another optimization option is to use a hybrid combination of accelerated descent and blocked linear method. It provides a means to retain the advantages of both individual methods while scaling to large numbers of parameters beyond the traditional 10,000 parameter limit of the linear method. :cite:`Otis2019` @@ -1155,8 +1174,45 @@ Additional information and recommendations: parameters discussed earlier for descent are useful for setting up the descent engine to do this averaging on its own. +Stochastic Reconfiguration with Conjugate Gradient +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +We have implemented a preliminary version of stochastic reconfiguration (:cite:`Sorella2001` and :cite:`Casula2004`), +currently only available in the batched drivers. The SR optimization reduces the +computational cost over the linear method by avoiding the need to build the +Hamiltonian derivative matrix elements, and instead only needs the derivative overlap +matrix. This can result in substantial savings when optimizing with very large parameter +counts, e.g. in orbital optimization. 
The SR method determines the parameter changes via + +:math:`-\tau \mathbf{g} = \mathbf{S} \Delta \mathbf{p}` + +where :math:`\mathbf{S}` is given by :math:`\langle \Psi_i | \Psi_j\rangle`, :math:`\mathbf{g}` is given by :math:`\langle \Psi_i | H | \Psi_0\rangle`, :math:`\Delta \mathbf{p}` is the parameter update, and :math:`\tau` is an effective timestep since the SR method can be interpreted as an imaginary time projection expanded in the parameter derivative basis. +The solution could be found by directly inverting the overlap matrix :math:`\mathbf{S}`, but this becomes prohibitive for large parameter counts. Therefore, we have implemented the conjugate gradient iterative scheme to solve the linear equation :cite:`Neuscamman2012`. This avoids having to directly invert the overlap matrix and significantly reduces the cost for large parameter counts. + +Since we are using finite samples to represent the overlap matrix, it can become ill-conditioned. We choose to use a simple regularization scheme to improve the optimization, described in :cite:`Sorella2007`. The overlap matrix is scaled via :math:`\mathbf{S} \rightarrow \mathbf{S} + \epsilon \mathbf{I}`, where :math:`\epsilon` is a small scalar. This can be controlled through ``sr_regularization``. + +By default, the parameter update is accepted as is, and the size of the proposed parameter changes can be controlled by the timestep :math:`\tau`. This parameter can be controlled via ``sr_tau``. If this parameter gets too large, the optimization can become unstable. Therefore, it is recommended to use a small timestep. Small timesteps require many more total optimization steps than is typically required by the linear method, so convergence should be carefully checked. Alternatively, it is possible to use the conjugate gradient step to determine the parameter update direction, and follow up with a line search, triggered via ``line_search``.
This can result in much faster convergence at the expense of doing additional correlated sampling steps. + +We are currently investigating various improvements to make this a more reliable optimizer. + +``sr_cg`` method: + + parameters: + + +-----------------------+--------------+-------------+-------------+----------------------------------------------+ + | **Name** | **Datatype** | **Values** | **Default** | **Description** | + +=======================+==============+=============+=============+==============================================+ + | ``sr_tau`` | real | :math:`> 0` | 0.01 | Effective timestep for SR equation | + +-----------------------+--------------+-------------+-------------+----------------------------------------------+ + | ``sr_tolerance`` | real | :math:`> 0` | 1e-06 | Convergence threshold for CG algorithm | + +-----------------------+--------------+-------------+-------------+----------------------------------------------+ + | ``sr_regularization`` | real | :math:`> 0` | 0.01 | Scaling constant for S matrix regularization | + +-----------------------+--------------+-------------+-------------+----------------------------------------------+ + | ``line_search`` | text | yes/no | no | Use linesearch to find optimal move | + +-----------------------+--------------+-------------+-------------+----------------------------------------------+ + + Quartic Optimizer -~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^ *This is an older optimizer method retained for compatibility.
We recommend starting with the newest OneShiftOnly or adaptive optimizers.* @@ -1669,52 +1725,54 @@ Batched ``dmc`` driver (experimental) parameters: - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | **Name** | **Datatype** | **Values** | **Default** | **Description** | - +================================+==============+=========================+=============+=================================================+ - | ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``crowds`` | integer | :math:`> 0` | dep. 
| Number of desynchronized dwalker crowds | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``steps`` | integer | :math:`\geq 0` | 1 | Number of steps per block | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``nonlocalmoves`` | string | yes, no, v0, v1, v3 | no | Run with T-moves | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``branching_cutoff_scheme`` | string | classic/DRV/ZSGMA/YL | classic | Branch cutoff scheme | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. 
| Wavefunction recompute frequency | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``feedback`` | double | :math:`\geq 0` | 1.0 | Population feedback on the trial energy | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``sigmaBound`` | 10 | :math:`\geq 0` | 10 | Parameter to cutoff large weights | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``reconfiguration`` | string | yes/pure/other | no | Fixed population technique | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``use_nonblocking`` | string | yes/no | yes | Using nonblocking send/recv | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``debug_disable_branching`` | string | yes/no | no | Disable branching for debugging | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``crowd_serialize_walkers`` | integer | yes, no | no | Force use of single walker APIs (for testing) | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``debug_checks`` | text | see additional info | dep. 
| Turn on/off additional recompute and checks | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ - | ``measure_imbalance`` | text | yes,no | no | Measure load imbalance at the end of each block | - +--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+ + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | **Name** | **Datatype** | **Values** | **Default** | **Description** | + +================================+==============+=========================+===================+=================================================+ + | ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``target_walkers`` | integer | :math:`> 0` | ``total_walkers`` | Target walker count by the population control | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``crowds`` | integer | :math:`> 0` | dep. 
| Number of desynchronized dwalker crowds | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``steps`` | integer | :math:`\geq 0` | 1 | Number of steps per block | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``nonlocalmoves`` | string | yes, no, v0, v1, v3 | no | Run with T-moves | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``branching_cutoff_scheme`` | string | classic/DRV/ZSGMA/YL | classic | Branch cutoff scheme | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. 
| Wavefunction recompute frequency | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``feedback`` | double | :math:`\geq 0` | 1.0 | Population feedback on the trial energy | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``sigmaBound`` | 10 | :math:`\geq 0` | 10 | Parameter to cutoff large weights | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``reconfiguration`` | string | yes/pure/other | no | Fixed population technique | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``use_nonblocking`` | string | yes/no | yes | Using nonblocking send/recv | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``debug_disable_branching`` | string | yes/no | no | Disable branching for debugging | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``crowd_serialize_walkers`` | integer | yes, no | no | Force use of single walker APIs (for testing) | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``debug_checks`` | text | see additional info | dep. 
| Turn on/off additional recompute and checks | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ + | ``measure_imbalance`` | text | yes,no | no | Measure load imbalance at the end of each block | + +--------------------------------+--------------+-------------------------+-------------------+-------------------------------------------------+ - ``crowds`` The number of crowds that the walkers are subdivided into on each MPI rank. If not provided, it is set equal to the number of OpenMP threads. -- ``walkers_per_rank`` The number of walkers per MPI rank. This number does not have to be a multiple of the number of OpenMP +- ``walkers_per_rank`` The number of walkers per MPI rank when a DMC calculation starts. This number does not have to be a multiple of the number of OpenMP threads. However, to avoid any idle resources, it is recommended to be at least the number of OpenMP threads for pure CPU runs. For GPU runs, a scan of this parameter is necessary to reach reasonable single rank efficiency and also get a balanced time to solution. For highest throughput on GPUs, expect to use hundreds of walkers_per_rank, or the largest number that will fit in GPU @@ -1725,11 +1783,12 @@ Batched ``dmc`` driver (experimental) If neither ``total_walkers`` nor ``walkers_per_rank`` is provided and there are no walker configurations carried over, ``walkers_per_rank`` is set equal to ``crowds``. -- ``total_walkers`` Total number of walkers summed over all MPI ranks, or equivalently the total number of walkers in the DMC - calculation. If not provided, it is computed as ``walkers_per_rank`` times the number of MPI ranks. 
If both ``total_walkers`` +- ``total_walkers`` Total number of walkers summed over all MPI ranks, or equivalently the total number of walkers when a DMC calculation starts. If not provided, it is computed as ``walkers_per_rank`` times the number of MPI ranks. If both ``total_walkers`` and ``walkers_per_rank`` are provided, which is not recommended, ``total_walkers`` must be consistently set equal to ``walkers_per_rank`` times the number MPI ranks. +- ``target_walkers`` The target population size. Population control algorithms work towards this target. Do not confuse it with the actual walker count during random walking. The default will be the number of walkers when a DMC calculation starts, namely ``total_walkers``. + - ``debug_checks`` valid values are 'no', 'all', 'checkGL_after_load', 'checkGL_after_moves', 'checkGL_after_tmove'. If the build type is `debug`, the default value is 'all'. Otherwise, the default value is 'no'. - ``spin_mass`` Optional parameter to allow the user to change the rate of spin sampling. If spin sampling is on using ``spinor`` == yes in the electron ParticleSet input, the spin mass determines the rate @@ -1742,14 +1801,16 @@ Batched ``dmc`` driver (experimental) .. math:: + E_\text{ref} = E_\text{pop\_avg} E_\text{trial} = E_\text{pop\_avg}+(\ln \texttt{targetwalkers}-\ln W_\text{pop}) / \texttt{timestep} where :math:`E_\text{pop\_avg}` is the local energy average over the walker population at the current step and :math:`W_\text{pop}` is the current population weight before the population adjustment in branching. -After the warm-up phase, the trial energy is updated as +After the warm-up phase, the reference and trial energy values are updated as .. 
math:: + E_\text{ref} = \sum_\text{post warm up} E_\text{pop\_avg} / Nsteps_\text{post warm up} E_\text{trial} = E_\text{ref}+\texttt{feedback}\cdot(\ln\texttt{targetWalkers}-\ln W_\text{pop}) where :math:`E_\text{ref}` is the :math:`E_\text{pop\_avg}` average over all the post warm-up steps up to the current step. The update frequency is controlled by ``energyUpdateInterval``. @@ -1885,4 +1946,251 @@ declaration to ensure correct sampling: a new all-electron configuration, at which point the action is computed and the move is either accepted or rejected. + + +.. _walker_logging: + +Walker Data Logging +=================== + +Detailed per-walker information can be written to HDF5 files for VMC or DMC by +including the ``<walkerlogs/>`` XML element. This includes the LocalEnergy and +its components for each walker from each MC step. By default, more detailed +particle-level information (e.g. electron coordinates) is also written for the +lowest, highest, and median energy walkers at each MC step (modest disk usage). +Optionally, particle-level information can also be written for all walkers, +potentially requiring a huge amount of disk space. + +**Input specification** + +The default walker data logging functionality is enabled by including the +``<walkerlogs/>`` XML element (once) just before the QMC driver sections, +for example: + +:: + + + + 256 + 100 + 200 + 10 + 3 + 0.3 + yes + + + 256 + 40 + 800 + 20 + 0.01 + + + + +Optional XML attributes enable finer control over the behavior: + +..
table:: + + +------------------+--------------+--------------+-------------+----------------------------------------------------+ + | **Name** | **Datatype** | **Values** | **Default** | **Description** | + +==================+==============+==============+=============+====================================================+ + | ``step_period`` | integer | :math:`> 0` | 1 | Collect walker data every step_period MC steps | + +------------------+--------------+--------------+-------------+----------------------------------------------------+ + | ``particle`` | text | yes,no | no | Write particle data for all walkers | + +------------------+--------------+--------------+-------------+----------------------------------------------------+ + | ``quantiles`` | text | yes,no | yes | Write full data for min/max/median energy walkers | + +------------------+--------------+--------------+-------------+----------------------------------------------------+ + | ``min`` | text | yes,no | yes | Enable/disable write for min energy walker data | + +------------------+--------------+--------------+-------------+----------------------------------------------------+ + | ``max`` | text | yes,no | yes | Enable/disable write for max energy walker data | + +------------------+--------------+--------------+-------------+----------------------------------------------------+ + | ``median`` | text | yes,no | yes | Enable/disable write for median energy walker data | + +------------------+--------------+--------------+-------------+----------------------------------------------------+ + | ``verbose`` | text | yes,no | no | Write more log file information | + +------------------+--------------+--------------+-------------+----------------------------------------------------+ + + +Additional information: + +- ``step_period``: By default, data for each walker is collected every MC + step, corresponding to step_period=1. 
A sub-sampling of the walker + data may be obtained instead by setting step_period>1. For example, + with step_period=5, walker data is collected every 5th MC step. + +- ``particle``: This controls whether per-particle data is written to + the walker log HDF5 files along with scalar walker properties. These data + comprise: electron coordinates, spin coordinates (spinor runs only), + per-particle wavefunction gradients, and per-particle wavefunction + laplacian values. + +- ``quantiles``: Write out full (scalar and per-particle) data for walkers + at specific quantiles of the local energy distribution. Currently, + these quantiles are the minimum, maximum, and median. + +- ``min``: Selectively disable writing data for the minimum energy + walkers. Active only if quantiles=yes. + +- ``max``: Selectively disable writing data for the maximum energy + walkers. Active only if quantiles=yes. + +- ``median``: Selectively disable writing data for the median energy + walkers. Active only if quantiles=yes. + +- ``verbose``: If "yes", write function-call information related to + the walker logging functionality. This option is mainly intended + for developers, as it is of little use in practical runs. + + +**Output files** + +The HDF5 files created by the walker logging functionality have the extension \*.wlogs.h5. +For each VMC or DMC section, one of these files is written for every MPI rank in the run.
+ +For the example XML inputs shown above, QMCPACK run on 6 MPI ranks would produce (at least) +the following output data files: + +:: + + qmc.s000.scalar.dat + qmc.s000.stat.h5 + qmc.s000.p000.wlogs.h5 + qmc.s000.p001.wlogs.h5 + qmc.s000.p002.wlogs.h5 + qmc.s000.p003.wlogs.h5 + qmc.s000.p004.wlogs.h5 + qmc.s000.p005.wlogs.h5 + + qmc.s001.scalar.dat + qmc.s001.dmc.dat + qmc.s001.stat.h5 + qmc.s001.p000.wlogs.h5 + qmc.s001.p001.wlogs.h5 + qmc.s001.p002.wlogs.h5 + qmc.s001.p003.wlogs.h5 + qmc.s001.p004.wlogs.h5 + qmc.s001.p005.wlogs.h5 + + +A single wlogs.h5 file has several walker data buffers (names with underscores below): + +:: + + # scalar (int/real) data for all walkers + walker_property_int walker_property_real + + # scalar and per-particle data for min energy walkers + wmin_property_int wmin_property_real wmin_particle_real + + # scalar and per-particle data for max energy walkers + wmax_property_int wmax_property_real wmax_particle_real + + # scalar and per-particle data for median energy walkers + wmed_property_int wmed_property_real wmed_particle_real + + +Each data buffer contains packed walker data in the form of a large 2D array ("data" below): + +:: + + >h5ls qmc.s000.p000.wlogs.h5/walker_property_int + data Dataset {512000/Inf, 4} + data_layout Group + + >h5ls qmc.s000.p000.wlogs.h5/walker_property_real + data Dataset {512000/Inf, 15} + data_layout Group + + +Each row in the 2D data array/buffer contains data for a single walker at a single MC step. +In this case, 256 walkers were advanced through 200\*10=2000 steps for 512000 row entries total. 
+ +The location of each particular walker quantity in each row is listed in "data_layout": + +:: + + >h5ls qmc.s000.p000.wlogs.h5/walker_property_int/data_layout + id Group # unique walker id + parent_id Group # id of parent (DMC branching) + step Group # MC step number + age Group # walker "age" + + >h5ls qmc.s000.p000.wlogs.h5/walker_property_real/data_layout + weight Group # statistical weight of the walker + LocalEnergy Group # the local (total) energy + Kinetic Group # kinetic energy + LocalPotential Group # full potential energy (all terms) + ElecElec Group # electron-electron energy + LocalECP Group # energy for local channel of ECP + NonLocalECP Group # energy for non-local channels of ECP + logpsi Group # log of wavefunction modulus + phase Group # wavefunction phase + dlogpsi2 Group # squared gradient of wavefunction log-modulus + dphase2 Group # squared gradient of wavefunction phase + dr_node_min Group # estimate of min distance to wfn node along any dimension + multiplicity Group # branching multiplicity (DMC only) + R2Accepted Group # average diffusion of accepted MC moves + R2Proposed Group # average diffusion of proposed MC moves + +From this we can see, e.g., that the value for the MC "step" is stored at column +index 0 in walker_property_int/data and the LocalEnergy is stored at column index 6 +in walker_property_real/data: + +:: + + >h5ls -d qmc.s000.p000.wlogs.h5/walker_property_int/data_layout/step/index_start + index_start Dataset {SCALAR} + Data: + (0) 0 + + >h5ls -d qmc.s000.p000.wlogs.h5/walker_property_real/data_layout/LocalEnergy/index_start + index_start Dataset {SCALAR} + Data: + (0) 6 + + +The per-particle data is arranged similarly: + +:: + + >h5ls -d qmc_log_dmc_legacy.s000.p000.wlogs.h5/wmin_particle_real/data_layout + R Group # electron coordinates + G Group # wavefunction gradient + L Group # wavefunction laplacian (per-particle) + + +However, more information is required in the data_layout to fully specify the location and +shape 
of the particle-level array data (simplified view for a run with 8 electrons and a +real-valued wavefunction): + +:: + + >h5ls -d qmc.s000.p000.wlogs.h5/wmin_particle_real/data_layout/R + index_start 0 # data starts at column index 0 + index_end 24 # data ends at column index 24 + dimension 2 # array is 2-dimensional + size 24 # array has 24 elements total + shape 8, 3, 0, 0 # array has shape 8x3 + unit_size 1 # each unit of data stored as 1 real value + + >h5ls -d qmc.s000.p000.wlogs.h5/wmin_particle_real/data_layout/G + index_start 24 # data starts at column index 24 + index_end 48 # data ends at column index 48 + dimension 2 # array is 2-dimensional + size 24 # array has 24 elements total + shape 8, 3, 0, 0 # array has shape 8x3 + unit_size 1 # data stored as single real values (2 if complex) + + >h5ls -d qmc.s000.p000.wlogs.h5/wmin_particle_real/data_layout/L + index_start 48 # data starts at column index 48 + index_end 56 # data ends at column index 56 + dimension 1 # array is 1-dimensional + size 8 # array has 8 elements total + shape 8, 0, 0, 0 # array has linear shape, length 8 + unit_size 1 # data stored as single real values (2 if complex) + + + + .. bibliography:: /bibs/methods.bib diff --git a/docs/requirements.in b/docs/requirements.in new file mode 100644 index 0000000000..7b73822545 --- /dev/null +++ b/docs/requirements.in @@ -0,0 +1,7 @@ +# Specify exact versions for reproducible builds, e.g. in readthedocs +sphinx==4.3.2 +sphinx_rtd_theme==1.0.0 +sphinxcontrib-bibtex==2.4.1 + + + diff --git a/docs/requirements.txt b/docs/requirements.txt index b79c726b96..13a0eb09de 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,6 +1,76 @@ -# Specify exact versions for reproducible builds, e.g. 
in readthedocs -sphinx==3.2.0 -sphinx_rtd_theme==0.5.2 -sphinxcontrib-bibtex==2.2.0 - +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile requirements.in +# +alabaster==0.7.13 + # via sphinx +babel==2.13.0 + # via sphinx +certifi==2024.7.4 + # via requests +charset-normalizer==3.3.0 + # via requests +docutils==0.17.1 + # via + # pybtex-docutils + # sphinx + # sphinx-rtd-theme + # sphinxcontrib-bibtex +idna==3.7 + # via requests +imagesize==1.4.1 + # via sphinx +jinja2==3.1.4 + # via sphinx +latexcodec==2.0.1 + # via pybtex +markupsafe==2.1.3 + # via jinja2 +packaging==23.2 + # via sphinx +pybtex==0.24.0 + # via + # pybtex-docutils + # sphinxcontrib-bibtex +pybtex-docutils==1.0.3 + # via sphinxcontrib-bibtex +pygments==2.16.1 + # via sphinx +pyyaml==6.0.1 + # via pybtex +requests==2.32.2 + # via sphinx +six==1.16.0 + # via + # latexcodec + # pybtex +snowballstemmer==2.2.0 + # via sphinx +sphinx==4.3.2 + # via + # -r requirements.in + # sphinx-rtd-theme + # sphinxcontrib-bibtex +sphinx-rtd-theme==1.0.0 + # via -r requirements.in +sphinxcontrib-applehelp==1.0.4 + # via sphinx +sphinxcontrib-bibtex==2.4.1 + # via -r requirements.in +sphinxcontrib-devhelp==1.0.2 + # via sphinx +sphinxcontrib-htmlhelp==2.0.1 + # via sphinx +sphinxcontrib-jsmath==1.0.1 + # via sphinx +sphinxcontrib-qthelp==1.0.3 + # via sphinx +sphinxcontrib-serializinghtml==1.1.5 + # via sphinx +urllib3==2.2.2 + # via requests +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/docs/running.rst b/docs/running.rst index 22c06e15fc..5f40034f46 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -5,7 +5,7 @@ Running QMCPACK QMCPACK requires at least one xml input file, and is invoked via: -``qmcpack [command line options] `` +``qmcpack [command line options] `` .. _commandline: @@ -16,14 +16,16 @@ QMCPACK offers several command line options that affect how calculations are performed. 
If the flag is absent, then the corresponding option is disabled: -- ``--dryrun`` Validate the input file without performing the simulation. This is a good way to ensure that QMCPACK will do what you think it will. +- ``--dryrun`` Validate the input file without performing the simulation. All the QMC and loop sections are skipped. + Wavefunctions and pseudopotentials will be loaded and processed. This option can be used to verify all the required + files are available or to check memory usage. - ``--enable-timers=none|coarse|medium|fine`` Control the timer granularity when the build option ``ENABLE_TIMERS`` is enabled. -- ``help`` Print version information as well as a list of optional +- ``--help`` Print version information as well as a list of optional command-line arguments. -- ``noprint`` Do not print extra information on Jastrow or pseudopotential. +- ``--noprint`` Do not print extra information on Jastrow or pseudopotential. If this flag is not present, QMCPACK will create several ``.dat`` files that contain information about pseudopotentials (one file per PP) and Jastrow factors (one per Jastrow factor). These file might be useful for visual inspection @@ -31,14 +33,20 @@ option is disabled: - ``--verbosity=low|high|debug`` Control the output verbosity. The default low verbosity is concise and, for example, does not include all electron or atomic positions for large systems to reduce output size. Use "high" to see this information and more details of initialization, allocations, QMC method settings, etc. -- ``version`` Print version information and optional arguments. Same as ``help``. +- ``--version`` Print version information and optional arguments. Same as ``--help``. .. _inputs: Input files ----------- -The input is one or more XML file(s), documented in :ref:`input-overview`. +The input is one or more XML file(s), documented in :ref:`input-overview`. Input XML files must end in the suffix ``.xml``.
+ +An ensemble of calculations can be run by specifying multiple input XML files or text files containing a list of valid XML input +files. In the latter case, a single filename of an XML input should be specified on each line. Ensemble runs split available MPI +tasks evenly between all the specified inputs. Because QMCPACK will only exit when all calculations are completed, it is recommended +for computational efficiency that either all calculations have similar costs and runtimes, or the ``max_seconds`` input parameter +should be used to enforce similar runtimes. Output files ------------ @@ -72,14 +80,23 @@ Running in parallel with MPI QMCPACK is fully parallelized with MPI. When performing an ensemble job, all the MPI ranks are first equally divided into groups that perform individual QMC calculations. Within one calculation, all the walkers are fully distributed -across all the MPI ranks in the group. Since MPI requires distributed memory, -there must be at least one MPI per node. To maximize the efficiency, more facts -should be taken into account. When using MPI+threads on compute nodes with more -than one NUMA domain (e.g., AMD Interlagos CPU on Titan or a node with multiple -CPU sockets), it is recommended to place as many MPI ranks as the number of -NUMA domains if the memory is sufficient (e.g., one MPI task per socket). On clusters with more than one -GPU per node (NVIDIA Tesla K80), it is necessary to use the same number of MPI -ranks as the number of GPUs per node to let each MPI rank take one GPU. +across all the MPI ranks in the group. Each compute node must have at least one MPI rank. +Having one MPI rank per CPU core is a bad practice due to high total memory footprint +caused by datasets that have to be duplicated on each MPI rank. + +We recommend users study the hardware architecture of a compute node before starting any calculation on it. 
+Suboptimal choice of the number of MPI ranks and their binding to the hardware may lead to significant waste of compute resources. +The rule of thumb is to have the number of MPI ranks per node equal to the number of memory domains with uniform access +attached to the dominant compute devices within a compute node. Fewer can be used when memory is constrained. +On most CPU-only machines, each CPU socket has its dedicated memory with uniform access from all its cores and cross-socket access is non-uniform. +Users may simply place one MPI rank per socket. +Some CPU sockets, such as the Fujitsu A64FX, consist of core clusters with non-uniform cross-cluster memory access. +In such cases, the largest uniform access memory domain is a cluster and thus users should place one MPI rank per cluster for optimal code performance. +On machines with GPU accelerators, GPUs are the primary compute devices and thus users should count the number of +uniform access memory domains attached to GPUs. Usually each GPU card has a single GPU die with its own dedicated graphics memory, counted as one domain. +Users may then simply place one MPI rank per GPU card. High-end GPU cards may have more than a single GPU memory domain. +For example, AMD Instinct MI250X and Intel Data Center GPU Max 1550 cards both have two memory domains per card. +For such cards, users should place one MPI rank per GPU memory domain (AMD GCD, Intel tile). .. _openmprunning: @@ -89,15 +106,18 @@ Using OpenMP threads Modern processors integrate multiple identical cores even with hardware threads on a single die to increase the total performance and maintain a reasonable power draw. QMCPACK takes advantage of this -compute capability by using threads and the OpenMP programming model -as well as threaded linear algebra libraries. By default, QMCPACK is +compute capability by using threads directly via the OpenMP programming model +and indirectly via threaded linear algebra libraries. By default, QMCPACK is always built with OpenMP enabled.
When launching calculations, users should instruct QMCPACK to create the right number of threads per MPI -rank by specifying environment variable OMP\_NUM\_THREADS. Assuming -one MPI rank per socket, the number of threads should typically be the -number of cores on that socket. Even in the GPU-accelerated version, -using threads significantly reduces the time spent on the calculations -performed by the CPU. +rank by specifying the environment variable OMP\_NUM\_THREADS. +It is recommended to set the number of OpenMP threads equal to the number +of physical CPU cores that can be exclusively assigned to each MPI rank. +Even when GPU acceleration is enabled, using threads significantly +reduces the time spent on the calculations performed by the CPU. Almost all the MPI launchers +require proper configuration to map the OpenMP threads to the processor cores correctly +and avoid assigning multiple threads to the same processor core. If multiple threads do share a core, very significant +slowdowns result. Users should check their MPI documentation and verify performance before doing costly production calculations. Nested OpenMP threads ~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/simulationcell.rst b/docs/simulationcell.rst index c03e5eb9fe..7d9ca71822 100644 --- a/docs/simulationcell.rst +++ b/docs/simulationcell.rst @@ -20,21 +20,23 @@ up. Attribute: - +---------------------+--------------+---------------------------+-------------------+----------------------------------------------------+ - | **parameter name** | **datatype** | **values** | **default** | **description** | - +=====================+==============+===========================+===================+====================================================+ - | ``lattice`` | 9 floats | any float | Must be specified | Specification of lattice vectors.
| - +---------------------+--------------+---------------------------+-------------------+----------------------------------------------------+ - | ``bconds`` | string | "p" or "n" | "n n n " | Boundary conditions for each axis. | - +---------------------+--------------+---------------------------+-------------------+----------------------------------------------------+ - | ``vacuum`` | float | :math:`\geq 1.0` | 1.0 | Vacuum scale. | - +---------------------+--------------+---------------------------+-------------------+----------------------------------------------------+ - | ``LR_handler`` | string | string | "opt_breakup" | Ewald breakup method. | - +---------------------+--------------+---------------------------+-------------------+----------------------------------------------------+ - | ``LR_dim_cutoff`` | float | float | 15 | Ewald breakup distance. | - +---------------------+--------------+---------------------------+-------------------+----------------------------------------------------+ - | ``LR_tol`` | float | float | 3e-4 | Tolerance in Ha for Ewald ion-ion energy per atom. | - +---------------------+--------------+---------------------------+-------------------+----------------------------------------------------+ + +---------------------+--------------+------------------+-------------------+------------------------------------------------------------------------------------+ + | **parameter name** | **datatype** | **values** | **default** | **description** | + +=====================+==============+==================+===================+====================================================================================+ + | ``lattice`` | 9 floats | any float | Must be specified | Specification of lattice vectors. 
| + +---------------------+--------------+------------------+-------------------+------------------------------------------------------------------------------------+ + | ``bconds`` | string | "p" or "n" | "n n n " | Boundary conditions for each axis. | + +---------------------+--------------+------------------+-------------------+------------------------------------------------------------------------------------+ + | ``vacuum`` | float | :math:`\geq 1.0` | 1.0 | Vacuum scale. | + +---------------------+--------------+------------------+-------------------+------------------------------------------------------------------------------------+ + | ``LR_handler`` | string | string | "opt_breakup" | Ewald breakup method. | + +---------------------+--------------+------------------+-------------------+------------------------------------------------------------------------------------+ + | ``LR_dim_cutoff`` | float | float | 15 | Ewald breakup distance. | + +---------------------+--------------+------------------+-------------------+------------------------------------------------------------------------------------+ + | ``LR_tol`` | float | float | 3e-4 | Tolerance in Ha for Ewald ion-ion energy per atom. | + +---------------------+--------------+------------------+-------------------+------------------------------------------------------------------------------------+ + | ``ewald_grid`` | int | int | 1001 | The number of linear grid points used for short-range part of the Ewald potential. | + +---------------------+--------------+------------------+-------------------+------------------------------------------------------------------------------------+ An example of a block is given below: @@ -119,6 +121,7 @@ the z-axis increases from 12 to 18 by the vacuum scale of 1.5. 1.5 20 ewald + 1001 LR_handler @@ -163,6 +166,11 @@ length of the maximum :math:`k`-vector used in the long-ranged term. Larger values of increase the accuracy of the evaluation. 
A value of 15 tends to be conservative for the ``opt_breakup`` handler in 3D. +ewald_grid +~~~~~~~~~~~~~ +The short-range part of the Ewald/optimized potential :math:`v^{sr}(r)` is put on a linear grid. +``ewald_grid`` controls the number of grid points on this 1D grid. + .. _particleset: Specifying the particle set @@ -216,6 +224,20 @@ Optional particleset attributes - | ``size`` | Number of particles in set. +- | ``random`` + | Randomize starting positions of particles. Each component of each + particle’s position is randomized independently in the range of the + simulation cell in that component’s direction. + +- | ``randomsrc``/``random_source`` + | Specify source particle set around which to randomize the initial + positions of this particle set. + +- | ``spinor`` + | Sets an internal flag that the particleset (usually for electrons) is + a spinor object. This is used in the wavefunction builders and QMC drivers + to determine if spin sampling will be used. + ``Group`` element: +-----------------+---------------------------+ @@ -250,6 +272,24 @@ Optional particleset attributes | ``atomicnumber`` | Integer | *Any* | 0 | Atomic number of particles in set | +------------------+----------+--------+---------+------------------------------------+ +Required group attributes +^^^^^^^^^^^^^^^^^^^^^^^^^ + +- | ``name``/``id`` + | Unique name for the particle set group. Typically, element symbols + are used for ions and “u" or “d" for spin-up and spin-down electron + groups, respectively. + +Optional group attributes +^^^^^^^^^^^^^^^^^^^^^^^^^ + +- | ``mass`` + | Mass of particles in set. + +- | ``unit`` + | Units for mass of particles in set (au[:math:`m_e` = 1] or + amu[:math:`\frac{1}{12}m_{\rm ^{12}C}` = 1]).
+ ``attrib`` element: +---------------------+------------------------------------+ @@ -261,44 +301,17 @@ Optional particleset attributes +--------------------+--------------+--------------------------------------------+-------------+------------------------+ | **Name** | **Datatype** | **Values** | **Default** | **Description** | +====================+==============+============================================+=============+========================+ - | ``name`` | String | *Any* | *None* | Name of attrib | + | ``name`` | String | ionid, *Any* | *None* | Name of attrib | +--------------------+--------------+--------------------------------------------+-------------+------------------------+ | ``datatype`` | String | IntArray, realArray, posArray, stringArray | *None* | Type of data in attrib | +--------------------+--------------+--------------------------------------------+-------------+------------------------+ + | ``condition`` | Integer | 0, 1 | 0 | Select coordinates | + +--------------------+--------------+--------------------------------------------+-------------+------------------------+ | ``size``:math:`^o` | String | *Any* | *None* | Size of data in attrib | +--------------------+--------------+--------------------------------------------+-------------+------------------------+ -- | ``random`` - | Randomize starting positions of particles. Each component of each - particle’s position is randomized independently in the range of the - simulation cell in that component’s direction. - -- | ``randomsrc``/``random_source`` - | Specify source particle set around which to randomize the initial - positions of this particle set. - -- | ``spinor`` - | Sets an internal flag that the particleset (usually for electrons) is - a spinor object. This is used in the wavefunction builders and QMC drivers - to determiane if spin sampling will be used - -Required name attributes -^^^^^^^^^^^^^^^^^^^^^^^^ - -- | ``name``/``id`` - | Unique name for the particle set group. 
Typically, element symbols - are used for ions and “u" or “d" for spin-up and spin-down electron - groups, respectively. - -Optional group attributes -^^^^^^^^^^^^^^^^^^^^^^^^^ - -- | ``mass`` - | Mass of particles in set. - -- | ``unit`` - | Units for mass of particles in set (au[:math:`m_e` = 1] or - amu[:math:`\frac{1}{12}m_{\rm ^{12}C}` = 1]). +- | ``condition`` + | When ``datatype`` is posArray, set 0 for cartesian coordinates or set 1 for fractional coordinates. Example use cases ~~~~~~~~~~~~~~~~~ diff --git a/docs/spin_orbit.rst b/docs/spin_orbit.rst index e2716a43c7..78a7669705 100644 --- a/docs/spin_orbit.rst +++ b/docs/spin_orbit.rst @@ -75,7 +75,7 @@ where we now utilize determinants of spinors, as opposed to the usual product of - + @@ -88,8 +88,9 @@ need a product of up and down determinants. In the Jastrow specification, we only need to provide the jastrow terms for the same spin as there is no longer a distinction between the up and down spins. +The electron-electron cusp in this case should be -1/2, as discussed in :cite:`Melton2016-2`. -We also make a small modification in the particleset specification: +We also make a small modification in the particleset specification ..
code-block:: :caption: specification for the electron particle when performing spin-orbit calculations diff --git a/docs/uml/WalkerID_chen.pdf b/docs/uml/WalkerID_chen.pdf new file mode 100644 index 0000000000..53e629d53d Binary files /dev/null and b/docs/uml/WalkerID_chen.pdf differ diff --git a/docs/uml/WalkerID_chen.uml b/docs/uml/WalkerID_chen.uml new file mode 100644 index 0000000000..34ee2c4bec --- /dev/null +++ b/docs/uml/WalkerID_chen.uml @@ -0,0 +1,33 @@ +@startchen +entity WALKER <> { + walker_id <> + step_created <> + } + +relationship PARENT_OF { +parent_id +} + +PARENT_OF -1- WALKER +PARENT_OF -N- WALKER + +entity STEP <> { + step_number <> +} + +entity MOVE <> { + properties + coordinate +} + +WALKER == MOVE + +relationship PART_OF { + Section +} + +STEP == MOVE + +PART_OF =N= WALKER +PART_OF =N= STEP +@endchen diff --git a/docs/unit_testing.rst b/docs/unit_testing.rst index 26d8cccaa8..6181a94164 100644 --- a/docs/unit_testing.rst +++ b/docs/unit_testing.rst @@ -27,6 +27,8 @@ For example, the tests in ``src/QMCWavefunctions/tests`` are compiled into ``bui All the unit test executables are collected under ctest with the ``unit`` label. When checking the whole code, it is useful to run through CMake (``cmake -L unit``). When working on an individual directory, it is useful to run the individual executable. +One can work from one of the `tests` directories beneath the build directory for a faster build and test cycle. + Some of the tests reference input files. The unit test CMake setup places those input files in particular locations under the ``tests`` directory (e.g., ``tests/xml_test``). The individual test needs to be run from that directory to find the expected input files. @@ -109,7 +111,7 @@ A test with failures will look like Adding tests ------------ -Three scenarios are covered here: adding a new test in an existing file, adding a new test file, and adding a new ``test`` directory. 
+Three scenarios are covered here: adding a new test in an existing file, adding a new test file, and adding a new ``tests`` directory. Adding a test to existing file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -121,6 +123,7 @@ Adding a test file When adding a new test file, create a file in the test directory, or copy from an existing file. Add the file name to the ``ADD_EXECUTABLE`` in the ``CMakeLists.txt`` file in that directory. +The pattern for the test file name is ``test_.cpp``. Many older tests do not follow this pattern, but new tests should. One (and only one) file must define the ``main`` function for the test executable by defining ``CATCH_CONFIG_MAIN`` before including the Catch header. If more than one file defines this value, there will be linking errors about multiply defined values. @@ -144,3 +147,39 @@ This approach is valuable at some levels of testing, but is unsatisfying at the The ``Utilities`` directory contains a "fake" random number generator that can be used for deterministic tests of these parts of the code. Currently it outputs a single, fixed value every time it is called, but it could be expanded to produce more varied, but still deterministic, sequences. See ``src/QMCDrivers/test_vmc.cpp`` for an example of using the fake random number generator. + +Setting up objects +------------------ +One of the more difficult parts of writing tests is constructing the object, and prerequisite objects. +There are three routes to building an object: + +1. Construct the object directly. +2. Use an XML fragment and use the XML parsing paths to construct an object +3. For updated classes, construct the Input object and use that in the construction path. + + +Constructing the object directly can be the most difficult in terms of building all the prerequisite objects. +Building an object from an XML fragment has an advantage of being similar to input files. 
+ +Building from XML +----------------- + +Use C++ raw string literals (strings delimited with ``R"(`` and ``)"``) to use XML fragments in the code. +The ``Libxml2Document`` class has a ``parseFromString`` function to parse XML input for testing. + +The following code fragment to read the XML is common: + +.. code-block:: + + const char* xml_str = R"()"; + Libxml2Document doc; + bool okay = doc.parseFromString(xml_str); + REQUIRE(okay); + +After parsing, the ``Libxml2Document`` class has a ``getRoot`` function to get the root XML node. +The QMCPACK parsing functions often expect the tags they are parsing to be a child of the node +that is passed to the function. +For this case, put an additional tag as a parent of the target elements (the reason for ``...`` in the example above). + +The ``Libxml2Document`` class can also read XML from a file with the ``parse`` function. +Reading from a file can make the test code smaller, at the expense of maintaining an extra file. diff --git a/doxygen/CMakeLists.txt b/doxygen/CMakeLists.txt new file mode 100644 index 0000000000..941c104e87 --- /dev/null +++ b/doxygen/CMakeLists.txt @@ -0,0 +1,58 @@ +find_package(Doxygen) +if(NOT DOXYGEN_FOUND) + message(STATUS "doxygen not found. Source code documentation via qmcpack_doxygen target requires doxygen and (optionally) dot from graphviz.") + return() +else() + message(STATUS "doxygen found.
For source code documentation build the qmcpack_doxygen target.") +endif() + +set(DOXYGEN_PROJECT_NAME QMCPACK) +set(DOXYGEN_FILE_PATTERNS *.h *.cpp *.doc) +set(DOXYGEN_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") +set(DOXYGEN_STRIP_FROM_PATH docs src) +set(DOXYGEN_IMAGE_PATH + ${PROJECT_SOURCE_DIR} + ${PROJECT_SOURCE_DIR}/doxygen/images + "$(DOXYGEN_IMAGE_PATH)" +) +set(DOXYGEN_USE_MDFILE_AS_MAINPAGE ${PROJECT_SOURCE_DIR}/README.md) +set(DOXYGEN_TOC_INCLUDE_HEADINGS 0) +set(DOXYGEN_NUM_PROC_THREADS 0) +set(DOXYGEN_GENERATE_TREEVIEW YES) +set(DOXYGEN_TEMPLATE_RELATIONS YES) +set(DOXYGEN_EXTRACT_ALL YES) +set(DOXYGEN_EXTRACT_PRIVATE YES) +set(DOXYGEN_EXTRACT_STATIC YES) +set(DOXYGEN_SOURCE_BROWSER YES) +set(DOXYGEN_SHORT_NAMES YES) +set(DOXYGEN_JAVADOC_AUTOBRIEF YES) +set(DOXYGEN_DOT_IMAGE_FORMAT svg) +set(DOXYGEN_INTERACTIVE_SVG YES) +set(DOXYGEN_DISTRIBUTE_GROUP_DOC YES) +set(DOXYGEN_INLINE_GROUPED_CLASSES YES) +set(DOXYGEN_INLINE_SIMPLE_STRUCTS YES) +set(DOXYGEN_INLINE_SOURCES YES) +set(DOXYGEN_STRIP_CODE_COMMENTS NO) +set(DOXYGEN_HIDE_SCOPE_NAMES YES) +set(DOXYGEN_SHOW_INCLUDE_FILES NO) +set(DOXYGEN_REFERENCED_BY_RELATION YES) +set(DOXYGEN_REFERENCES_RELATION YES) +set(DOXYGEN_VERBATIM_HEADERS NO) +set(DOXYGEN_TOC_EXPAND YES) +set(DOXYGEN_MACRO_EXPANSION YES) +set(DOXYGEN_EXPAND_ONLY_PREDEF YES) +set(DOXYGEN_SEARCH_INCLUDES NO) +set(DOXYGEN_PREDEFINED DOXYGEN_SHOULD_SKIP_THIS) +set(DOXYGEN_DIRECTORY_GRAPH NO) +set(DOXYGEN_HTML_TIMESTAMP YES) +set(DOXYGEN_ENUM_VALUES_PER_LINE 1) +set(DOXYGEN_GENERATE_TAGFILE qmc_doxygen_tags) +set(DOXYGEN_CALL_GRAPH YES) + +doxygen_add_docs(qmcpack_doxygen + ${PROJECT_SOURCE_DIR}/src + ${PROJECT_BINARY_DIR}/src/config.h + ${PROJECT_SOURCE_DIR}/README.md + ${PROJECT_SOURCE_DIR}/CHANGELOG.md + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} +) diff --git a/doxygen/Doxyfile b/doxygen/attic/Doxyfile similarity index 100% rename from doxygen/Doxyfile rename to doxygen/attic/Doxyfile diff --git a/doxygen/Doxyfile.ug 
b/doxygen/attic/Doxyfile.ug similarity index 100% rename from doxygen/Doxyfile.ug rename to doxygen/attic/Doxyfile.ug diff --git a/doxygen/DoxygenLayout.xml b/doxygen/attic/DoxygenLayout.xml similarity index 100% rename from doxygen/DoxygenLayout.xml rename to doxygen/attic/DoxygenLayout.xml diff --git a/doxygen/Makefile b/doxygen/attic/Makefile similarity index 100% rename from doxygen/Makefile rename to doxygen/attic/Makefile diff --git a/doxygen/OrbitalOptimization.tex b/doxygen/attic/OrbitalOptimization.tex similarity index 100% rename from doxygen/OrbitalOptimization.tex rename to doxygen/attic/OrbitalOptimization.tex diff --git a/doxygen/README b/doxygen/attic/README similarity index 100% rename from doxygen/README rename to doxygen/attic/README diff --git a/doxygen/config.h b/doxygen/attic/config.h similarity index 100% rename from doxygen/config.h rename to doxygen/attic/config.h diff --git a/doxygen/conversion.tex b/doxygen/attic/conversion.tex similarity index 100% rename from doxygen/conversion.tex rename to doxygen/attic/conversion.tex diff --git a/doxygen/dev.cfg b/doxygen/attic/dev.cfg similarity index 100% rename from doxygen/dev.cfg rename to doxygen/attic/dev.cfg diff --git a/doxygen/dev.tag b/doxygen/attic/dev.tag similarity index 100% rename from doxygen/dev.tag rename to doxygen/attic/dev.tag diff --git a/doxygen/dev/basic.dox b/doxygen/attic/dev/basic.dox similarity index 100% rename from doxygen/dev/basic.dox rename to doxygen/attic/dev/basic.dox diff --git a/doxygen/dev/design.dox b/doxygen/attic/dev/design.dox similarity index 100% rename from doxygen/dev/design.dox rename to doxygen/attic/dev/design.dox diff --git a/doxygen/dev/einspline.dox b/doxygen/attic/dev/einspline.dox similarity index 100% rename from doxygen/dev/einspline.dox rename to doxygen/attic/dev/einspline.dox diff --git a/doxygen/dev/index.dox b/doxygen/attic/dev/index.dox similarity index 100% rename from doxygen/dev/index.dox rename to doxygen/attic/dev/index.dox diff 
--git a/doxygen/dev/jastrow.dox b/doxygen/attic/dev/jastrow.dox similarity index 100% rename from doxygen/dev/jastrow.dox rename to doxygen/attic/dev/jastrow.dox diff --git a/doxygen/dev/psi.dox b/doxygen/attic/dev/psi.dox similarity index 100% rename from doxygen/dev/psi.dox rename to doxygen/attic/dev/psi.dox diff --git a/doxygen/dev/todo.dox b/doxygen/attic/dev/todo.dox similarity index 100% rename from doxygen/dev/todo.dox rename to doxygen/attic/dev/todo.dox diff --git a/doxygen/dev/update.dox b/doxygen/attic/dev/update.dox similarity index 100% rename from doxygen/dev/update.dox rename to doxygen/attic/dev/update.dox diff --git a/doxygen/dev/validation.dox b/doxygen/attic/dev/validation.dox similarity index 100% rename from doxygen/dev/validation.dox rename to doxygen/attic/dev/validation.dox diff --git a/doxygen/dev_layout.xml b/doxygen/attic/dev_layout.xml similarity index 100% rename from doxygen/dev_layout.xml rename to doxygen/attic/dev_layout.xml diff --git a/doxygen/dot/einsplineset_0.graffle b/doxygen/attic/dot/einsplineset_0.graffle similarity index 100% rename from doxygen/dot/einsplineset_0.graffle rename to doxygen/attic/dot/einsplineset_0.graffle diff --git a/doxygen/dot/einsplineset_0.pdf b/doxygen/attic/dot/einsplineset_0.pdf similarity index 100% rename from doxygen/dot/einsplineset_0.pdf rename to doxygen/attic/dot/einsplineset_0.pdf diff --git a/doxygen/dot/hamiltonian.dot b/doxygen/attic/dot/hamiltonian.dot similarity index 100% rename from doxygen/dot/hamiltonian.dot rename to doxygen/attic/dot/hamiltonian.dot diff --git a/doxygen/dot/lcorbitalset.graffle b/doxygen/attic/dot/lcorbitalset.graffle similarity index 100% rename from doxygen/dot/lcorbitalset.graffle rename to doxygen/attic/dot/lcorbitalset.graffle diff --git a/doxygen/dot/particleset.dot b/doxygen/attic/dot/particleset.dot similarity index 100% rename from doxygen/dot/particleset.dot rename to doxygen/attic/dot/particleset.dot diff --git a/doxygen/dot/particleset.graffle 
b/doxygen/attic/dot/particleset.graffle similarity index 100% rename from doxygen/dot/particleset.graffle rename to doxygen/attic/dot/particleset.graffle diff --git a/doxygen/dot/psi_factory.dot b/doxygen/attic/dot/psi_factory.dot similarity index 100% rename from doxygen/dot/psi_factory.dot rename to doxygen/attic/dot/psi_factory.dot diff --git a/doxygen/dot/qmcsys.dot b/doxygen/attic/dot/qmcsys.dot similarity index 100% rename from doxygen/dot/qmcsys.dot rename to doxygen/attic/dot/qmcsys.dot diff --git a/doxygen/dot/simulation.dot b/doxygen/attic/dot/simulation.dot similarity index 100% rename from doxygen/dot/simulation.dot rename to doxygen/attic/dot/simulation.dot diff --git a/doxygen/dot/wfs.dot b/doxygen/attic/dot/wfs.dot similarity index 100% rename from doxygen/dot/wfs.dot rename to doxygen/attic/dot/wfs.dot diff --git a/doxygen/dot/wfs_factory.dot b/doxygen/attic/dot/wfs_factory.dot similarity index 100% rename from doxygen/dot/wfs_factory.dot rename to doxygen/attic/dot/wfs_factory.dot diff --git a/doxygen/dot/xmlsymbols.dot b/doxygen/attic/dot/xmlsymbols.dot similarity index 100% rename from doxygen/dot/xmlsymbols.dot rename to doxygen/attic/dot/xmlsymbols.dot diff --git a/doxygen/doxygen.sty b/doxygen/attic/doxygen.sty similarity index 100% rename from doxygen/doxygen.sty rename to doxygen/attic/doxygen.sty diff --git a/doxygen/eshdf.dot b/doxygen/attic/eshdf.dot similarity index 100% rename from doxygen/eshdf.dot rename to doxygen/attic/eshdf.dot diff --git a/doxygen/eshdf.svg b/doxygen/attic/eshdf.svg similarity index 100% rename from doxygen/eshdf.svg rename to doxygen/attic/eshdf.svg diff --git a/doxygen/features.doc b/doxygen/attic/features.doc similarity index 100% rename from doxygen/features.doc rename to doxygen/attic/features.doc diff --git a/doxygen/footer.tex b/doxygen/attic/footer.tex similarity index 100% rename from doxygen/footer.tex rename to doxygen/attic/footer.tex diff --git a/doxygen/header.tex b/doxygen/attic/header.tex 
similarity index 100% rename from doxygen/header.tex rename to doxygen/attic/header.tex diff --git a/doxygen/images/DMC-startcalc.eps b/doxygen/attic/images/DMC-startcalc.eps similarity index 100% rename from doxygen/images/DMC-startcalc.eps rename to doxygen/attic/images/DMC-startcalc.eps diff --git a/doxygen/images/DMC-startcalc.png b/doxygen/attic/images/DMC-startcalc.png similarity index 100% rename from doxygen/images/DMC-startcalc.png rename to doxygen/attic/images/DMC-startcalc.png diff --git a/doxygen/images/DMCTstepConv.eps b/doxygen/attic/images/DMCTstepConv.eps similarity index 100% rename from doxygen/images/DMCTstepConv.eps rename to doxygen/attic/images/DMCTstepConv.eps diff --git a/doxygen/images/DMCTstepConv.png b/doxygen/attic/images/DMCTstepConv.png similarity index 100% rename from doxygen/images/DMCTstepConv.png rename to doxygen/attic/images/DMCTstepConv.png diff --git a/doxygen/images/FSU-tutorial-0.png b/doxygen/attic/images/FSU-tutorial-0.png similarity index 100% rename from doxygen/images/FSU-tutorial-0.png rename to doxygen/attic/images/FSU-tutorial-0.png diff --git a/doxygen/images/FSU-tutorial-1.png b/doxygen/attic/images/FSU-tutorial-1.png similarity index 100% rename from doxygen/images/FSU-tutorial-1.png rename to doxygen/attic/images/FSU-tutorial-1.png diff --git a/doxygen/images/FSU-tutorial-2.png b/doxygen/attic/images/FSU-tutorial-2.png similarity index 100% rename from doxygen/images/FSU-tutorial-2.png rename to doxygen/attic/images/FSU-tutorial-2.png diff --git a/doxygen/images/FSU-tutorial-3.png b/doxygen/attic/images/FSU-tutorial-3.png similarity index 100% rename from doxygen/images/FSU-tutorial-3.png rename to doxygen/attic/images/FSU-tutorial-3.png diff --git a/doxygen/images/FSU-tutorial-4.png b/doxygen/attic/images/FSU-tutorial-4.png similarity index 100% rename from doxygen/images/FSU-tutorial-4.png rename to doxygen/attic/images/FSU-tutorial-4.png diff --git a/doxygen/images/FSU-tutorial-5.png 
b/doxygen/attic/images/FSU-tutorial-5.png similarity index 100% rename from doxygen/images/FSU-tutorial-5.png rename to doxygen/attic/images/FSU-tutorial-5.png diff --git a/doxygen/images/FSU-tutorial-6.png b/doxygen/attic/images/FSU-tutorial-6.png similarity index 100% rename from doxygen/images/FSU-tutorial-6.png rename to doxygen/attic/images/FSU-tutorial-6.png diff --git a/doxygen/images/FSU-tutorial-7.png b/doxygen/attic/images/FSU-tutorial-7.png similarity index 100% rename from doxygen/images/FSU-tutorial-7.png rename to doxygen/attic/images/FSU-tutorial-7.png diff --git a/doxygen/images/FSU-tutorial-8.png b/doxygen/attic/images/FSU-tutorial-8.png similarity index 100% rename from doxygen/images/FSU-tutorial-8.png rename to doxygen/attic/images/FSU-tutorial-8.png diff --git a/doxygen/images/FSU-tutorial-9.png b/doxygen/attic/images/FSU-tutorial-9.png similarity index 100% rename from doxygen/images/FSU-tutorial-9.png rename to doxygen/attic/images/FSU-tutorial-9.png diff --git a/doxygen/images/FeO_variance_ratio.png b/doxygen/attic/images/FeO_variance_ratio.png similarity index 100% rename from doxygen/images/FeO_variance_ratio.png rename to doxygen/attic/images/FeO_variance_ratio.png diff --git a/doxygen/images/FullDMCPlot.eps b/doxygen/attic/images/FullDMCPlot.eps similarity index 100% rename from doxygen/images/FullDMCPlot.eps rename to doxygen/attic/images/FullDMCPlot.eps diff --git a/doxygen/images/FullDMCPlot.png b/doxygen/attic/images/FullDMCPlot.png similarity index 100% rename from doxygen/images/FullDMCPlot.png rename to doxygen/attic/images/FullDMCPlot.png diff --git a/doxygen/images/SplineConv.eps b/doxygen/attic/images/SplineConv.eps similarity index 100% rename from doxygen/images/SplineConv.eps rename to doxygen/attic/images/SplineConv.eps diff --git a/doxygen/images/SplineConv.png b/doxygen/attic/images/SplineConv.png similarity index 100% rename from doxygen/images/SplineConv.png rename to doxygen/attic/images/SplineConv.png diff --git 
a/doxygen/images/bconds.png b/doxygen/attic/images/bconds.png similarity index 100% rename from doxygen/images/bconds.png rename to doxygen/attic/images/bconds.png diff --git a/doxygen/images/compose_figures.pptx b/doxygen/attic/images/compose_figures.pptx similarity index 100% rename from doxygen/images/compose_figures.pptx rename to doxygen/attic/images/compose_figures.pptx diff --git a/doxygen/images/core.png b/doxygen/attic/images/core.png similarity index 100% rename from doxygen/images/core.png rename to doxygen/attic/images/core.png diff --git a/doxygen/images/dmc_algo.png b/doxygen/attic/images/dmc_algo.png similarity index 100% rename from doxygen/images/dmc_algo.png rename to doxygen/attic/images/dmc_algo.png diff --git a/doxygen/images/driver_class.png b/doxygen/attic/images/driver_class.png similarity index 100% rename from doxygen/images/driver_class.png rename to doxygen/attic/images/driver_class.png diff --git a/doxygen/images/e_block.eps b/doxygen/attic/images/e_block.eps similarity index 100% rename from doxygen/images/e_block.eps rename to doxygen/attic/images/e_block.eps diff --git a/doxygen/images/e_block.pdf b/doxygen/attic/images/e_block.pdf similarity index 100% rename from doxygen/images/e_block.pdf rename to doxygen/attic/images/e_block.pdf diff --git a/doxygen/images/e_block.png b/doxygen/attic/images/e_block.png similarity index 100% rename from doxygen/images/e_block.png rename to doxygen/attic/images/e_block.png diff --git a/doxygen/images/e_trace.eps b/doxygen/attic/images/e_trace.eps similarity index 100% rename from doxygen/images/e_trace.eps rename to doxygen/attic/images/e_trace.eps diff --git a/doxygen/images/e_trace.pdf b/doxygen/attic/images/e_trace.pdf similarity index 100% rename from doxygen/images/e_trace.pdf rename to doxygen/attic/images/e_trace.pdf diff --git a/doxygen/images/e_trace.png b/doxygen/attic/images/e_trace.png similarity index 100% rename from doxygen/images/e_trace.png rename to 
doxygen/attic/images/e_trace.png diff --git a/doxygen/images/eclipse_build_panel.png b/doxygen/attic/images/eclipse_build_panel.png similarity index 100% rename from doxygen/images/eclipse_build_panel.png rename to doxygen/attic/images/eclipse_build_panel.png diff --git a/doxygen/images/eclipse_build_panel_comment.png b/doxygen/attic/images/eclipse_build_panel_comment.png similarity index 100% rename from doxygen/images/eclipse_build_panel_comment.png rename to doxygen/attic/images/eclipse_build_panel_comment.png diff --git a/doxygen/images/eclipse_proj.png b/doxygen/attic/images/eclipse_proj.png similarity index 100% rename from doxygen/images/eclipse_proj.png rename to doxygen/attic/images/eclipse_proj.png diff --git a/doxygen/images/eclipse_project.png b/doxygen/attic/images/eclipse_project.png similarity index 100% rename from doxygen/images/eclipse_project.png rename to doxygen/attic/images/eclipse_project.png diff --git a/doxygen/images/fermion_wfs.png b/doxygen/attic/images/fermion_wfs.png similarity index 100% rename from doxygen/images/fermion_wfs.png rename to doxygen/attic/images/fermion_wfs.png diff --git a/doxygen/images/finite-size-scaling.eps b/doxygen/attic/images/finite-size-scaling.eps similarity index 100% rename from doxygen/images/finite-size-scaling.eps rename to doxygen/attic/images/finite-size-scaling.eps diff --git a/doxygen/images/finite-size-scaling.png b/doxygen/attic/images/finite-size-scaling.png similarity index 100% rename from doxygen/images/finite-size-scaling.png rename to doxygen/attic/images/finite-size-scaling.png diff --git a/doxygen/images/h_calltree.png b/doxygen/attic/images/h_calltree.png similarity index 100% rename from doxygen/images/h_calltree.png rename to doxygen/attic/images/h_calltree.png diff --git a/doxygen/images/ham_class.pdf b/doxygen/attic/images/ham_class.pdf similarity index 100% rename from doxygen/images/ham_class.pdf rename to doxygen/attic/images/ham_class.pdf diff --git a/doxygen/images/ham_class.png 
b/doxygen/attic/images/ham_class.png similarity index 100% rename from doxygen/images/ham_class.png rename to doxygen/attic/images/ham_class.png diff --git a/doxygen/images/ham_xml.png b/doxygen/attic/images/ham_xml.png similarity index 100% rename from doxygen/images/ham_xml.png rename to doxygen/attic/images/ham_xml.png diff --git a/doxygen/images/mcconfig.png b/doxygen/attic/images/mcconfig.png similarity index 100% rename from doxygen/images/mcconfig.png rename to doxygen/attic/images/mcconfig.png diff --git a/doxygen/images/mpi_openmp_objects.pdf b/doxygen/attic/images/mpi_openmp_objects.pdf similarity index 100% rename from doxygen/images/mpi_openmp_objects.pdf rename to doxygen/attic/images/mpi_openmp_objects.pdf diff --git a/doxygen/images/mpi_openmp_objects.png b/doxygen/attic/images/mpi_openmp_objects.png similarity index 100% rename from doxygen/images/mpi_openmp_objects.png rename to doxygen/attic/images/mpi_openmp_objects.png diff --git a/doxygen/images/opt-conv.eps b/doxygen/attic/images/opt-conv.eps similarity index 100% rename from doxygen/images/opt-conv.eps rename to doxygen/attic/images/opt-conv.eps diff --git a/doxygen/images/opt-conv.png b/doxygen/attic/images/opt-conv.png similarity index 100% rename from doxygen/images/opt-conv.png rename to doxygen/attic/images/opt-conv.png diff --git a/doxygen/images/particleset.png b/doxygen/attic/images/particleset.png similarity index 100% rename from doxygen/images/particleset.png rename to doxygen/attic/images/particleset.png diff --git a/doxygen/images/qmc.xml.png b/doxygen/attic/images/qmc.xml.png similarity index 100% rename from doxygen/images/qmc.xml.png rename to doxygen/attic/images/qmc.xml.png diff --git a/doxygen/images/qmc_workflow.png b/doxygen/attic/images/qmc_workflow.png similarity index 100% rename from doxygen/images/qmc_workflow.png rename to doxygen/attic/images/qmc_workflow.png diff --git a/doxygen/images/qmcdrivers.png b/doxygen/attic/images/qmcdrivers.png similarity index 100% 
rename from doxygen/images/qmcdrivers.png rename to doxygen/attic/images/qmcdrivers.png diff --git a/doxygen/images/qmcsys.png b/doxygen/attic/images/qmcsys.png similarity index 100% rename from doxygen/images/qmcsys.png rename to doxygen/attic/images/qmcsys.png diff --git a/doxygen/images/sampleinput.eps b/doxygen/attic/images/sampleinput.eps similarity index 100% rename from doxygen/images/sampleinput.eps rename to doxygen/attic/images/sampleinput.eps diff --git a/doxygen/images/sampleinput.pdf b/doxygen/attic/images/sampleinput.pdf similarity index 100% rename from doxygen/images/sampleinput.pdf rename to doxygen/attic/images/sampleinput.pdf diff --git a/doxygen/images/sampleinput.png b/doxygen/attic/images/sampleinput.png similarity index 100% rename from doxygen/images/sampleinput.png rename to doxygen/attic/images/sampleinput.png diff --git a/doxygen/images/simulation.png b/doxygen/attic/images/simulation.png similarity index 100% rename from doxygen/images/simulation.png rename to doxygen/attic/images/simulation.png diff --git a/doxygen/images/src.png b/doxygen/attic/images/src.png similarity index 100% rename from doxygen/images/src.png rename to doxygen/attic/images/src.png diff --git a/doxygen/images/titancuda.png b/doxygen/attic/images/titancuda.png similarity index 100% rename from doxygen/images/titancuda.png rename to doxygen/attic/images/titancuda.png diff --git a/doxygen/images/twist-avg-conv.eps b/doxygen/attic/images/twist-avg-conv.eps similarity index 100% rename from doxygen/images/twist-avg-conv.eps rename to doxygen/attic/images/twist-avg-conv.eps diff --git a/doxygen/images/twist-avg-conv.png b/doxygen/attic/images/twist-avg-conv.png similarity index 100% rename from doxygen/images/twist-avg-conv.png rename to doxygen/attic/images/twist-avg-conv.png diff --git a/doxygen/images/ug_img_0.jpg b/doxygen/attic/images/ug_img_0.jpg similarity index 100% rename from doxygen/images/ug_img_0.jpg rename to doxygen/attic/images/ug_img_0.jpg diff --git 
a/doxygen/images/ug_img_1.jpg b/doxygen/attic/images/ug_img_1.jpg similarity index 100% rename from doxygen/images/ug_img_1.jpg rename to doxygen/attic/images/ug_img_1.jpg diff --git a/doxygen/images/ug_img_2.jpg b/doxygen/attic/images/ug_img_2.jpg similarity index 100% rename from doxygen/images/ug_img_2.jpg rename to doxygen/attic/images/ug_img_2.jpg diff --git a/doxygen/images/ug_img_3.jpg b/doxygen/attic/images/ug_img_3.jpg similarity index 100% rename from doxygen/images/ug_img_3.jpg rename to doxygen/attic/images/ug_img_3.jpg diff --git a/doxygen/images/ug_img_4.jpg b/doxygen/attic/images/ug_img_4.jpg similarity index 100% rename from doxygen/images/ug_img_4.jpg rename to doxygen/attic/images/ug_img_4.jpg diff --git a/doxygen/images/wfs.png b/doxygen/attic/images/wfs.png similarity index 100% rename from doxygen/images/wfs.png rename to doxygen/attic/images/wfs.png diff --git a/doxygen/images/wfs_class.png b/doxygen/attic/images/wfs_class.png similarity index 100% rename from doxygen/images/wfs_class.png rename to doxygen/attic/images/wfs_class.png diff --git a/doxygen/images/xmlsymbols.png b/doxygen/attic/images/xmlsymbols.png similarity index 100% rename from doxygen/images/xmlsymbols.png rename to doxygen/attic/images/xmlsymbols.png diff --git a/doxygen/index.doc b/doxygen/attic/index.doc similarity index 100% rename from doxygen/index.doc rename to doxygen/attic/index.doc diff --git a/doxygen/input_opt.tex b/doxygen/attic/input_opt.tex similarity index 100% rename from doxygen/input_opt.tex rename to doxygen/attic/input_opt.tex diff --git a/doxygen/input_qmc.tex b/doxygen/attic/input_qmc.tex similarity index 100% rename from doxygen/input_qmc.tex rename to doxygen/attic/input_qmc.tex diff --git a/doxygen/intro.tex b/doxygen/attic/intro.tex similarity index 100% rename from doxygen/intro.tex rename to doxygen/attic/intro.tex diff --git a/doxygen/intro_advanced.tex b/doxygen/attic/intro_advanced.tex similarity index 100% rename from 
doxygen/intro_advanced.tex rename to doxygen/attic/intro_advanced.tex diff --git a/doxygen/intro_io.tex b/doxygen/attic/intro_io.tex similarity index 100% rename from doxygen/intro_io.tex rename to doxygen/attic/intro_io.tex diff --git a/doxygen/myfooter.html b/doxygen/attic/myfooter.html similarity index 100% rename from doxygen/myfooter.html rename to doxygen/attic/myfooter.html diff --git a/doxygen/myheader.html b/doxygen/attic/myheader.html similarity index 100% rename from doxygen/myheader.html rename to doxygen/attic/myheader.html diff --git a/doxygen/qmcpack.cfg b/doxygen/attic/qmcpack.cfg similarity index 100% rename from doxygen/qmcpack.cfg rename to doxygen/attic/qmcpack.cfg diff --git a/doxygen/qmcpack.css b/doxygen/attic/qmcpack.css similarity index 100% rename from doxygen/qmcpack.css rename to doxygen/attic/qmcpack.css diff --git a/doxygen/qmcpack.sty b/doxygen/attic/qmcpack.sty similarity index 100% rename from doxygen/qmcpack.sty rename to doxygen/attic/qmcpack.sty diff --git a/examples/solids/dft-inputs-no-collect/LiH-nscf.in b/examples/solids/dft-inputs-no-collect/LiH-nscf.in index ea527817f7..e8b3213190 100644 --- a/examples/solids/dft-inputs-no-collect/LiH-nscf.in +++ b/examples/solids/dft-inputs-no-collect/LiH-nscf.in @@ -26,7 +26,7 @@ ATOMIC_SPECIES Li 9.01 Li.ncpp H 1.01 H.ncpp -ATOMIC_POSITIONS +ATOMIC_POSITIONS alat Li 0.00 0.00 0.00 H 0.50 0.50 0.50 K_POINTS {crystal} diff --git a/examples/solids/dft-inputs-no-collect/LiH-scf.in b/examples/solids/dft-inputs-no-collect/LiH-scf.in index 90553819d8..695bd52146 100644 --- a/examples/solids/dft-inputs-no-collect/LiH-scf.in +++ b/examples/solids/dft-inputs-no-collect/LiH-scf.in @@ -24,7 +24,7 @@ ATOMIC_SPECIES Li 9.01 Li.ncpp H 1.01 H.ncpp -ATOMIC_POSITIONS +ATOMIC_POSITIONS alat Li 0.00 0.00 0.00 H 0.50 0.50 0.50 K_POINTS AUTOMATIC diff --git a/examples/solids/dft-inputs-polarized-no-collect/LiH-nscf.in b/examples/solids/dft-inputs-polarized-no-collect/LiH-nscf.in index 0ffad52a6f..9e8aad97c4 
100644 --- a/examples/solids/dft-inputs-polarized-no-collect/LiH-nscf.in +++ b/examples/solids/dft-inputs-polarized-no-collect/LiH-nscf.in @@ -27,7 +27,7 @@ ATOMIC_SPECIES Li 9.01 Li.ncpp H 1.01 H.ncpp -ATOMIC_POSITIONS +ATOMIC_POSITIONS alat Li 0.00 0.00 0.00 H 0.50 0.50 0.50 K_POINTS {crystal} diff --git a/examples/solids/dft-inputs-polarized-no-collect/LiH-scf.in b/examples/solids/dft-inputs-polarized-no-collect/LiH-scf.in index c17eccc7f9..c155d20175 100644 --- a/examples/solids/dft-inputs-polarized-no-collect/LiH-scf.in +++ b/examples/solids/dft-inputs-polarized-no-collect/LiH-scf.in @@ -25,7 +25,7 @@ ATOMIC_SPECIES Li 9.01 Li.ncpp H 1.01 H.ncpp -ATOMIC_POSITIONS +ATOMIC_POSITIONS alat Li 0.00 0.00 0.00 H 0.50 0.50 0.50 K_POINTS AUTOMATIC diff --git a/examples/solids/dft-inputs-polarized/LiH-nscf.in b/examples/solids/dft-inputs-polarized/LiH-nscf.in index 80ca2b6cf2..9537f0573b 100644 --- a/examples/solids/dft-inputs-polarized/LiH-nscf.in +++ b/examples/solids/dft-inputs-polarized/LiH-nscf.in @@ -27,7 +27,7 @@ ATOMIC_SPECIES Li 9.01 Li.ncpp H 1.01 H.ncpp -ATOMIC_POSITIONS +ATOMIC_POSITIONS alat Li 0.00 0.00 0.00 H 0.50 0.50 0.50 K_POINTS {crystal} diff --git a/examples/solids/dft-inputs-polarized/LiH-scf.in b/examples/solids/dft-inputs-polarized/LiH-scf.in index adbb15de49..daf33d572a 100644 --- a/examples/solids/dft-inputs-polarized/LiH-scf.in +++ b/examples/solids/dft-inputs-polarized/LiH-scf.in @@ -25,7 +25,7 @@ ATOMIC_SPECIES Li 9.01 Li.ncpp H 1.01 H.ncpp -ATOMIC_POSITIONS +ATOMIC_POSITIONS alat Li 0.00 0.00 0.00 H 0.50 0.50 0.50 K_POINTS AUTOMATIC diff --git a/examples/solids/dft-inputs/LiH-nscf.in b/examples/solids/dft-inputs/LiH-nscf.in index 654f8f02b1..a492a35223 100644 --- a/examples/solids/dft-inputs/LiH-nscf.in +++ b/examples/solids/dft-inputs/LiH-nscf.in @@ -26,7 +26,7 @@ ATOMIC_SPECIES Li 9.01 Li.ncpp H 1.01 H.ncpp -ATOMIC_POSITIONS +ATOMIC_POSITIONS alat Li 0.00 0.00 0.00 H 0.50 0.50 0.50 K_POINTS {crystal} diff --git 
a/examples/solids/dft-inputs/LiH-scf.in b/examples/solids/dft-inputs/LiH-scf.in index 287134170c..8017b5b464 100644 --- a/examples/solids/dft-inputs/LiH-scf.in +++ b/examples/solids/dft-inputs/LiH-scf.in @@ -24,7 +24,7 @@ ATOMIC_SPECIES Li 9.01 Li.ncpp H 1.01 H.ncpp -ATOMIC_POSITIONS +ATOMIC_POSITIONS alat Li 0.00 0.00 0.00 H 0.50 0.50 0.50 K_POINTS AUTOMATIC diff --git a/external_codes/boost_multi/multi/.cirrus.yml b/external_codes/boost_multi/multi/.cirrus.yml deleted file mode 100644 index a181b58bd4..0000000000 --- a/external_codes/boost_multi/multi/.cirrus.yml +++ /dev/null @@ -1,14 +0,0 @@ -freebsd_instance: - image_family: freebsd-13-0 - -task: - install_script: pkg install -y ... - script: - - pkg install --no-install-recommends -y --quiet clang pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev - - ln --symbolic --force . ../multi - - clang++ --version - - mkdir build && cd build - - CXX="clang++" cmake .. - - cmake --build . 
--verbose -- --quiet --no-print-directory - - ctest --output-on-failure - diff --git a/external_codes/boost_multi/multi/.clang-format b/external_codes/boost_multi/multi/.clang-format new file mode 100644 index 0000000000..adcb8acec3 --- /dev/null +++ b/external_codes/boost_multi/multi/.clang-format @@ -0,0 +1,230 @@ +--- +Language: Cpp +# BasedOnStyle: Google +#AccessModifierOffset: -1 +AlignAfterOpenBracket: BlockIndent # Align +AlignArrayOfStructures: Right +#AlignConsecutiveMacros: None +AlignConsecutiveAssignments: Consecutive # None +#AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: Consecutive +#AlignEscapedNewlines: Left +AlignOperands: AlignAfterOperator +AlignTrailingComments: false +#AllowAllArgumentsOnNextLine: true +#AllowAllParametersOfDeclarationOnNextLine: true +#AllowShortEnumsOnASingleLine: true +#AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: true # false +#AllowShortFunctionsOnASingleLine: All +#AllowShortLambdasOnASingleLine: All +#AllowShortIfStatementsOnASingleLine: WithoutElse +#AllowShortLoopsOnASingleLine: true +#AlwaysBreakAfterDefinitionReturnType: None +#AlwaysBreakAfterReturnType: None +#AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: No # Yes +#AttributeMacros: +# - __capability +#BinPackArguments: true +#BinPackParameters: true +#BraceWrapping: +# AfterCaseLabel: false +# AfterClass: false +# AfterControlStatement: Never +# AfterEnum: false +# AfterFunction: false +# AfterNamespace: false +# AfterObjCDeclaration: false +# AfterStruct: false +# AfterUnion: false +# AfterExternBlock: false +# BeforeCatch: false +# BeforeElse: false +# BeforeLambdaBody: false +# BeforeWhile: false +# IndentBraces: false +# SplitEmptyFunction: true +# SplitEmptyRecord: true +# SplitEmptyNamespace: true +#BreakBeforeBinaryOperators: None +#BreakBeforeConceptDeclarations: true +#BreakBeforeBraces: Attach +#BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeComma 
+#BreakBeforeTernaryOperators: true +#BreakConstructorInitializersBeforeComma: false +#BreakConstructorInitializers: BeforeColon +#BreakAfterJavaFieldAnnotations: false +#BreakStringLiterals: true +ColumnLimit: 0 +#CommentPragmas: '^ IWYU pragma:' +QualifierAlignment: Right # Leave +#CompactNamespaces: false +ConstructorInitializerIndentWidth: 0 +ContinuationIndentWidth: 99 +#Cpp11BracedListStyle: true +#DeriveLineEnding: true +#DerivePointerAlignment: true +#DisableFormat: false +#EmptyLineAfterAccessModifier: Never +#EmptyLineBeforeAccessModifier: LogicalBlock +#ExperimentalAutoDetectBinPacking: false +#PackConstructorInitializers: NextLine +#BasedOnStyle: '' +#ConstructorInitializerAllOnOneLineOrOnePerLine: false +#AllowAllConstructorInitializersOnNextLine: true +FixNamespaceComments: true +#ForEachMacros: +# - foreach +# - Q_FOREACH +# - BOOST_FOREACH +#IfMacros: +# - KJ_IF_MAYBE +#IncludeBlocks: Regroup +#IncludeCategories: +# - Regex: '^' +# Priority: 2 +# SortPriority: 0 +# CaseSensitive: false +# - Regex: '^<.*\.h>' +# Priority: 1 +# SortPriority: 0 +# CaseSensitive: false +# - Regex: '^<.*' +# Priority: 2 +# SortPriority: 0 +# CaseSensitive: false +# - Regex: '.*' +# Priority: 3 +# SortPriority: 0 +# CaseSensitive: false +#IncludeIsMainRegex: '([-_](test|unittest))?$' +#IncludeIsMainSourceRegex: '' +#IndentAccessModifiers: true # false +AccessModifierOffset: -98 # 2 +#IndentCaseLabels: true +#IndentCaseBlocks: false +#IndentGotoLabels: true +#IndentPPDirectives: None +#IndentExternBlock: AfterExternBlock +#IndentRequires: false +IndentWidth: 99 +#IndentWrappedFunctionNames: false +#InsertTrailingCommas: None +#JavaScriptQuotes: Leave +#JavaScriptWrapImports: true +#KeepEmptyLinesAtTheStartOfBlocks: false +#LambdaBodyIndentation: Signature +#MacroBlockBegin: '' +#MacroBlockEnd: '' +#MaxEmptyLinesToKeep: 1 +#NamespaceIndentation: None +#ObjCBinPackProtocolList: Never +#ObjCBlockIndentWidth: 2 +#ObjCBreakBeforeNestedBlockParam: true +#ObjCSpaceAfterProperty: 
false +#ObjCSpaceBeforeProtocolList: true +#PenaltyBreakAssignment: 2 +#PenaltyBreakBeforeFirstCallParameter: 1 +#PenaltyBreakComment: 300 +#PenaltyBreakFirstLessLess: 120 +#PenaltyBreakOpenParenthesis: 0 +#PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +#PenaltyExcessCharacter: 1000000 +#PenaltyReturnTypeOnItsOwnLine: 200 +#PenaltyIndentedWhitespace: 0 +PointerAlignment: Left +#PPIndentWidth: -1 +#RawStringFormats: +# - Language: Cpp +# Delimiters: +# - cc +# - CC +# - cpp +# - Cpp +# - CPP +# - 'c++' +# - 'C++' +# CanonicalDelimiter: '' +# BasedOnStyle: google +# - Language: TextProto +# Delimiters: +# - pb +# - PB +# - proto +# - PROTO +# EnclosingFunctions: +# - EqualsProto +# - EquivToProto +# - PARSE_PARTIAL_TEXT_PROTO +# - PARSE_TEST_PROTO +# - PARSE_TEXT_PROTO +# - ParseTextOrDie +# - ParseTextProtoOrDie +# - ParseTestProto +# - ParsePartialTestProto +# CanonicalDelimiter: pb +# BasedOnStyle: google +#ReferenceAlignment: Pointer +#ReflowComments: true +#RemoveBracesLLVM: false +#SeparateDefinitionBlocks: Leave +#ShortNamespaceLines: 1 +#SortIncludes: CaseSensitive +#SortJavaStaticImport: Before +#SortUsingDeclarations: true +#SpaceAfterCStyleCast: false +#SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false # true +#SpaceBeforeAssignmentOperators: true +#SpaceBeforeCaseColon: false +#SpaceBeforeCpp11BracedList: false +#SpaceBeforeCtorInitializerColon: true +#SpaceBeforeInheritanceColon: true +SpaceBeforeParens: Custom # ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: false # true +# AfterForeachMacros: true +# AfterFunctionDefinitionName: false +# AfterFunctionDeclarationName: false +# AfterIfMacros: true +# AfterOverloadedOperator: false +# BeforeNonEmptyParentheses: false +#SpaceAroundPointerQualifiers: Default +#SpaceBeforeRangeBasedForLoopColon: true +#SpaceInEmptyBlock: false +#SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +#SpacesInAngles: Never +#SpacesInConditionalStatement: false 
+#SpacesInContainerLiterals: true +#SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 +# Maximum: -1 +#SpacesInParentheses: false +#SpacesInSquareBrackets: false +#SpaceBeforeSquareBrackets: false +#BitFieldColonSpacing: Both +Standard: c++17 +#StatementAttributeLikeMacros: +# - Q_EMIT +#StatementMacros: +# - Q_UNUSED +# - QT_REQUIRE_VERSION +TabWidth: 99 +#UseCRLF: false +UseTab: ForContinuationAndIndentation # Never +WhitespaceSensitiveMacros: + - BOOST_REQUIRE + - BOOST_TEST + - BOOST_TEST_REQUIRE +#WhitespaceSensitiveMacros: +# - STRINGIZE +# - PP_STRINGIZE +# - BOOST_PP_STRINGIZE +# - NS_SWIFT_NAME +# - CF_SWIFT_NAME +... diff --git a/external_codes/boost_multi/multi/.clang-tidy b/external_codes/boost_multi/multi/.clang-tidy index 2a83744bb6..37ecad7d62 100644 --- a/external_codes/boost_multi/multi/.clang-tidy +++ b/external_codes/boost_multi/multi/.clang-tidy @@ -5,67 +5,52 @@ HeaderFilterRegex: '.' FormatStyle: file CheckOptions: - - { key: readability-identifier-naming.ClassCase , value: lower_case } - - { key: readability-identifier-naming.FunctionCase , value: lower_case } - - { key: readability-identifier-naming.GlobalConstantCase , value: aNy_CasE } - - { key: readability-identifier-naming.NamespaceCase , value: lower_case } - - { key: readability-identifier-naming.PrivateMemberPrefix , value: '' } - - { key: readability-identifier-naming.PrivateMemberSufix , value: _ } - - { key: readability-identifier-naming.StructCase , value: lower_case } - - { key: readability-identifier-naming.TemplateParameterCase , value: CamelCase } - - { key: readability-identifier-length.MinimumVariableNameLength , value: 2 } - - { key: readability-identifier-length.MinimumParameterNameLength , value: 2 } - - { key: readability-identifier-length.MinimumLoopCounterNameLength, value: 2 } - - { key: readability-identifier-length.IgnoredLoopCounterNames , value: "^[ijk_]$" } + - { key: readability-identifier-naming.ClassCase , value: lower_case } + - { 
key: readability-identifier-naming.FunctionCase , value: lower_case } + - { key: readability-identifier-naming.GlobalConstantCase , value: aNy_CasE } + - { key: readability-identifier-length.IgnoredLoopCounterNames , value: "^[ijk_]$" } + - { key: readability-identifier-length.IgnoredVariableNames , value: "^[ijk_]$" } + - { key: readability-identifier-length.MethodCase , value: lower_case } + - { key: readability-identifier-length.MinimumLoopCounterNameLength , value: 2 } + - { key: readability-identifier-length.MinimumParameterNameLength , value: 2 } + - { key: readability-identifier-length.MinimumVariableNameLength , value: 2 } + - { key: readability-identifier-naming.MacroDefinitionCase , value: UPPER_CASE } + - { key: readability-identifier-naming.MacroDefinitionPrefix, , value: BOOST_MULTI_ } + - { key: readability-identifier-naming.MacroDefinitionSuffix, , value: * } + - { key: readability-identifier-naming.NamespaceCase , value: lower_case } + - { key: readability-identifier-naming.PrivateMemberPrefix , value: '' } + - { key: readability-identifier-naming.PrivateMemberSuffix , value: _ } + - { key: readability-identifier-naming.PrivateMethodSuffix , value: _ } + - { key: readability-identifier-naming.StructCase , value: lower_case } + - { key: readability-identifier-naming.TemplateParameterCase , value: CamelCase } + - { key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp, value: expr-type} + - { key: readability-operators-representation.BinaryOperators , value: '&&;&=;&;|;~;!;!=;||;|=;^;^='} +# value: expr-type , bug in clang 16 in c++20 mode https://stackoverflow.com/a/75157215/225186 -# - { key: readability-identifier-naming.VariableCase, value: aNy_CasE } for tests -# - { key: readability-identifier-naming.PrivateMethodSuffix, value: } +# for range-for loops, e.g. 
`for(auto i : is) ...` +# - { key: readability-identifier-length.IgnoredVariableNames , value: "^[ijk]$" } Checks: '*, - -altera-id-dependent-backward-branch, -altera-struct-pack-align, - -altera-unroll-loops, - -cert-err58-cpp, - -clang-diagnostic-c++98-compat-unnamed-type-template-args, + -clang-diagnostic-deprecated-declarations, -cppcoreguidelines-avoid-magic-numbers, - -cppcoreguidelines-avoid-non-const-global-variables, -cppcoreguidelines-macro-usage, - -cppcoreguidelines-pro-bounds-array-to-pointer-decay, - -cppcoreguidelines-pro-type-vararg, - -fuchsia-default-arguments-calls, - -fuchsia-default-arguments-declarations, -fuchsia-overloaded-operator, - -fuchsia-statically-constructed-objects, -fuchsia-trailing-return, - -hicpp-no-array-decay, - -hicpp-vararg, - -llvmlibc-callee-namespace, - -llvmlibc-implementation-in-namespace, - -llvmlibc-restrict-system-libc-headers, + -llvmlibc-*, + -misc-include-cleaner, -modernize-use-nodiscard, -readability-magic-numbers ' - -# -altera-id-dependent-backward-branch, // seems like a buggy warning https://github.com/llvm/llvm-project/issues/52790 +## explanation of the suppressed rules above: # -altera-struct-pack-align, // this asks to use non-standard pragmas such as __attribute__((aligned(0))) and _attribute__((packed)), may interfeere with gcc/nvcc compatibility? 
-# -altera-unroll-loops, // expansion BOOST_REQUIRE macro requires this -# -cert-err58-cpp, // expansion BOOST_AUTO_TEST_CASE macro requires this -# -clang-diagnostic-c++98-compat-unnamed-type-template-args // allow lambda types in template parameters +# -clang-diagnostic-deprecated-declarations // some test run deprecated functions on purposes # -cppcoreguidelines-avoid-magic-numbers, // all tests use magic numbers -# -cppcoreguidelines-avoid-non-const-global-variables, // expansion BOOST_AUTO_TEST_CASE macros require this -# -cppcoreguidelines-macro-usage, // definition of BOOST_TEST_MODULE macro requires this -# -cppcoreguidelines-pro-bounds-array-to-pointer-decay, // assert macros need this -# -cppcoreguidelines-pro-type-vararg, // expansion of BOOST_REQUIRE macros require this -# -fuchsia-default-arguments-declarations // BOOST_AUTO_TEST_CASE_TEMPLATE -# -fuchsia-default-arguments-calls, // expansion of BOOST_REQUIRE macros require this -# -fuchsia-overloaded-operator, // this library overloads operators (==, >, +, &) -# -fuchsia-statically-constructed-objects, // expansion of BOOST_AUTO_TEST_CASE creates these -# -fuchsia-trailing-return, // to allow any '-> decltype(auto)' -# -hicpp-no-array-decay, // all assert macros require this -# -hicpp-vararg, // all expansions of BOOST_TEST_REQUIRE macros require this -# -llvmlibc-implementation-in-namespace, // nonsense warning -# -llvmlibc-callee-namespace, // nonsense warning -# -llvmlibc-restrict-system-libc-headers, // nonsense warning +# -cppcoreguidelines-macro-usage, // BLAS adaptors uses a lot of macros +# -fuchsia-overloaded-operator, // this library overloads operators (==, <, +, &) +# -fuchsia-trailing-return, // to allow any '-> decltype(auto)', contradicts modernize trailing +# -llvmlibc-*, // nonsense warnings, for llvm developers +# -misc-include-cleaner, // using Boost.Test # -modernize-use-nodiscard, // this would add [[nodiscard]] to almost all functions, adding a lot of noise # 
-readability-magic-numbers, // all tests use magic numbers diff --git a/external_codes/boost_multi/multi/.cmake-format b/external_codes/boost_multi/multi/.cmake-format new file mode 100644 index 0000000000..8ce29a6c6c --- /dev/null +++ b/external_codes/boost_multi/multi/.cmake-format @@ -0,0 +1,241 @@ +# ---------------------------------- +# Options affecting listfile parsing +# ---------------------------------- +with section("parse"): + + # Specify structure for custom cmake functions + additional_commands = { 'foo': { 'flags': ['BAR', 'BAZ'], + 'kwargs': {'DEPENDS': '*', 'HEADERS': '*', 'SOURCES': '*'}}} + + # Override configurations per-command where available + override_spec = {} + + # Specify variable tags. + vartags = [] + + # Specify property tags. + proptags = [] + +# ----------------------------- +# Options affecting formatting. +# ----------------------------- +with section("format"): + + # Disable formatting entirely, making cmake-format a no-op + disable = False + + # How wide to allow formatted cmake files + line_width = 240 + + # How many spaces to tab for indent + tab_size = 2 + + # If true, lines are indented using tab characters (utf-8 0x09) instead of + # space characters (utf-8 0x20). In cases where the layout would + # require a fractional tab character, the behavior of the fractional + # indentation is governed by + use_tabchars = False + + # If is True, then the value of this variable indicates how + # fractional indentions are handled during whitespace replacement. If set to + # 'use-space', fractional indentation is left as spaces (utf-8 0x20). If set + # to `round-up` fractional indentation is replaced with a single tab character + # (utf-8 0x09) effectively shifting the column to the next tabstop + fractional_tab_policy = 'use-space' + + # If an argument group contains more than this many sub-groups (parg or kwarg + # groups) then force it to a vertical layout. 
+ max_subgroups_hwrap = 2 + + # If a positional argument group contains more than this many arguments, then + # force it to a vertical layout. + max_pargs_hwrap = 6 + + # If a cmdline positional group consumes more than this many lines without + # nesting, then invalidate the layout (and nest) + max_rows_cmdline = 2 + + # If true, separate flow control names from their parentheses with a space + separate_ctrl_name_with_space = False + + # If true, separate function names from parentheses with a space + separate_fn_name_with_space = False + + # If a statement is wrapped to more than one line, than dangle the closing + # parenthesis on its own line. + dangle_parens = False + + # If the trailing parenthesis must be 'dangled' on its on line, then align it + # to this reference: `prefix`: the start of the statement, `prefix-indent`: + # the start of the statement, plus one indentation level, `child`: align to + # the column of the arguments + dangle_align = 'prefix' + + # If the statement spelling length (including space and parenthesis) is + # smaller than this amount, then force reject nested layouts. + min_prefix_chars = 4 + + # If the statement spelling length (including space and parenthesis) is larger + # than the tab width by more than this amount, then force reject un-nested + # layouts. + max_prefix_chars = 10 + + # If a candidate layout is wrapped horizontally but it exceeds this many + # lines, then reject the layout. + max_lines_hwrap = 2 + + # What style line endings to use in the output. 
+ line_ending = 'unix' + + # Format command names consistently as 'lower' or 'upper' case + command_case = 'canonical' + + # Format keywords consistently as 'lower' or 'upper' case + keyword_case = 'unchanged' + + # A list of command names which should always be wrapped + always_wrap = [] + + # If true, the argument lists which are known to be sortable will be sorted + # lexicographicall + enable_sort = True + + # If true, the parsers may infer whether or not an argument list is sortable + # (without annotation). + autosort = True + + # By default, if cmake-format cannot successfully fit everything into the + # desired linewidth it will apply the last, most aggressive attempt that it + # made. If this flag is True, however, cmake-format will print error, exit + # with non-zero status code, and write-out nothing + require_valid_layout = False + + # A dictionary mapping layout nodes to a list of wrap decisions. See the + # documentation for more information. + layout_passes = {} + +# ------------------------------------------------ +# Options affecting comment reflow and formatting. +# ------------------------------------------------ +with section("markup"): + + # What character to use for bulleted lists + bullet_char = '*' + + # What character to use as punctuation after numerals in an enumerated list + enum_char = '.' + + # If comment markup is enabled, don't reflow the first comment block in each + # listfile. Use this to preserve formatting of your copyright/license + # statements. + first_comment_is_literal = False + + # If comment markup is enabled, don't reflow any comment block which matches + # this (regex) pattern. Default is `None` (disabled). 
+ literal_comment_pattern = None + + # Regular expression to match preformat fences in comments default= + # ``r'^\s*([`~]{3}[`~]*)(.*)$'`` + fence_pattern = '^\\s*([`~]{3}[`~]*)(.*)$' + + # Regular expression to match rulers in comments default= + # ``r'^\s*[^\w\s]{3}.*[^\w\s]{3}$'`` + ruler_pattern = '^\\s*[^\\w\\s]{3}.*[^\\w\\s]{3}$' + + # If a comment line matches starts with this pattern then it is explicitly a + # trailing comment for the preceding argument. Default is '#<' + explicit_trailing_pattern = '#<' + + # If a comment line starts with at least this many consecutive hash + # characters, then don't lstrip() them off. This allows for lazy hash rulers + # where the first hash char is not separated by space + hashruler_min_length = 10 + + # If true, then insert a space between the first hash char and remaining hash + # chars in a hash ruler, and normalize its length to fill the column + canonicalize_hashrulers = True + + # enable comment markup parsing and reflow + enable_markup = True + +# ---------------------------- +# Options affecting the linter +# ---------------------------- +with section("lint"): + + # a list of lint codes to disable + disabled_codes = [] + + # regular expression pattern describing valid function names + function_pattern = '[0-9a-z_]+' + + # regular expression pattern describing valid macro names + macro_pattern = '[0-9A-Z_]+' + + # regular expression pattern describing valid names for variables with global + # (cache) scope + global_var_pattern = '[A-Z][0-9A-Z_]+' + + # regular expression pattern describing valid names for variables with global + # scope (but internal semantic) + internal_var_pattern = '_[A-Z][0-9A-Z_]+' + + # regular expression pattern describing valid names for variables with local + # scope + local_var_pattern = '[a-z][a-z0-9_]+' + + # regular expression pattern describing valid names for privatedirectory + # variables + private_var_pattern = '_[0-9a-z_]+' + + # regular expression pattern describing valid 
names for public directory + # variables + public_var_pattern = '[A-Z][0-9A-Z_]+' + + # regular expression pattern describing valid names for function/macro + # arguments and loop variables. + argument_var_pattern = '[a-z][a-z0-9_]+' + + # regular expression pattern describing valid names for keywords used in + # functions or macros + keyword_pattern = '[A-Z][0-9A-Z_]+' + + # In the heuristic for C0201, how many conditionals to match within a loop in + # before considering the loop a parser. + max_conditionals_custom_parser = 2 + + # Require at least this many newlines between statements + min_statement_spacing = 1 + + # Require no more than this many newlines between statements + max_statement_spacing = 2 + max_returns = 6 + max_branches = 12 + max_arguments = 5 + max_localvars = 15 + max_statements = 50 + +# ------------------------------- +# Options affecting file encoding +# ------------------------------- +with section("encode"): + + # If true, emit the unicode byte-order mark (BOM) at the start of the file + emit_byteorder_mark = False + + # Specify the encoding of the input file. Defaults to utf-8 + input_encoding = 'utf-8' + + # Specify the encoding of the output file. Defaults to utf-8. Note that cmake + # only claims to support utf-8 so be careful when using anything else + output_encoding = 'utf-8' + +# ------------------------------------- +# Miscellaneous configurations options. +# ------------------------------------- +with section("misc"): + + # A dictionary containing any per-command configuration overrides. Currently + # only `command_case` is supported. 
+ per_command = {} + diff --git a/external_codes/boost_multi/multi/.codecov.yml b/external_codes/boost_multi/multi/.codecov.yml index 175ef95210..515b535053 100644 --- a/external_codes/boost_multi/multi/.codecov.yml +++ b/external_codes/boost_multi/multi/.codecov.yml @@ -1,17 +1,22 @@ codecov: - token: 999feb5b-a599-4d02-b9c5-46d977247f3a notify: + after_n_builds: 2 require_ci_to_pass: yes coverage: + status: + project: + default: + threshold: 0.5% + patch: true + changes: false precision: 2 round: down - range: 99..100 + range: "99..100" - status: - project: yes - patch: yes - changes: no +# Ignore testing directory itself +ignore: + - "test/" parsers: gcov: @@ -23,3 +28,36 @@ parsers: comment: layout: "header, diff" + + +# Fixes report prefix paths from CI dynamic coverage action +# from https://docs.codecov.io/docs/fixing-paths +#fixes: +# - "/builds/correaa::" + +#codecov: +# disable_default_path_fixes: true +# token: 999feb5b-a599-4d02-b9c5-46d977247f3a +# notify: +# require_ci_to_pass: yes +# +#coverage: +# precision: 2 +# round: down +# range: 99..100 +# +# status: +# project: yes +# patch: yes +# changes: no +# +#parsers: +# gcov: +# branch_detection: +# conditional: yes +# loop: yes +# method: no +# macro: no +# +#comment: +# layout: "header, diff" diff --git a/external_codes/boost_multi/multi/.deepsource.toml b/external_codes/boost_multi/multi/.deepsource.toml index 0ea2ab62c1..be25539bb3 100644 --- a/external_codes/boost_multi/multi/.deepsource.toml +++ b/external_codes/boost_multi/multi/.deepsource.toml @@ -1,9 +1,9 @@ version = 1 -[[analyzers]] -name = "shell" -enabled = true +test_patterns = ["test/**"] [[analyzers]] name = "test-coverage" -enabled = true \ No newline at end of file + +[[analyzers]] +name = "cxx" diff --git a/external_codes/boost_multi/multi/.github/workflows/ci.yml b/external_codes/boost_multi/multi/.github/workflows/ci.yml new file mode 100644 index 0000000000..3e9e6981fb --- /dev/null +++ 
b/external_codes/boost_multi/multi/.github/workflows/ci.yml @@ -0,0 +1,551 @@ +# Copyright 2021-2022 Andrey Semashev +# Copyright 2023 Matt Borland +# +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) + +name: CI + +on: + pull_request: + push: + branches: + - master + - develop + - feature/** + +concurrency: + group: ${{format('{0}:{1}', github.repository, github.ref)}} + cancel-in-progress: true + +env: + GIT_FETCH_JOBS: 8 + NET_RETRY_COUNT: 5 + DEFAULT_BUILD_VARIANT: debug,release + +jobs: + posix: + defaults: + run: + shell: bash + + strategy: + fail-fast: false + matrix: + include: + - toolset: gcc-7 + cxxstd: "03,11,14,17" + address_model: 32,64 + os: ubuntu-latest + container: ubuntu:18.04 + install: + - g++-7-multilib + - toolset: gcc-8 + cxxstd: "03,11,14,17,2a" + address_model: 32,64 + os: ubuntu-latest + container: ubuntu:18.04 + install: + - g++-8-multilib + - toolset: gcc-9 + cxxstd: "03,11,14,17,2a" + address_model: 32,64 + os: ubuntu-20.04 + install: + - g++-9-multilib + - toolset: gcc-9 + cxxstd: "03-gnu,11-gnu,14-gnu,17-gnu,2a-gnu" + address_model: 32,64 + os: ubuntu-20.04 + install: + - g++-9-multilib + - toolset: gcc-10 + cxxstd: "03,11,14,17,20" + address_model: 32,64 + os: ubuntu-20.04 + install: + - g++-10-multilib + - toolset: gcc-11 + cxxstd: "03,11,14,17,20,23" + address_model: 32,64 + os: ubuntu-22.04 + install: + - g++-11-multilib + - toolset: gcc-12 + cxxstd: "03,11,14,17,20,23" + address_model: 32,64 + os: ubuntu-22.04 + install: + - g++-12-multilib + # Gitub deleted support from 22.04 and 24.04 has internal warnings about futex + # Disable until the runner is out of beta + #- toolset: gcc-13 + # cxxstd: "03,11,14,17,20,23" + # address_model: 32,64 + # os: ubuntu-24.04 + # install: + # - g++-13-multilib + # cxxflags: -Wno-dangling-reference # Boost test hits error + - toolset: gcc-12 + cxxstd: "03-gnu,11-gnu,14-gnu,17-gnu,20-gnu,23-gnu" + 
address_model: "32" + os: ubuntu-22.04 + install: + - g++-12-multilib + - toolset: gcc-12 + cxxstd: "03-gnu,11-gnu,14-gnu,17-gnu,20-gnu,23-gnu" + address_model: "64" + os: ubuntu-22.04 + install: + - g++-12-multilib + - name: 32-bit UBSAN + toolset: gcc-12 + cxxstd: "03,11,14,17,20,23" + address_model: "32" + ubsan: 1 + os: ubuntu-22.04 + install: + - g++-12-multilib + - name: 64-bit UBSAN + toolset: gcc-12 + cxxstd: "03,11,14,17,20,23" + address_model: "64" + ubsan: 1 + os: ubuntu-22.04 + install: + - g++-12-multilib + + # Linux, clang + - toolset: clang + compiler: clang++-6.0 + cxxstd: "03,11,14,17" + os: ubuntu-latest + container: ubuntu:18.04 + install: + - clang-6.0 + - toolset: clang + compiler: clang++-7 + cxxstd: "03,11,14,17" + os: ubuntu-latest + container: ubuntu:18.04 + install: + - clang-7 + # Note: clang-8 does not fully support C++20, so it is not compatible with libstdc++-8 in this mode + - toolset: clang + compiler: clang++-8 + cxxstd: "03,11,14,17,2a" + os: ubuntu-latest + container: ubuntu:18.04 + install: + - clang-8 + - g++-7 + gcc_toolchain: 7 + - toolset: clang + compiler: clang++-9 + cxxstd: "03,11,14,17,2a" + os: ubuntu-20.04 + install: + - clang-9 + - toolset: clang + compiler: clang++-10 + cxxstd: "03,11,14,17,20" + os: ubuntu-20.04 + install: + - clang-10 + - toolset: clang + compiler: clang++-11 + cxxstd: "03,11,14,17" + os: ubuntu-22.04 + install: + - clang-11 + - toolset: clang + compiler: clang++-12 + cxxstd: "03,11,14,17" + os: ubuntu-22.04 + install: + - clang-12 + - toolset: clang + compiler: clang++-13 + cxxstd: "03,11,14,17" + os: ubuntu-22.04 + install: + - clang-13 + - toolset: clang + compiler: clang++-14 + cxxstd: "03,11,14,17" + os: ubuntu-22.04 + install: + - clang-14 + - toolset: clang + compiler: clang++-14 + cxxstd: "03-gnu,11-gnu,14-gnu,17-gnu" + os: ubuntu-22.04 + install: + - clang-14 + - toolset: clang + compiler: clang++-15 + cxxstd: "03,11,14,17,20" + os: ubuntu-22.04 + install: + - clang-15 + sources: + - "deb 
http://apt.llvm.org/jammy/ llvm-toolchain-jammy-15 main" + source_keys: + - "https://apt.llvm.org/llvm-snapshot.gpg.key" + - toolset: clang + compiler: clang++-15 + cxxstd: "03,11,14,17,20,2b" + os: ubuntu-22.04 + install: + - clang-15 + - libc++-15-dev + - libc++abi-15-dev + sources: + - "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-15 main" + source_keys: + - "https://apt.llvm.org/llvm-snapshot.gpg.key" + cxxflags: -stdlib=libc++ + linkflags: -stdlib=libc++ + - toolset: clang + compiler: clang++-16 + cxxstd: "03,11,14,17,20,2b" + os: ubuntu-22.04 + install: + - clang-16 + sources: + - "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-16 main" + source_keys: + - "https://apt.llvm.org/llvm-snapshot.gpg.key" + - toolset: clang + compiler: clang++-17 + cxxstd: "03,11,14,17,20,2b" + os: ubuntu-22.04 + install: + - clang-17 + sources: + - "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main" + source_keys: + - "https://apt.llvm.org/llvm-snapshot.gpg.key" + - name: UBSAN + toolset: clang + compiler: clang++-14 + cxxstd: "03,11,14,17,20,2b" + cxxflags: -stdlib=libc++ + linkflags: -stdlib=libc++ + ubsan: 1 + os: ubuntu-22.04 + install: + - clang-14 + - libc++-14-dev + - libc++abi-14-dev + + - toolset: clang + cxxstd: "03,11,14,17,20,2b" + os: macos-12 + - toolset: clang + cxxstd: "03,11,14,17,20,2b" + os: macos-13 + - toolset: clang + cxxstd: "03,11,14,17,20,2b" + os: macos-14 + + timeout-minutes: 120 + runs-on: ${{matrix.os}} + container: ${{matrix.container}} + + steps: + - name: Setup environment + run: | + if [ -f "/etc/debian_version" ] + then + echo "DEBIAN_FRONTEND=noninteractive" >> $GITHUB_ENV + export DEBIAN_FRONTEND=noninteractive + fi + if [ -n "${{matrix.container}}" ] + then + echo "GHA_CONTAINER=${{matrix.container}}" >> $GITHUB_ENV + if [ -f "/etc/debian_version" ] + then + apt-get -o Acquire::Retries=$NET_RETRY_COUNT update + if [ "$(apt-cache search "^python-is-python3$" | wc -l)" -ne 0 ] + then + PYTHON_PACKAGE="python-is-python3" + 
else + PYTHON_PACKAGE="python" + fi + apt-get -o Acquire::Retries=$NET_RETRY_COUNT install -y sudo software-properties-common tzdata wget curl apt-transport-https ca-certificates make build-essential g++ $PYTHON_PACKAGE python3 perl git cmake + fi + fi + git config --global pack.threads 0 + - uses: actions/checkout@v3 + + - name: Install packages + if: matrix.install + run: | + declare -a SOURCE_KEYS SOURCES + if [ -n "${{join(matrix.source_keys, ' ')}}" ] + then + SOURCE_KEYS=("${{join(matrix.source_keys, '" "')}}") + fi + if [ -n "${{join(matrix.sources, ' ')}}" ] + then + SOURCES=("${{join(matrix.sources, '" "')}}") + fi + for key in "${SOURCE_KEYS[@]}" + do + for i in {1..$NET_RETRY_COUNT} + do + echo "Adding key: $key" + wget -O - "$key" | sudo apt-key add - && break || sleep 2 + done + done + if [ ${#SOURCES[@]} -gt 0 ] + then + APT_ADD_REPO_COMMON_ARGS=("-y") + APT_ADD_REPO_SUPPORTED_ARGS="$(apt-add-repository --help | perl -ne 'if (/^\s*-n/) { print "n"; } elsif (/^\s*-P/) { print "P"; } elsif (/^\s*-S/) { print "S"; } elsif (/^\s*-U/) { print "U"; }')" + if [ -n "$APT_ADD_REPO_SUPPORTED_ARGS" -a -z "${APT_ADD_REPO_SUPPORTED_ARGS##*n*}" ] + then + APT_ADD_REPO_COMMON_ARGS+=("-n") + fi + APT_ADD_REPO_HAS_SOURCE_ARGS="$([ -n "$APT_ADD_REPO_SUPPORTED_ARGS" -a -z "${APT_ADD_REPO_SUPPORTED_ARGS##*P*}" -a -z "${APT_ADD_REPO_SUPPORTED_ARGS##*S*}" -a -z "${APT_ADD_REPO_SUPPORTED_ARGS##*U*}" ] && echo 1 || echo 0)" + for source in "${SOURCES[@]}" + do + for i in {1..$NET_RETRY_COUNT} + do + APT_ADD_REPO_ARGS=("${APT_ADD_REPO_COMMON_ARGS[@]}") + if [ $APT_ADD_REPO_HAS_SOURCE_ARGS -ne 0 ] + then + case "$source" in + "ppa:"*) + APT_ADD_REPO_ARGS+=("-P") + ;; + "deb "*) + APT_ADD_REPO_ARGS+=("-S") + ;; + *) + APT_ADD_REPO_ARGS+=("-U") + ;; + esac + fi + APT_ADD_REPO_ARGS+=("$source") + echo "apt-add-repository ${APT_ADD_REPO_ARGS[@]}" + sudo -E apt-add-repository "${APT_ADD_REPO_ARGS[@]}" && break || sleep 2 + done + done + fi + sudo apt-get -o 
Acquire::Retries=$NET_RETRY_COUNT update + sudo apt-get -o Acquire::Retries=$NET_RETRY_COUNT install -y ${{join(matrix.install, ' ')}} locales + sudo locale-gen de_DE.UTF-8 + sudo update-locale + - name: Setup GCC Toolchain + if: matrix.gcc_toolchain + run: | + GCC_TOOLCHAIN_ROOT="$HOME/gcc-toolchain" + echo "GCC_TOOLCHAIN_ROOT=\"$GCC_TOOLCHAIN_ROOT\"" >> $GITHUB_ENV + MULTIARCH_TRIPLET="$(dpkg-architecture -qDEB_HOST_MULTIARCH)" + mkdir -p "$GCC_TOOLCHAIN_ROOT" + ln -s /usr/include "$GCC_TOOLCHAIN_ROOT/include" + ln -s /usr/bin "$GCC_TOOLCHAIN_ROOT/bin" + mkdir -p "$GCC_TOOLCHAIN_ROOT/lib/gcc/$MULTIARCH_TRIPLET" + ln -s "/usr/lib/gcc/$MULTIARCH_TRIPLET/${{matrix.gcc_toolchain}}" "$GCC_TOOLCHAIN_ROOT/lib/gcc/$MULTIARCH_TRIPLET/${{matrix.gcc_toolchain}}" + - name: Setup Boost + run: | + echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY + LIBRARY=${GITHUB_REPOSITORY#*/} + echo LIBRARY: $LIBRARY + echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV + echo GITHUB_BASE_REF: $GITHUB_BASE_REF + echo GITHUB_REF: $GITHUB_REF + REF=${GITHUB_BASE_REF:-$GITHUB_REF} + REF=${REF#refs/heads/} + echo REF: $REF + BOOST_BRANCH=develop && [ "$REF" = "master" ] && BOOST_BRANCH=master || true + echo BOOST_BRANCH: $BOOST_BRANCH + BUILD_JOBS=$((nproc || sysctl -n hw.ncpu) 2> /dev/null) + echo "BUILD_JOBS=$BUILD_JOBS" >> $GITHUB_ENV + echo "CMAKE_BUILD_PARALLEL_LEVEL=$BUILD_JOBS" >> $GITHUB_ENV + DEPINST_ARGS=() + GIT_VERSION="$(git --version | sed -e 's/git version //')" + GIT_HAS_JOBS=1 + if [ -f "/etc/debian_version" ] + then + if $(dpkg --compare-versions "$GIT_VERSION" lt 2.8.0) + then + GIT_HAS_JOBS=0 + fi + else + declare -a GIT_VER=(${GIT_VERSION//./ }) + declare -a GIT_MIN_VER=(2 8 0) + for ((i=0; i<${#GIT_VER[@]}; i++)) + do + if [ -z "${GIT_MIN_VER[i]}" ] + then + GIT_MIN_VER[i]=0 + fi + if [ "${GIT_VER[i]}" -lt "${GIT_MIN_VER[i]}" ] + then + GIT_HAS_JOBS=0 + break + fi + done + fi + if [ "$GIT_HAS_JOBS" -ne 0 ] + then + DEPINST_ARGS+=("--git_args" "--jobs $GIT_FETCH_JOBS") + fi + cd .. 
+ git clone -b "$BOOST_BRANCH" --depth 1 "https://github.com/boostorg/boost.git" "boost-root" + cd boost-root + mkdir -p libs/$LIBRARY + cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY + git submodule update --init tools/boostdep + DEPINST_ARGS+=("$LIBRARY") + python tools/boostdep/depinst/depinst.py "${DEPINST_ARGS[@]}" + if [ -z "${{matrix.cmake_tests}}" ] + then + ./bootstrap.sh + ./b2 headers + if [ -n "${{matrix.compiler}}" -o -n "$GCC_TOOLCHAIN_ROOT" ] + then + echo -n "using ${{matrix.toolset}} : : ${{matrix.compiler}}" > ~/user-config.jam + if [ -n "$GCC_TOOLCHAIN_ROOT" ] + then + echo -n " : \"--gcc-toolchain=$GCC_TOOLCHAIN_ROOT\" \"--gcc-toolchain=$GCC_TOOLCHAIN_ROOT\"" >> ~/user-config.jam + fi + echo " ;" >> ~/user-config.jam + fi + fi + - name: Run tests + if: matrix.cmake_tests == '' + run: | + cd ../boost-root + B2_ARGS=("-j" "$BUILD_JOBS" "toolset=${{matrix.toolset}}" "cxxstd=${{matrix.cxxstd}}" "link=static,shared") + if [ -n "${{matrix.build_variant}}" ] + then + B2_ARGS+=("variant=${{matrix.build_variant}}") + else + B2_ARGS+=("variant=$DEFAULT_BUILD_VARIANT") + fi + if [ -n "${{matrix.threading}}" ] + then + B2_ARGS+=("threading=${{matrix.threading}}") + fi + if [ -n "${{matrix.ubsan}}" ] + then + export UBSAN_OPTIONS="print_stacktrace=1" + B2_ARGS+=("cxxflags=-fsanitize=undefined -fno-sanitize-recover=undefined" "linkflags=-fsanitize=undefined -fuse-ld=gold" "define=UBSAN=1" "debug-symbols=on" "visibility=global") + fi + if [ -n "${{matrix.cxxflags}}" ] + then + B2_ARGS+=("cxxflags=${{matrix.cxxflags}}") + fi + if [ -n "${{matrix.linkflags}}" ] + then + B2_ARGS+=("linkflags=${{matrix.linkflags}}") + fi + if [ -n "${{matrix.address_model}}" ] + then + B2_ARGS+=("address-model=${{matrix.address_model}}") + fi + B2_ARGS+=("libs/$LIBRARY/test") + ./b2 "${B2_ARGS[@]}" cxxflags="-Wall -Wextra -Werror" + + windows: + strategy: + fail-fast: false + matrix: + include: + - toolset: msvc-14.2 + cxxstd: "14,17,20,latest" + addrmd: 32,64 + # cxxflags: 
"/permissive-" + os: windows-2019 + - toolset: msvc-14.3 + cxxstd: "14,17,20,latest" + addrmd: 32,64 + # cxxflags: "/permissive-" + os: windows-2022 + - toolset: gcc + cxxstd: "03,11,14,17,2a" + addrmd: "64" + os: windows-2019 + + runs-on: ${{matrix.os}} + + steps: + - uses: actions/checkout@v3 + + - name: Setup Boost + shell: cmd + run: | + echo GITHUB_REPOSITORY: %GITHUB_REPOSITORY% + for /f %%i in ("%GITHUB_REPOSITORY%") do set LIBRARY=%%~nxi + echo LIBRARY: %LIBRARY% + echo LIBRARY=%LIBRARY%>>%GITHUB_ENV% + echo GITHUB_BASE_REF: %GITHUB_BASE_REF% + echo GITHUB_REF: %GITHUB_REF% + if "%GITHUB_BASE_REF%" == "" set GITHUB_BASE_REF=%GITHUB_REF% + set BOOST_BRANCH=develop + for /f %%i in ("%GITHUB_BASE_REF%") do if "%%~nxi" == "master" set BOOST_BRANCH=master + echo BOOST_BRANCH: %BOOST_BRANCH% + cd .. + git clone -b %BOOST_BRANCH% --depth 1 https://github.com/boostorg/boost.git boost-root + cd boost-root + xcopy /s /e /q %GITHUB_WORKSPACE% libs\%LIBRARY%\ + git submodule update --init tools/boostdep + python tools/boostdep/depinst/depinst.py --git_args "--jobs 3" %LIBRARY% + cmd /c bootstrap + b2 -d0 headers + + - name: Run tests + shell: cmd + run: | + cd ../boost-root + b2 -j3 libs/%LIBRARY%/test toolset=${{matrix.toolset}} cxxflags=${{matrix.cxxflags}} cxxstd=${{matrix.cxxstd}} address-model=${{matrix.addrmd}} variant=debug,release link=static,shared embed-manifest-via=linker + + windows-clang: + strategy: + fail-fast: false + matrix: + include: + - toolset: clang-win + cxxstd: "14,17,latest" + addrmd: "32" + os: windows-2022 + - toolset: clang-win + cxxstd: "14,17,latest" + addrmd: "64" + os: windows-2022 + + runs-on: ${{matrix.os}} + + steps: + - uses: actions/checkout@v3 + + - name: Setup Boost + shell: cmd + run: | + echo GITHUB_REPOSITORY: %GITHUB_REPOSITORY% + for /f %%i in ("%GITHUB_REPOSITORY%") do set LIBRARY=%%~nxi + echo LIBRARY: %LIBRARY% + echo LIBRARY=%LIBRARY%>>%GITHUB_ENV% + echo GITHUB_BASE_REF: %GITHUB_BASE_REF% + echo GITHUB_REF: %GITHUB_REF% 
+ if "%GITHUB_BASE_REF%" == "" set GITHUB_BASE_REF=%GITHUB_REF% + set BOOST_BRANCH=develop + for /f %%i in ("%GITHUB_BASE_REF%") do if "%%~nxi" == "master" set BOOST_BRANCH=master + echo BOOST_BRANCH: %BOOST_BRANCH% + cd .. + git clone -b %BOOST_BRANCH% --depth 1 https://github.com/boostorg/boost.git boost-root + cd boost-root + xcopy /s /e /q %GITHUB_WORKSPACE% libs\%LIBRARY%\ + git submodule update --init tools/boostdep + python tools/boostdep/depinst/depinst.py --git_args "--jobs 3" %LIBRARY% + cmd /c bootstrap + b2 -d0 headers + + - name: Run tests + shell: cmd + run: | + cd ../boost-root + b2 -j3 libs/%LIBRARY%/test toolset=${{matrix.toolset}} cxxstd=${{matrix.cxxstd}} address-model=${{matrix.addrmd}} variant=debug,release link=static,shared embed-manifest-via=linker define=_CRT_SECURE_NO_WARNINGS cxxflags="-Wno-reserved-macro-identifier -Wno-microsoft-cpp-macro -Wno-documentation -Wno-inconsistent-missing-destructor-override -Wno-global-constructors -Wno-zero-as-null-pointer-constant -Wno-suggest-destructor-override -Wno-redundant-parens -Wno-disabled-macro-expansion -Wno-missing-prototypes -Wno-used-but-marked-unused -Wno-reserved-identifier -Wno-unsafe-buffer-usage -Wno-exit-time-destructors" diff --git a/external_codes/boost_multi/multi/.github/workflows/cmake.yml b/external_codes/boost_multi/multi/.github/workflows/cmake.yml new file mode 100644 index 0000000000..c3fc75f104 --- /dev/null +++ b/external_codes/boost_multi/multi/.github/workflows/cmake.yml @@ -0,0 +1,162 @@ +# This starter workflow is for a CMake project running on a single platform. There is a different starter workflow if you need cross-platform coverage. 
+# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-multi-platform.yml +name: CMake + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +env: + BUILD_TYPE: Release + +jobs: + latest-cpp: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Configure system + run: | + sudo apt-get -y update + sudo apt-get -y install libboost-test-dev + + - name: Configure CMake + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + + - name: Build + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Test + working-directory: ${{github.workspace}}/build + run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure + + latest-cpp32: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Configure system + run: | + sudo apt-get -y update + sudo apt-get -y install g++-multilib libboost-test-dev + wget https://downloads.sourceforge.net/project/boost/boost/1.84.0/boost_1_84_0.tar.gz --no-verbose + tar -xf boost_1_84_0.tar.gz + cd boost_1_84_0 + ./bootstrap.sh + sudo ./b2 cxxflags=-m32 --with-serialization --with-test --with-timer install -j4 + + - name: Configure CMake + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_FLAGS="-m32" + + - name: Build + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Test + working-directory: ${{github.workspace}}/build + run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure + + latest-clangpp: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Configure system + run: | + sudo apt-get -y update + sudo apt-get -y install clang libboost-test-dev + + - name: Configure CMake + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=clang++ + + - name: Build + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Test 
+ working-directory: ${{github.workspace}}/build + run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure + + macos-cpp: + runs-on: macos-14 + + steps: + - uses: actions/checkout@v4 + + - name: Configure system + run: | + brew install boost + + - name: Configure CMake + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + + - name: Build + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Test + working-directory: ${{github.workspace}}/build + run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure + + # macos-gpp: + # runs-on: macos-14 + + # steps: + # - uses: actions/checkout@v4 + + # - name: Configure system + # run: | + # brew install boost gcc@13 + + # - name: Configure CMake + # run: | + # g++-13 --version + # cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=g++-13 -DCMAKE_CXX_STANDARD=23 -DCMAKE_CXX_FLAGS="-Wl,-ld_classic" # https://developer.apple.com/forums/thread/737707?answerId=765203022#765203022 + + # - name: Build + # run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --verbose + + # - name: Test + # working-directory: ${{github.workspace}}/build + # run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure + + windows: + runs-on: windows-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up CMake + uses: lukka/get-cmake@latest + + - name: Set up MSVC + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: x64 + + - name: Install vcpkg + run: | + git clone https://github.com/microsoft/vcpkg.git + .\vcpkg\bootstrap-vcpkg.bat + + - name: Install Boost + run: .\vcpkg\vcpkg install boost + + - name: Set up Boost environment variables + run: | + echo "BOOST_ROOT=$(Get-Location)\vcpkg\installed\x64-windows" >> $env:GITHUB_ENV + echo "BOOST_INCLUDEDIR=$(Get-Location)\vcpkg\installed\x64-windows\include" >> $env:GITHUB_ENV + echo "BOOST_LIBRARYDIR=$(Get-Location)\vcpkg\installed\x64-windows\lib" >> 
$env:GITHUB_ENV + echo "PATH=$(Get-Location)\vcpkg\installed\x64-windows\bin;$env:PATH" >> $env:GITHUB_ENV + + - name: Configure CMake + run: cmake -S . -B build -A x64 -DCMAKE_TOOLCHAIN_FILE=.\vcpkg\scripts\buildsystems\vcpkg.cmake -DBOOST_ROOT=.\vcpkg\installed\x64-windows + + - name: Build + run: cmake --build build --config Release + + - name: Run tests + run: ctest --test-dir build --output-on-failure -C Release diff --git a/external_codes/boost_multi/multi/.github/workflows/codecov.yml b/external_codes/boost_multi/multi/.github/workflows/codecov.yml new file mode 100644 index 0000000000..278a2b8666 --- /dev/null +++ b/external_codes/boost_multi/multi/.github/workflows/codecov.yml @@ -0,0 +1,200 @@ +# Copyright 2020-2021 Peter Dimov +# Copyright 2021 Andrey Semashev +# Copyright 2021 Alexander Grund +# Copyright 2022 James E. King III +# Copyright 2023 Matt Borland +# +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at http://boost.org/LICENSE_1_0.txt) +--- +name: codecov + +on: + pull_request: + push: + branches: + - master + - develop + - bugfix/** + - feature/** + - fix/** + - pr/** + +env: + GIT_FETCH_JOBS: 8 + NET_RETRY_COUNT: 5 + B2_CI_VERSION: 1 + B2_VARIANT: debug,release + B2_LINK: shared,static + LCOV_BRANCH_COVERAGE: 0 + CODECOV_NAME: Github Actions + +jobs: + posix: + defaults: + run: + shell: bash + + strategy: + fail-fast: false + matrix: + include: + - { name: Collect coverage, coverage: yes, + compiler: gcc-12, cxxstd: '20', os: ubuntu-22.04, install: 'g++-12-multilib', address-model: '32,64' } + + timeout-minutes: 120 + runs-on: ${{matrix.os}} + container: ${{matrix.container}} + env: {B2_USE_CCACHE: 1} + + steps: + - name: Setup environment + run: | + if [ -f "/etc/debian_version" ]; then + echo "DEBIAN_FRONTEND=noninteractive" >> $GITHUB_ENV + export DEBIAN_FRONTEND=noninteractive + fi + if [ -n "${{matrix.container}}" ] && [ -f "/etc/debian_version" ]; then + apt-get -o 
Acquire::Retries=$NET_RETRY_COUNT update + apt-get -o Acquire::Retries=$NET_RETRY_COUNT install -y sudo software-properties-common curl + # Need (newer) git, and the older Ubuntu container may require requesting the key manually using port 80 + curl -sSL --retry ${NET_RETRY_COUNT:-5} 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE1DD270288B4E6030699E45FA1715D88E1DF1F24' | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/git-core_ubuntu_ppa.gpg + for i in {1..${NET_RETRY_COUNT:-3}}; do sudo -E add-apt-repository -y ppa:git-core/ppa && break || sleep 10; done + apt-get -o Acquire::Retries=$NET_RETRY_COUNT update + osver=$(lsb_release -sr | cut -f1 -d.) + pkgs="g++ git" + # Ubuntu 22+ has only Python 3 in the repos + if [ -n "$osver" ] && [ "$osver" -ge "22" ]; then + pkgs+=" python-is-python3 libpython3-dev" + else + pkgs+=" python libpython-dev" + fi + apt-get -o Acquire::Retries=$NET_RETRY_COUNT install -y $pkgs + fi + # For jobs not compatible with ccache, use "ccache: no" in the matrix + if [[ "${{ matrix.ccache }}" == "no" ]]; then + echo "B2_USE_CCACHE=0" >> $GITHUB_ENV + fi + git config --global pack.threads 0 + + - uses: actions/checkout@v3 + with: + # For coverage builds fetch the whole history, else only 1 commit using a 'fake ternary' + fetch-depth: ${{ matrix.coverage && '0' || '1' }} + + - name: Cache ccache + uses: actions/cache@v3 + if: env.B2_USE_CCACHE + with: + path: ~/.ccache + key: ${{matrix.os}}-${{matrix.container}}-${{matrix.compiler}}-${{github.sha}} + restore-keys: ${{matrix.os}}-${{matrix.container}}-${{matrix.compiler}}- + + - name: Fetch Boost.CI + uses: actions/checkout@v3 + with: + repository: boostorg/boost-ci + ref: master + path: boost-ci-cloned + + - name: Get CI scripts folder + run: | + # Copy ci folder if not testing Boost.CI + [[ "$GITHUB_REPOSITORY" =~ "boost-ci" ]] || cp -r boost-ci-cloned/ci . 
+ rm -rf boost-ci-cloned + + - name: Install packages + if: startsWith(matrix.os, 'ubuntu') + run: | + SOURCE_KEYS=(${{join(matrix.source_keys, ' ')}}) + SOURCES=(${{join(matrix.sources, ' ')}}) + # Add this by default + SOURCES+=(ppa:ubuntu-toolchain-r/test) + for key in "${SOURCE_KEYS[@]}"; do + for i in {1..$NET_RETRY_COUNT}; do + keyfilename=$(basename -s .key $key) + curl -sSL --retry ${NET_RETRY_COUNT:-5} "$key" | sudo gpg --dearmor > /etc/apt/trusted.gpg.d/${keyfilename} && break || sleep 10 + done + done + for source in "${SOURCES[@]}"; do + for i in {1..$NET_RETRY_COUNT}; do + sudo add-apt-repository $source && break || sleep 10 + done + done + sudo apt-get -o Acquire::Retries=$NET_RETRY_COUNT update + if [[ -z "${{matrix.install}}" ]]; then + pkgs="${{matrix.compiler}}" + pkgs="${pkgs/gcc-/g++-}" + else + pkgs="${{matrix.install}}" + fi + sudo apt-get -o Acquire::Retries=$NET_RETRY_COUNT install -y $pkgs locales + sudo locale-gen de_DE.UTF-8 + sudo update-locale + + - name: Setup GCC Toolchain + if: matrix.gcc_toolchain + run: | + GCC_TOOLCHAIN_ROOT="$HOME/gcc-toolchain" + echo "GCC_TOOLCHAIN_ROOT=$GCC_TOOLCHAIN_ROOT" >> $GITHUB_ENV + if ! 
command -v dpkg-architecture; then + apt-get install -y dpkg-dev + fi + MULTIARCH_TRIPLET="$(dpkg-architecture -qDEB_HOST_MULTIARCH)" + mkdir -p "$GCC_TOOLCHAIN_ROOT" + ln -s /usr/include "$GCC_TOOLCHAIN_ROOT/include" + ln -s /usr/bin "$GCC_TOOLCHAIN_ROOT/bin" + mkdir -p "$GCC_TOOLCHAIN_ROOT/lib/gcc/$MULTIARCH_TRIPLET" + ln -s "/usr/lib/gcc/$MULTIARCH_TRIPLET/${{matrix.gcc_toolchain}}" "$GCC_TOOLCHAIN_ROOT/lib/gcc/$MULTIARCH_TRIPLET/${{matrix.gcc_toolchain}}" + + - name: Setup multiarch + if: matrix.multiarch + run: | + sudo apt-get install --no-install-recommends -y binfmt-support qemu-user-static + sudo docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + git clone https://github.com/jeking3/bdde.git + echo "$(pwd)/bdde/bin/linux" >> ${GITHUB_PATH} + echo "BDDE_DISTRO=${{ matrix.distro }}" >> ${GITHUB_ENV} + echo "BDDE_EDITION=${{ matrix.edition }}" >> ${GITHUB_ENV} + echo "BDDE_ARCH=${{ matrix.arch }}" >> ${GITHUB_ENV} + echo "B2_WRAPPER=bdde" >> ${GITHUB_ENV} + + - name: Setup Boost + env: + B2_ADDRESS_MODEL: ${{matrix.address-model}} + B2_COMPILER: ${{matrix.compiler}} + B2_CXXSTD: ${{matrix.cxxstd}} + B2_SANITIZE: ${{matrix.sanitize}} + B2_STDLIB: ${{matrix.stdlib}} + # More entries can be added in the same way, see the B2_ARGS assignment in ci/enforce.sh for the possible keys. + # B2_DEFINES: ${{matrix.defines}} + # Variables set here (to non-empty) will override the top-level environment variables, e.g. + # B2_VARIANT: ${{matrix.variant}} + # Set the (B2) target(s) to build, defaults to the test folder of the current library + # Can alternatively be done like this in the build step or in the build command of the build step, e.g. 
`run: B2_TARGETS=libs/$SELF/doc ci/build.sh` + # B2_TARGETS: libs/foo/test//bar + run: source ci/github/install.sh + + - name: Setup coverage collection + if: matrix.coverage + run: ci/github/codecov.sh "setup" + + - name: Run tests + if: '!matrix.coverity' + run: ci/build.sh + + - name: Upload coverage + if: matrix.coverage + run: ci/codecov.sh "upload" + env: + BOOST_CI_CODECOV_IO_UPLOAD: skip + + - name: Upload coverage + if: matrix.coverage + uses: codecov/codecov-action@v4 + with: + disable_search: true + file: coverage.info + name: Github Actions + token: ${{secrets.CODECOV_TOKEN}} + verbose: true diff --git a/external_codes/boost_multi/multi/.github/workflows/gitlab-sync.yml b/external_codes/boost_multi/multi/.github/workflows/gitlab-sync.yml new file mode 100644 index 0000000000..47cb272a6a --- /dev/null +++ b/external_codes/boost_multi/multi/.github/workflows/gitlab-sync.yml @@ -0,0 +1,22 @@ +name: GitlabSync # https://dev.to/brunorobert/github-and-gitlab-sync-44mn + +on: + - push + - delete + +jobs: + sync: + runs-on: ubuntu-latest + name: Git Repo Sync + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: wangchucheng/git-repo-sync@v0.1.0 + with: + # Such as https://github.com/wangchucheng/git-repo-sync.git + target-url: ${{ secrets.TARGET_URL }} + # Such as wangchucheng + target-username: ${{ secrets.TARGET_USERNAME }} + # You can store token in your project's 'Setting > Secrets' and reference the name here. 
Such as ${{ secrets.ACCESS\_TOKEN }} + target-token: ${{ secrets.TARGET_TOKEN }} diff --git a/external_codes/boost_multi/multi/.gitignore b/external_codes/boost_multi/multi/.gitignore index 66fa21a7d6..ed96246007 100644 --- a/external_codes/boost_multi/multi/.gitignore +++ b/external_codes/boost_multi/multi/.gitignore @@ -1,3 +1,42 @@ build* .build* +.vscode +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +# Cmake +cmake-build-debug + +# Clion +.idea diff --git a/external_codes/boost_multi/multi/.gitlab-ci.yml b/external_codes/boost_multi/multi/.gitlab-ci.yml index f608e9e067..f49ad2478e 100644 --- a/external_codes/boost_multi/multi/.gitlab-ci.yml +++ b/external_codes/boost_multi/multi/.gitlab-ci.yml @@ -1,724 +1,916 @@ -# -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;-*- -# © Alfredo A. Correa 2020-2022 +# Copyright 2020-2024 Alfredo A. 
Correa -image: debian:testing +# Install docker (for local CI or to set up CI machine) +# sudo apt install docker.io +# Install gitlab-runner +# curl -L "https://packages.gitlab.com/install/repositories/runner/gitlab-runner/script.deb.sh" | sudo bash +# sudo apt-get install gitlab-runner + +image: debian:stable + +workflow: + auto_cancel: + on_new_commit: interruptible variables: - OMP_NUM_THREADS: 1 + GIT_SUBMODULE_STRATEGY: recursive + CODECOV_TOKEN: "999feb5b-a599-4d02-b9c5-46d977247f3a" + RT_VERSION: "0.1" + NVIDIA_DISABLE_REQUIRE: 1 + SONAR_USER_HOME: "${CI_PROJECT_DIR}/.sonar" # Defines the location of the analysis task cache + GIT_DEPTH: "0" # Tells git to fetch all the branches of the project, required by the analysis task + msbuild: 'C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin\MSBuild.exe' -g++: +g++: # debian-stable: default is gcc 12.2.0 as of April 2024 stage: build + interruptible: false + tags: + - non-shared script: - - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet g++ pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev - - ln --symbolic --force . ../multi + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates cmake g++ make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev liblapack-dev libfftw3-dev pkg-config - g++ --version - mkdir build && cd build - - cmake .. -DCMAKE_BUILD_TYPE=Debug + - cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_FLAGS="-D_GLIBCXX_DEBUG=1" + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ctest -j 2 --output-on-failure -T Test + +g++-m32: + stage: build + image: debian:latest + tags: + - non-shared + - high-bandwidth + interruptible: true + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates g++ g++-multilib cmake make tar wget + - wget https://downloads.sourceforge.net/project/boost/boost/1.84.0/boost_1_84_0.tar.gz --no-verbose # 1.70 doesn't install boost timer well, there is no version 1.71 in sourceforge + - tar -xf boost_1_84_0.tar.gz + - cd boost_1_84_0 + - ./bootstrap.sh + - ./b2 cxxflags=-m32 --with-serialization --with-test --with-timer install -j4 # libc++ only works with boost test compiled with libc++ + - cd .. + - mkdir build && cd build + - c++ --version + - cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_FLAGS="-m32" - cmake --build . - - ctest --output-on-failure - - make install - -#g++-debug: -# stage: build -# script: -# - export CXX="g++" -# - apt-get update && apt-get install --no-install-recommends -y --quiet $CXX pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev -# - ln --symbolic --force . ../multi -# - $CXX --version -# - mkdir build && cd build -# - cmake .. -DCMAKE_BUILD_TYPE=Debug -# - cmake --build . --verbose -# - ctest --output-on-failure -# needs: ["g++"] - -#g++-9: -# image: debian:stable-backports -# stage: build -# script: -# - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet g++-9 pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev -# - ln --symbolic --force . ../multi -# - g++-9 --version -# - mkdir build && cd build -# - CXX=g++-9 cmake .. -# - cmake --build . 
-# - ctest --output-on-failure -# - make install -# needs: ["g++"] - -g++-std20: + - ctest -j 2 --output-on-failure + needs: ["g++"] + +g++-10: stage: build + image: ubuntu:20.04 + tags: + - high-bandwidth + interruptible: true script: - - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet g++ pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev - - ln --symbolic --force . ../multi + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates g++-10 cmake libboost-test-dev libboost-timer-dev libboost-serialization-dev make tar wget # linux-libc-dev:i386 + - mkdir build && cd build + - g++-10 --version + - CXX=g++-10 cmake .. -DCMAKE_BUILD_TYPE=Debug # -DCMAKE_CXX_FLAGS="-m32" + - cmake --build . + - ctest -j 2 --output-on-failure + needs: ["g++"] + +clang++-9: + stage: build + image: ubuntu:20.04 + tags: + - non-shared + - high-bandwidth + interruptible: true + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates clang-9 cmake libboost-test-dev libboost-timer-dev libboost-serialization-dev make tar wget + - mkdir build && cd build + - clang++-9 --version + - CXX=clang++-9 cmake .. -DCMAKE_BUILD_TYPE=Debug + - cmake --build . + - ctest -j 2 --output-on-failure + needs: ["clang++"] + +clang++-11: + stage: build + image: ubuntu:22.04 + tags: + - non-shared + - high-bandwidth + interruptible: true + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates clang-11 cmake libboost-test-dev libboost-timer-dev libboost-serialization-dev make tar wget + - mkdir build && cd build + - clang++-11 --version + - CXX=clang++-11 cmake .. -DCMAKE_BUILD_TYPE=Debug + - cmake --build . 
+ - ctest -j 2 --output-on-failure + needs: ["clang++"] + +coverage: + stage: build + tags: + - non-shared + - docker + allow_failure: true + interruptible: true + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates cmake curl g++ gcovr git make lcov libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev pkg-config wget + - mkdir build && cd build + - cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_FLAGS="-DNDEBUG --coverage -O0 -fno-inline -fno-inline-small-functions -fno-default-inline" -DCMAKE_EXE_LINKER_FLAGS="--coverage" + - cmake --build . --parallel 2 || cmake --build . --verbose + - ctest -j 1 --output-on-failure -T Test + - ctest -j 1 --output-on-failure -T Coverage + - gcovr --xml-pretty --exclude-unreachable-branches --print-summary -o coverage.xml --root ${CI_PROJECT_DIR} + # - lcov --directory . --capture --output-file coverage.info + # - lcov --remove coverage.info '/usr/*' --output-file coverage.info + # - lcov --list coverage.info && genhtml coverage.info + - bash <(curl -s https://codecov.io/bash) || echo 'Codecov failed to upload' + coverage: /^\s*Percentage\s+Coverage:\s*\d+.\d+\%/ + # coverage: /^\s*lines:\s*\d+.\d+\%/ + artifacts: + name: ${CI_JOB_NAME}-${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHA} + expire_in: 2 days + reports: + coverage_report: + coverage_format: cobertura + path: build/coverage.xml + needs: ["g++"] + +g++ cppcheck cpplint memcheck: # debian-stable: gcc 12 as of Dec 2023 + stage: build + allow_failure: true + interruptible: true + tags: + - non-shared + - docker + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates cmake cppcheck cpplint g++ make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev pkg-config valgrind - g++ --version + - cpplint --version + - cppcheck --version + - valgrind --version - mkdir build && 
cd build - - cmake .. -DCMAKE_BUILD_TYPE=AggressiveOpt - - cmake --build . --verbose - - ctest --output-on-failure - - make install + - export VALGRIND_EXE="valgrind --leak-check=full --track-origins=yes --show-leak-kinds=all --suppressions=.valgrind_suppressions --gen-suppressions=all --error-exitcode=1 " + - cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_CPPLINT="cpplint;--quiet" -DCMAKE_CXX_CPPCHECK="cppcheck;--enable=all;--suppress=missingIncludeSystem;--inline-suppr;--std=c++17;--check-config;--error-exitcode=1" + - cmake --build . --parallel 2 || cmake --build . --verbose + - ctest -j 2 --output-on-failure -T memcheck needs: ["g++"] -g++-asan: +g++-testing sanitizer: stage: build + tags: + - non-shared + - docker + image: debian:testing + interruptible: true script: - - export CXX="g++" - - apt-get update && apt-get install --no-install-recommends -y --quiet $CXX pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev - - ln --symbolic --force . ../multi - - $CXX --version + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates cmake g++ make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev pkg-config - mkdir build && cd build - - CXXFLAGS="-g -O1 -fsanitize=address,undefined -fno-sanitize-recover=all -fno-omit-frame-pointer" cmake .. -DCMAKE_BUILD_TYPE=Debug - - cmake --build . --verbose - - ctest --output-on-failure + - cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_FLAGS="-fsanitize=address" -fno-omit-frame-pointer # ,undefined,pointer-compare,pointer-subtract,float-divide-by-zero -fno-omit-frame-pointer -fno-sanitize-recover=all" + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ASAN_OPTIONS="new_delete_type_mismatch=true:check_initialization_order=true:strict_init_order=true:detect_stack_use_after_return=true:detect_odr_violation=true" ctest -j 2 --output-on-failure -T Test needs: ["g++"] -g++-tsan: +g++-7: stage: build + image: debian:oldoldstable # default is gcc 8 as of Dec 2023 + tags: + - non-shared + - docker + interruptible: true script: - - export CXX="g++" - - apt-get update && apt-get install --no-install-recommends -y --quiet $CXX pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev - - ln --symbolic --force . ../multi - - $CXX --version + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates cmake g++-7 make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev pkg-config wget + - wget https://github.com/Kitware/CMake/releases/download/v3.27.0-rc3/cmake-3.27.0-rc3-linux-x86_64.sh --no-verbose + - sh ./cmake-3.27.0-rc3-linux-x86_64.sh --skip-license --prefix=/usr + - cmake --version + - g++-7 --version - mkdir build && cd build - - CXXFLAGS="-g -O1 -fsanitize=thread -fno-sanitize-recover=all -fno-omit-frame-pointer" cmake .. -DCMAKE_BUILD_TYPE=Debug - - cmake --build . --verbose - - ctest --output-on-failure + - CXX=g++-7 CXXFLAGS="-DBOOST_TEST_MAIN -DBOOST_TEST_DYN_LINK" cmake .. -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_BUILD_TYPE=Debug + - cmake --build . --parallel 2 || cmake --build . --verbose + - ctest -j 2 --output-on-failure -T Test needs: ["g++"] -#g++-current-codecov: -# stage: build -# script: -# - export CXX="g++" -# - apt-get update && apt-get install --no-install-recommends -y --quiet $CXX pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev curl gcovr lcov -# - ln --symbolic --force . 
../multi -# - $CXX --version -# - mkdir build && cd build -# - CXXFLAGS="-ftest-coverage -fprofile-arcs --coverage -fno-inline -fno-inline-small-functions -fno-default-inline" cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXE_LINKER_FLAGS="-lgcov --coverage" -# - cmake --build . --verbose -# - ctest --output-on-failure -T Test -T Coverage -# - gcovr --xml-pretty --exclude-unreachable-branches --print-summary -o coverage.xml --root ${CI_PROJECT_DIR} -# - lcov --directory . --capture --output-file coverage.info -# - lcov --remove coverage.info '/usr/*' --output-file coverage.info -# - lcov --list coverage.info -# coverage: /^\s*lines:\s*\d+.\d+\%/ -# artifacts: -# name: ${CI_JOB_NAME}-${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHA} -# expire_in: 2 days -# reports: -# cobertura: build/coverage.xml - -# TODO(correaa) remove -g++-openblas: - stage: build - script: - - export CXX="g++" - - apt-get update && apt-get install --no-install-recommends -y --quiet $CXX pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libopenblas-dev libfftw3-dev - - ln --symbolic --force . ../multi - - $CXX --version - - mkdir build && cd build - - cmake .. - - cmake --build . --verbose +g++-testing c++20: + stage: build + image: debian:testing + tags: + - non-shared + - docker + interruptible: true + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet g++ pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev + - mkdir build && cd build + - cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_STANDARD=20 + - cmake --build . --parallel 2 || cmake --build . 
--verbose - ctest --output-on-failure needs: ["g++"] -memcheck: +g++-13 c++23 par: # debian-testing: default is gcc 13.2.0 as of Dec 2023 stage: build + image: debian:unstable + tags: + - non-shared + - docker + interruptible: true script: - - export CXX="g++" - - apt-get update && apt-get install --no-install-recommends -y --quiet $CXX valgrind pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev - - ln --symbolic --force . ../multi - - $CXX --version + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet g++ g++-14 pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev libtbb-dev - mkdir build && cd build - - cmake .. - - cmake --build . --verbose - - ctest --overwrite MemoryCheckCommandOptions="-q --tool=memcheck --leak-check=yes --num-callers=51 --trace-children=yes --leak-check=full --track-origins=yes --gen-suppressions=all" -T memcheck --output-on-failure - needs: ["g++"] + - g++-13 --version + - CXX=g++-13 cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_STANDARD=23 + - cmake --build . --parallel 2 || cmake --build . --verbose + - ctest --output-on-failure + needs: ["g++-testing c++20"] -clang++: +g++-14 c++23 par: # debian-testing: default is gcc 13.2.0 as of Dec 2023 stage: build + image: debian:unstable + tags: + - non-shared + - docker + allow_failure: true + interruptible: true script: - - export CXX="clang++" - - apt-get update && apt-get install --no-install-recommends -y --quiet clang pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev - - ln --symbolic --force . ../multi - - $CXX --version + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet g++ g++-14 pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev libtbb-dev - mkdir build && cd build - - cmake .. 
-DCMAKE_BUILD_TYPE=Debug - - cmake --build . --verbose -- --quiet --no-print-directory + - g++-14 --version + - CXX=g++-14 cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_STANDARD=23 + - cmake --build . --parallel 2 || cmake --build . --verbose - ctest --output-on-failure + needs: ["g++-testing c++20"] -#clang++-9: -# image: debian:stable -# stage: build -# script: -# - export CXX="clang++-9" -# - apt-get update && apt-get install --no-install-recommends -y --quiet clang-9 pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev -# - ln --symbolic --force . ../multi -# - $CXX --version -# - mkdir build && cd build -# - cmake .. -# - cmake --build . --verbose -# - ctest --output-on-failure -# needs: ["clang++"] - -#clang++-9-asan: -# image: debian:stable -# stage: build -# image: debian:stable -# script: -# - export CXX="clang++-9" -# - export ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-9/bin/llvm-symbolizer -# - export CXXFLAGS="-fsanitize=undefined -fsanitize=address -fno-omit-frame-pointer" -# - apt-get update && apt-get install --no-install-recommends -y --quiet clang-9 llvm-9 pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev llvm -# - ln --symbolic --force . ../multi -# - $CXX --version -# - mkdir build && cd build -# - cmake .. -DCMAKE_BUILD_TYPE=Debug -# - cmake --build . --verbose -# - export ASAN_OPTIONS="halt_on_error=1 detect_leaks=1" -# - ctest --output-on-failure -# needs: ["clang++"] - -clang++-std20: - stage: build - script: - - apt-get update && apt-get install --no-install-recommends -y --quiet clang pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev - - ln --symbolic --force . 
../multi +clang++: + stage: build + tags: + - non-shared + interruptible: false + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet clang cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev pkg-config + - mkdir build && cd build - clang++ --version + - CXX=clang++ cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_FLAGS="-D_GLIBCXX_DEBUG=1" + - cmake --build . --parallel 2 || cmake --build . --verbose + - ctest -j 2 --output-on-failure + +# deb: +# stage: build +# script: +# - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates cmake g++ make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev pkg-config +# - mkdir build && cd build +# - cmake .. -DCMAKE_BUILD_TYPE=Release +# - cmake --build . --parallel 2 || cmake --build . --verbose +# - ctest -j 2 --output-on-failure +# - cpack -G DEB +# - ls -all +# - dpkg -i multi_all.deb +# - ls -all /usr/include/multi +# - c++ ../test/main.cpp +# artifacts: +# paths: +# - build/multi_all.deb +# needs: ["g++", "clang++"] + +clang++-latest libc++: + stage: build + image: debian:latest + tags: + - non-shared + - docker + - high-bandwidth + interruptible: true + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates clang cmake make libblas-dev libc++-dev libc++abi-dev libfftw3-dev pkg-config tar wget + - wget https://downloads.sourceforge.net/project/boost/boost/1.72.0/boost_1_72_0.tar.gz --no-verbose # 1.70 doesn't install boost timer well, there is no version 1.72 in sourceforge + - tar -xf boost_1_72_0.tar.gz + - cd boost_1_72_0 + - ./bootstrap.sh --with-toolset=clang + - ./b2 toolset=clang cxxflags=-stdlib=libc++ linkflags=-stdlib=libc++ --with-serialization --with-test --with-timer install -j4 # libc++ only works with boost test compiled with libc++ + - cd .. 
- mkdir build && cd build - - CXX=clang++ cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_STANDARD=20 - - cmake --build . --verbose - - ctest --output-on-failure + - clang++ --version + - CXX=clang++ cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-stdlib=libc++ -DBOOST_NO_CXX98_FUNCTION_BASE -D_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR -D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_DEBUG" + - cmake --build . + - ctest -j 2 --output-on-failure needs: ["clang++"] -clang++-tidy: - stage: test +# build_windows: +# stage: build +# allow_failure: true +# script: +# - '& "$msbuild" -help' +# - '& "$msbuild" -version' +# - choco install -y cmake +# - $env:Path += ';C:\Program Files\CMake\bin' +# - cmake --version +# tags: +# - shared-windows +# - windows +# - windows-1809 + +# clang++-macos: +# stage: build +# image: macos-14-xcode-15 +# # variables: +# # HOMEBREW_NO_AUTO_UPDATE: 1 +# tags: +# - saas-macos-medium-m1 +# allow_failure: true +# script: +# #- apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates clang cmake make libblas-dev libc++-dev libc++abi-dev libfftw3-dev pkg-config tar wget +# #- wget https://downloads.sourceforge.net/project/boost/boost/1.72.0/boost_1_72_0.tar.gz --no-verbose # 1.70 doesn't install boost timer well, there is no version 1.72 in sourceforge +# #- tar -xf boost_1_72_0.tar.gz +# #- cd boost_1_72_0 +# #- ./bootstrap.sh --with-toolset=clang +# #- ./b2 toolset=clang cxxflags=-stdlib=libc++ linkflags=-stdlib=libc++ --with-serialization --with-test --with-timer install -j4 # libc++ only works with boost test compiled with libc++ +# #- cd .. +# - mkdir build && cd build +# - c++ --version +# - CXX=c++ cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 # -DCMAKE_CXX_FLAGS="-stdlib=libc++ -DBOOST_NO_CXX98_FUNCTION_BASE -D_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR -D_LIBCPP_ENABLE_DEBUG_MODE=1 -D_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY=1 -D_LIBCPP_DEBUG_STRICT_WEAK_ORDERING_CHECK=1" +# - cmake --build . +# - ctest -j 2 --output-on-failure +# needs: ["clang++"] + +clang++-m32: + stage: build + image: debian:latest + tags: + - high-bandwidth + interruptible: true script: - - export CXX="clang++" - - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet clang clang-tidy pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libblas-dev liblapack-dev libfftw3-dev - - ln --symbolic --force . ../multi - - $CXX --version - - clang-tidy --version + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates clang g++ g++-multilib cmake make tar wget + - wget https://downloads.sourceforge.net/project/boost/boost/1.84.0/boost_1_84_0.tar.gz --no-verbose # 1.70 doesn't install boost timer well, there is no version 1.71 in sourceforge + - tar -xf boost_1_84_0.tar.gz + - cd boost_1_84_0 + - ./bootstrap.sh --with-toolset=clang + - ./b2 toolset=clang cxxflags=-m32 --with-serialization --with-test --with-timer install -j4 # libc++ only works with boost test compiled with libc++ + - cd .. - mkdir build && cd build - - cmake .. -DCMAKE_CXX_CLANG_TIDY="clang-tidy" - - cmake --build . --verbose -- --quiet --no-print-directory - - ctest --output-on-failure + - c++ --version + - CXX=clang++ cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_FLAGS="-m32" + - cmake --build . 
+ - ctest -j 2 --output-on-failure needs: ["clang++"] -#g++-cpplint: -# stage: test -# script: -# - export CXX="g++" -# - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet g++ pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libblas-dev liblapack-dev libfftw3-dev python3-pip -# - pip install cpplint -# - ln --symbolic --force . ../multi -# - $CXX --version -# - cpplint --version -# - mkdir build && cd build -# - cmake .. -DCMAKE_CXX_CPPLINT="cpplint;--filter=-build/include_order,-legal/copyright,-readability/alt_tokens,-readability/nolint,-runtime/references,-whitespace/braces,-whitespace/comments,-whitespace/line_length,-whitespace/operators,-whitespace/parens,-whitespace/semicolon,-whitespace/tab" -# - cmake --build . --verbose -- --quiet --no-print-directory -# - cpplint --filter=-build/include_order,-build/header_guar,-legal/copyright,-readability/nolint,-runtime/references,-whitespace/line_length,-whitespace/parens,-whitespace/tab,-readability/alt_tokens --recursive ../include/multi/array.hpp -# needs: ["g++"] - -#clang++-11-gcc9: -# stage: build -# image: vistart/cuda:10.2-ubuntu20.04 -# script: -# - export CXX="clang++-11" -# - apt-get update -# - DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y --quiet gcc-9 clang-11 pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev -# - ln --symbolic --force . ../multi -# - $CXX -v -# - mkdir build && cd build -# - cmake .. -# - cmake --build . --verbose -- --quiet --no-print-directory -# - ctest --output-on-failure - -#cuda-11.0: -# allow_failure: true -# image: nvidia/cuda:11.0-devel -# stage: build -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - apt-get update && apt-get install --no-install-recommends -y --quiet pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev -# - ln --symbolic --force . 
../multi -# - nvcc --version -# - mkdir build && cd build -# - cmake .. -DENABLE_CUDA=1 -# - cmake --build . --verbose -# - ctest --output-on-failure - -# image not working -#cuda-11.1: -# image: nvidia/cuda:11.1.0-devel -# allow_failure: true -# stage: build -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - apt-get update && apt-get install --no-install-recommends -y --quiet pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev -# - ln --symbolic --force . ../multi -# - nvcc --version -# - mkdir build && cd build -# - cmake .. -DENABLE_CUDA=1 -# - cmake --build . --verbose -# - ctest --output-on-failure - -#cuda-11.2: -# allow_failure: true -# image: nvidia/cuda:11.2.0-devel -# stage: build -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - apt-get update && apt-get install --no-install-recommends -y --quiet pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev -# - ln --symbolic --force . ../multi -# - nvcc --version -# - mkdir build && cd build -# - cmake .. -DENABLE_CUDA=1 -# - cmake --build . --verbose -# - ctest --output-on-failure - -#cuda-11.3: -# allow_failure: true -# stage: build -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - echo 'deb [trusted=yes] https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64 /' | tee /etc/apt/sources.list.d/nvhpc.list -# - apt-get update -# - apt-get install --no-install-recommends -y --quiet pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev cuda -# - ln --symbolic --force . ../multi -# - nvcc --version -# - mkdir build && cd build -# - cmake .. -DENABLE_CUDA=1 -# - cmake --build . 
--verbose -# - ctest --output-on-failure - -#cuda-11.4: -# allow_failure: true -# image: docker pull nvidia/cuda:11.4.2-devel-ubi8 -# stage: build -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - apt-get update && apt-get install --no-install-recommends -y --quiet pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev -# - ln --symbolic --force . ../multi -# - nvcc --version -# - mkdir build && cd build -# - cmake .. -DENABLE_CUDA=1 -# - cmake --build . --verbose -# - ctest --output-on-failure - -#cuda-11.5: -# image: nvidia/cuda:11.5.0-devel -# stage: build -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - apt-get update && apt-get install --no-install-recommends -y --quiet pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev -# - ln --symbolic --force . ../multi -# - nvcc --version -# - mkdir build && cd build -# - cmake .. -DENABLE_CUDA=1 -# - cmake --build . --verbose -# - ctest --output-on-failure - -#cuda-11.6: -# image: nvidia/cuda:11.6.0-devel -# stage: build -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - apt-get update && apt-get install --no-install-recommends -y --quiet pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev -# - ln --symbolic --force . ../multi -# - nvcc --version -# - mkdir build && cd build -# - cmake .. -DENABLE_CUDA=1 -# - cmake --build . --verbose -# - ctest --output-on-failure - -#cuda-11.5gpuci: -# allow_failure: true -# image: gpuci/cuda:11.5.0-devel-ubuntu20.04 -# stage: build -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - apt-get update && apt-get install --no-install-recommends -y --quiet pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev -# - nvcc --version -# - ln --symbolic --force . ../multi -# - mkdir build && cd build -# - cmake .. 
-DENABLE_CUDA=1 -# - cmake --build . --verbose -# - ctest --output-on-failure - -#cuda-11.0-sdkhpc: -# allow_failure: true -# stage: build -# script: -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates -# - echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | tee /etc/apt/sources.list.d/nvhpc.list -# - apt-get update -y -# - apt-get install --no-install-recommends -y cmake nvhpc-22-3 wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev -# - cmake --version -# - apt-cache madison nvhpc -# - /opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.0/bin/nvcc --version -# - mkdir build && cd build -# - cmake .. -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.0/bin/nvcc -DCUDA_TOOLKIT_ROOT_DIR=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.0 -DCMAKE_CUDA_ARCHITECTURES=61 -DCMAKE_CUDA_FLAGS="-gencode arch=compute_61,code=sm_61" -# - cmake --build . || make VERBOSE=1 -# - ctest --output-on-failure - -#cuda-11.6: -# allow_failure: false -# stage: build -# script: -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates -# - echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | tee /etc/apt/sources.list.d/nvhpc.list -# - apt-get update -y -# - apt-get install --no-install-recommends -y cmake nvhpc-22-3 wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev -# - cmake --version -# - apt-cache madison nvhpc -# - /opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6/bin/nvcc --version -# - mkdir build && cd build -# - cmake .. -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6/bin/nvcc -DCUDA_TOOLKIT_ROOT_DIR=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6 -DCMAKE_CUDA_ARCHITECTURES=61 -DCMAKE_CUDA_FLAGS="-gencode arch=compute_61,code=sm_61" -# - cmake --build . 
|| make VERBOSE=1 -# - ctest --output-on-failure - -cuda-11.8: +clang++-testing tidy: stage: build - allow_failure: false - image: debian:stable + image: debian:unstable # clang 18 as of Apr 2024 + tags: + - non-shared + - docker + interruptible: true script: - - export DEBIAN_FRONTEND=noninteractive - - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates gnupg software-properties-common - - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/3bf863cc.pub - - add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/ /" - - add-apt-repository contrib - - apt-get update - - apt-get -y install cuda - - apt-get install --no-install-recommends -y cmake wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev - - /usr/local/cuda-11.8/bin/nvcc --version - - mkdir build && cd build - - cmake .. -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda-11.8/bin/nvcc -DCMAKE_CXX_STANDARD=17 -DCMAKE_CUDA_ARCHITECTURES=61 -DCMAKE_CUDA_FLAGS="-gencode arch=compute_61,code=sm_61" - - cmake --build . || make VERBOSE=1 + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet clang-18 clang-tidy-18 cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev pkg-config + - mkdir build && cd build + - clang++-18 --version + - clang-tidy-18 --version + - CXX=clang++-18 cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_CLANG_TIDY=clang-tidy-18 + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ctest -j 2 --output-on-failure + needs: ["clang++"] + +g++-testing c++20: + stage: build + image: debian:testing + tags: + - non-shared + - docker + interruptible: true + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet g++ pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev + - mkdir build && cd build + - cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_STANDARD=20 + - cmake --build . --parallel 2 || cmake --build . --verbose - ctest --output-on-failure needs: ["g++"] -cuda-runner: - stage: test +g++-testing sanitizer: + stage: build + image: debian:testing tags: - - intel_compiler + - non-shared + - docker + interruptible: true script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates cmake g++ make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev pkg-config - mkdir build && cd build - - /usr/local/cuda/bin/nvcc --version - - cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_CUDA_FLAGS="-ccbin=g++-9" -DCMAKE_CXX_STANDARD=17 - - make -j8 -output-sync=recurse - - ctest --output-on-failure --timeout 600 - - ../build/include/multi/adaptors/thrust/test/speed.cu.x - - ../build/include/multi/adaptors/thrust/test/speed_algo.cu.x - - ../build/include/multi/adaptors/fftw/test/combinations.cpp.x - - needs: ["cuda-11.8"] - -#g++-cppcheck: -# stage: build -# allow_failure: false -# image: debian:testing -# script: -# - export CXX="g++" -# - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet $CXX pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev liblapack-dev libfftw3-dev cppcheck -# - ln --symbolic --force . ../multi -# - $CXX --version -# - cppcheck --version -# - find . 
-name '*.hpp' -exec cppcheck --enable=all --inline-suppr --suppress=unmatchedSuppression --suppress=syntaxError --suppress=missingInclude --suppress=missingIncludeSystem --suppress=preprocessorErrorDirective --suppress=syntaxError --suppress=unusedFunction --suppress=arithOperationsOnVoidPointer --suppress=sizeofDereferencedVoidPointer --suppress=sizeofDereferencedVoidPointer --suppress=unmatchedSuppression:{} -D__align__ -DCUDARTAPI --language=c++ --std=c++17 --error-exitcode=666 --verbose --exception-handling {} \; -# - mkdir build && cd build -# - cmake -DCMAKE_CXX_CPPCHECK="cppcheck;--enable=all;--suppress=missingIncludeSystem;--inline-suppr;--language=c++;--std=c++17;--check-config;--error-exitcode=666;--verbose;--exception-handling" .. -# - cmake --build . --verbose || make VERBOSE=1 -# - ctest --output-on-failure -# needs: ["g++"] - -#clang++-iwyu: -# stage: build -# allow_failure: true -# image: debian:testing -# script: -# - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet clang cmake iwyu make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev liblapack-dev libfftw3-dev pkg-config -# - ln --symbolic --force . ../multi -# - clang++ --version -# - iwyu --version -# - find . 
-name '*.hpp' -exec iwyu -w -Xiwyu --transitive_includes_only -Xiwyu --cxx17ns -Xiwyu --quoted_includes_first -Xiwyu --cxx17ns -std=c++17 {} \; -# needs: ["clang++"] - -#nvhpc-22.1: -# stage: build -# script: -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates curl tree -# - curl --output nvhpc-22-1_22.1_amd64.deb --url https://developer.download.nvidia.com/hpc-sdk/22.1/nvhpc-22-1_22.1_amd64.deb --retry 5 --retry-delay 5 -# - curl --output nvhpc-2022_22.1_amd64.deb --url https://developer.download.nvidia.com/hpc-sdk/22.1/nvhpc-2022_22.1_amd64.deb --retry 5 --retry-delay 5 -# - apt-get install --no-install-recommends -y --quiet ./nvhpc-22-1_22.1_amd64.deb ./nvhpc-2022_22.1_amd64.deb -# - rm -rf ./nvhpc-22-1_22.1_amd64.deb ./nvhpc-2022_22.1_amd64.deb -# - apt-get install --no-install-recommends -y --quiet grep cmake libboost-test-dev libboost-serialization-dev libboost-timer-dev make pkg-config libblas-dev liblapack-dev libfftw3-dev -# - /opt/nvidia/hpc_sdk/Linux_x86_64/22.1/compilers/bin/nvc++ --version -# - strings /opt/nvidia/hpc_sdk/Linux_x86_64/22.1/compilers/bin/nvc++ | grep D -# - mkdir build && cd build -# - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/22.1/compilers/bin/nvc++ cmake .. 
-# - make VERBOSE=1 -# - ctest --output-on-failure - -#nvhpc-22.1-std20: -# stage: build -# script: -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates curl tree -# - curl --output nvhpc-22-1_22.1_amd64.deb --url https://developer.download.nvidia.com/hpc-sdk/22.1/nvhpc-22-1_22.1_amd64.deb --retry 5 --retry-delay 5 -# - curl --output nvhpc-2022_22.1_amd64.deb --url https://developer.download.nvidia.com/hpc-sdk/22.1/nvhpc-2022_22.1_amd64.deb --retry 5 --retry-delay 5 -# - apt-get install --no-install-recommends -y --quiet ./nvhpc-22-1_22.1_amd64.deb ./nvhpc-2022_22.1_amd64.deb -# - rm -rf ./nvhpc-22-1_22.1_amd64.deb ./nvhpc-2022_22.1_amd64.deb -# - apt-get install --no-install-recommends -y --quiet cmake libboost-test-dev libboost-serialization-dev libboost-timer-dev make pkg-config libblas-dev liblapack-dev libfftw3-dev -# - mkdir build && cd build -# - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/22.1/compilers/bin/nvc++ cmake .. -DCMAKE_CXX_STANDARD=20 -# - make VERBOSE=1 -# - ctest --output-on-failure - -#nvhpc-22.2: -# stage: build -# script: -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates -# - echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | tee /etc/apt/sources.list.d/nvhpc.list -# - apt-get update -y -# - apt-get autoremove -# - apt-get install -y nvhpc-22-2 -# - apt-get install --no-install-recommends -y --quiet ca-certificates curl tree grep cmake libboost-test-dev libboost-serialization-dev libboost-timer-dev make pkg-config libblas-dev liblapack-dev libfftw3-dev -# - /opt/nvidia/hpc_sdk/Linux_x86_64/22.2/compilers/bin/nvc++ --version -# - mkdir build && cd build -# - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/22.2/compilers/bin/nvc++ cmake .. 
-# - make VERBOSE=1 -# - ctest --output-on-failure - -#nvhpc-22.3: -# stage: build -# script: -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates -# - echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | tee /etc/apt/sources.list.d/nvhpc.list -# - apt-get update -y -# - apt-get install --no-install-recommends -y nvhpc-22-3 cmake make libboost-test-dev libboost-serialization-dev -# - /opt/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/nvc++ --version -# - mkdir build && cd build -# - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/compilers/bin/nvc++ cmake .. -# - make VERBOSE=1 -# - ctest --output-on-failure - -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates curl tree -# - curl --output nvhpc-22-1_22.1_amd64.deb --url https://developer.download.nvidia.com/hpc-sdk/22.1/nvhpc-22-1_22.1_amd64.deb --retry 5 --retry-delay 5 -# - curl --output nvhpc-2022_22.1_amd64.deb --url https://developer.download.nvidia.com/hpc-sdk/22.1/nvhpc-2022_22.1_amd64.deb --retry 5 --retry-delay 5 -# - apt-get install --no-install-recommends -y --quiet ./nvhpc-22-1_22.1_amd64.deb ./nvhpc-2022_22.1_amd64.deb -# - rm -rf ./nvhpc-22-1_22.1_amd64.deb ./nvhpc-2022_22.1_amd64.deb -# - apt-get install --no-install-recommends -y --quiet grep cmake libboost-test-dev libboost-serialization-dev libboost-timer-dev make pkg-config libblas-dev liblapack-dev libfftw3-dev -# - /opt/nvidia/hpc_sdk/Linux_x86_64/22.1/compilers/bin/nvc++ --version -# - strings /opt/nvidia/hpc_sdk/Linux_x86_64/22.1/compilers/bin/nvc++ | grep D -# - mkdir build && cd build -# - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/22.1/compilers/bin/nvc++ cmake .. -# - make VERBOSE=1 -# - ctest --output-on-failure + - cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_FLAGS="-fsanitize=address,undefined,pointer-compare,pointer-subtract,float-divide-by-zero -fno-sanitize-recover=all" + - cmake --build . 
--parallel 2 || cmake --build . --verbose + - ASAN_OPTIONS="new_delete_type_mismatch=0:check_initialization_order=true:strict_init_order=true:detect_stack_use_after_return=true:detect_odr_violation=1" ctest -j 2 --output-on-failure -T Test + needs: ["g++"] + +clang++-17-unstable libc++ c++23 boost_1_84: + stage: build + image: debian:unstable # clang 17 as of March 2024 + tags: + - non-shared + - docker + - high-bandwidth + interruptible: true + script: # clang 17 doesn't work with gcc 13 libstd + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates clang clang-17 cmake make libblas-dev libc++-17-dev libc++abi-17-dev libfftw3-dev pkg-config tar wget + - wget https://downloads.sourceforge.net/project/boost/boost/1.84.0/boost_1_84_0.tar.gz --no-verbose + - tar -xf boost_1_84_0.tar.gz + - cd boost_1_84_0 + - ./bootstrap.sh --with-toolset=clang + - ./b2 toolset=clang cxxflags=-stdlib=libc++ linkflags=-stdlib=libc++ --with-serialization --with-test --with-timer install -j4 # libc++ only works with boost test compiled with libc++ + - cd .. + - mkdir build && cd build + - CXX=clang++-17 cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_STANDARD=23 -DCMAKE_CXX_FLAGS="-stdlib=libc++ -D_LIBCPP_ENABLE_DEBUG_MODE=1 -D_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY=1 -D_LIBCPP_DEBUG_STRICT_WEAK_ORDERING_CHECK=1" # -DBoost_INCLUDE_DIR=../boost_1_84_0 -DBoost_LIBRARY_DIR=../boost_1_84_0/stage/lib -DBoost_NO_SYSTEM_PATHS=ON + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ctest --output-on-failure + needs: ["clang++-latest libc++", "g++-testing c++20"] + +clang++-oldoldstable: + stage: build + image: debian:oldoldstable # clang 7.0.1 as of April 2024 + tags: + - non-shared + - docker + interruptible: true + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates clang cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev pkg-config wget + - wget https://github.com/Kitware/CMake/releases/download/v3.27.0-rc3/cmake-3.27.0-rc3-linux-x86_64.sh --no-verbose + - sh ./cmake-3.27.0-rc3-linux-x86_64.sh --skip-license --prefix=/usr + - mkdir build && cd build + - clang++ --version + - CXX=clang++ CXXFLAGS="-DBOOST_TEST_DYN_LINK" cmake .. -DCMAKE_BUILD_TYPE=Release -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 + - cmake --build . --parallel 2 || cmake --build . --verbose + - ctest -j 2 --output-on-failure + needs: ["clang++"] + +oneapi: + stage: build + image: intel/oneapi-hpckit:2023.0.0-devel-ubuntu22.04 # Intel(R) oneAPI DPC++/C++ Compiler 2023.0.0 (2023.0.0.20221201) + tags: + - non-shared + - large-disk-space + interruptible: true + script: + - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates cmake curl libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev liblapack-dev libfftw3-dev make pkg-config + - mkdir build && cd build + - icpx --version + - CXX=icpx cmake .. -DCMAKE_BUILD_TYPE=Release -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ctest --parallel 2 --output-on-failure + +oneapi-2022.2: + stage: build + image: intel/oneapi-hpckit:2022.2-devel-ubuntu20.04 + tags: + - non-shared + - large-disk-space + interruptible: true + script: + - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates cmake curl g++ libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev liblapack-dev libfftw3-dev make pkg-config + - mkdir build && cd build + - icpx --version + - CXX=icpx CXXFLAGS="-DPSTL_USE_PARALLEL_POLICIES=0" cmake .. -DCMAKE_BUILD_TYPE=Release + - cmake --build . --parallel 2 || cmake --build . --verbose + - ctest --parallel 2 --output-on-failure + needs: ["oneapi"] + +oneapi-latest c++20: + stage: build + image: intel/oneapi-hpckit:latest # icpx --version (2023.2.0.20230721) as of Dec 2023 + allow_failure: true + tags: + - non-shared + - large-disk-space + - high-bandwidth + interruptible: true + script: + # - wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null + # - echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list + - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates cmake curl g++ libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev liblapack-dev libfftw3-dev make pkg-config + - mkdir build && cd build + - icpx --version + - CXX=icpx cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_STANDARD=20 + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ctest --parallel 2 --output-on-failure + needs: ["oneapi"] + +nvhpc: + stage: build + image: nvcr.io/nvidia/nvhpc:22.11-devel-cuda11.8-ubuntu22.04 # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nvhpc/tags + tags: + - non-shared + - large-disk-space + interruptible: true + script: + - nvidia-smi + - apt-get update && apt-get install --no-install-recommends -y cmake make libboost-test-dev libboost-serialization-dev + - /opt/nvidia/hpc_sdk/Linux_x86_64/2022/compilers/bin/nvc++ --version + - mkdir build && cd build + - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/2022/compilers/bin/nvc++ cmake .. -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 + - cmake --build . --parallel 2 || cmake --build . --verbose + - ctest --output-on-failure nvhpc-22.7: stage: build + image: nvcr.io/nvidia/nvhpc:22.7-devel-cuda11.7-ubuntu22.04 # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nvhpc/tags + tags: + - non-shared + - large-disk-space + interruptible: true + script: + - nvidia-smi + - apt-get update && apt-get install --no-install-recommends -y cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev + - /opt/nvidia/hpc_sdk/Linux_x86_64/2022/compilers/bin/nvc++ --version + - mkdir build && cd build + - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/2022/compilers/bin/nvc++ cmake .. -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 # TODO(correaa) add -DCMAKE_BUILD_TYPE=Release + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ctest --output-on-failure + needs: ["nvhpc"] + +nvhpc-24.3 c++20 par: + stage: build + image: nvcr.io/nvidia/nvhpc:24.3-devel-cuda12.3-ubuntu22.04 # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nvhpc/tags + tags: + - non-shared + - large-disk-space + interruptible: true script: - - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates - - echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | tee /etc/apt/sources.list.d/nvhpc.list - - apt-get update -y - - apt-get install --no-install-recommends -y nvhpc-22-7 cmake make libboost-test-dev libboost-serialization-dev - - /opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvc++ --version + - nvidia-smi + - apt-get update && apt-get install --no-install-recommends -y cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libfftw3-dev pkg-config + - /opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvc++ --version - mkdir build && cd build - - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/22.7/compilers/bin/nvc++ cmake .. - - make VERBOSE=1 + - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvc++ cmake .. -DCMAKE_BUILD_TYPE=Release -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_STANDARD=20 -DCMAKE_CXX_FLAGS="-stdpar=multicore" + - cmake --build . --parallel 2 || cmake --build . --verbose - ctest --output-on-failure + needs: ["nvhpc"] + +cuda: + stage: build + image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04 + tags: + - non-shared + - nvidia-gpu + interruptible: true + script: + - nvidia-smi + - apt-get -qq update && apt-get install --no-install-recommends -y cmake wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev + - mkdir build && cd build + - ls /usr/local + - ls /usr/local/cuda-11/bin + - /usr/local/cuda-11/bin/nvcc --version + - CUDACXX=/usr/local/cuda-11/bin/nvcc cmake .. 
-DCMAKE_BUILD_TYPE=Release -DENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=61 + - cmake --build . --parallel 2 || cmake --build . --verbose + - ctest -j 2 --output-on-failure + - ../build/include/boost/multi/adaptors/thrust/test/speed.cu.x + - ../build/include/boost/multi/adaptors/thrust/test/speed_algo.cu.x + - ../build/include/boost/multi/adaptors/fftw/test/combinations.cpp.x needs: ["g++"] -circle-latest: +cuda-11.8 mkl: + stage: build + image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04 + tags: + - non-shared + - nvidia-gpu + interruptible: true + script: + - nvidia-smi + - apt-get -qq update && apt-get install --no-install-recommends -y cmake wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev + - DEBIAN_FRONTEND=interactive apt-get install --no-install-recommends --yes --force-yes -y libmkl-full-dev + - mkdir build && cd build + - ls /usr/local + - ls /usr/local/cuda-11/bin + - /usr/local/cuda-11/bin/nvcc --version + - CUDACXX=/usr/local/cuda-11/bin/nvcc cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=61 + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ctest -j 2 --output-on-failure + - ../build/include/boost/multi/adaptors/thrust/test/speed.cu.x + - ../build/include/boost/multi/adaptors/thrust/test/speed_algo.cu.x + - ../build/include/boost/multi/adaptors/fftw/test/combinations.cpp.x + needs: ["cuda"] + +cuda-11.4.3: + stage: build + image: nvcr.io/nvidia/cuda:11.4.3-devel-ubuntu20.04 + tags: + - non-shared + - nvidia-gpu + interruptible: true + script: + - nvidia-smi + # - export CUDA_VISIBLE_DEVICES=2 + - apt-get -qq update + - DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y cmake wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev + - wget https://github.com/Kitware/CMake/releases/download/v3.27.0-rc3/cmake-3.27.0-rc3-linux-x86_64.sh --no-verbose + - sh ./cmake-3.27.0-rc3-linux-x86_64.sh --skip-license --prefix=/usr + - cmake --version + - mkdir build && cd build + - /usr/local/cuda/bin/nvcc --version + - CUDACXX=/usr/local/cuda/bin/nvcc cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=61 + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ctest -j 2 --output-on-failure + needs: ["cuda"] + +cuclang++-16 cuda-11.4.3: + stage: build + image: nvcr.io/nvidia/cuda:11.4.3-devel-ubuntu20.04 + tags: + - non-shared + - nvidia-gpu + interruptible: true + script: + - nvidia-smi + - apt-get -qq update + - DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y cmake wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev + - apt-get install --no-install-recommends -y lsb-release software-properties-common + - wget https://github.com/Kitware/CMake/releases/download/v3.27.0-rc3/cmake-3.27.0-rc3-linux-x86_64.sh --no-verbose + - sh ./cmake-3.27.0-rc3-linux-x86_64.sh --skip-license --prefix=/usr + - cmake --version + - wget https://apt.llvm.org/llvm.sh + - chmod u+x llvm.sh + - ./llvm.sh 16 + - mkdir build && cd build + - clang++-16 --version + - cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=clang++-16 -DCMAKE_CXX_COMPILER=clang++-16 + - cmake --build . --parallel 2 || cmake --build . --verbose + - ctest -j 2 --output-on-failure + needs: ["cuda", "clang++"] + +culang++-17 cuda-11.8: + stage: build + image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04 # nvcr.io/nvidia/cuda:12.0.0-devel-ubuntu22.04 + tags: + - non-shared + - nvidia-gpu + - high-bandwidth + interruptible: true + script: + - nvidia-smi + - apt-get -qq update && apt-get install --no-install-recommends -y cmake wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev + - apt-get install --no-install-recommends -y lsb-release software-properties-common + - wget https://apt.llvm.org/llvm.sh + - chmod u+x llvm.sh + - ./llvm.sh 17 + - mkdir build && cd build + - clang++-17 --version + - cmake .. -DCMAKE_BUILD_TYPE=Release -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=clang++-17 -DCMAKE_CXX_COMPILER=clang++-17 + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ctest -j 2 --output-on-failure + needs: ["cuda", "clang++"] + +culang++-19 cuda-12.1.1 tidy: + stage: build + image: nvcr.io/nvidia/cuda:12.1.1-devel-ubuntu22.04 # nvcr.io/nvidia/cuda:12.0.0-devel-ubuntu22.04 allow_failure: false - image: debian:stable # circle is incompatible with gcc 12 lib + tags: + - non-shared + - nvidia-gpu + - high-bandwidth + interruptible: true + script: + - nvidia-smi + - apt-get -qq update && apt-get install --no-install-recommends -y cmake wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev + - apt-get install --no-install-recommends -y lsb-release software-properties-common + - wget https://apt.llvm.org/llvm.sh + - chmod u+x llvm.sh + - ./llvm.sh 19 + - apt-get install --no-install-recommends -y clang-tidy-19 + - mkdir build && cd build + - clang++-19 --version + - clang-tidy-19 --version + - cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_CLANG_TIDY=clang-tidy-19 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=clang++-19 -DCMAKE_CXX_COMPILER=clang++-19 + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ctest -j 2 --output-on-failure + needs: ["cuda", "clang++"] + +cuda-12.3.1: stage: build + allow_failure: false + image: nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04 + tags: + - non-shared + - nvidia-gpu + interruptible: true script: - - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates clang g++ pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev tar gzip wget libelf1 grep - - mkdir -p build_latest && cd build_latest - - wget https://www.circle-lang.org/linux/build_latest.tgz --no-verbose - - tar -zxvf build_latest.tgz + - nvidia-smi + - apt-get -qq update && apt-get install --no-install-recommends -y cmake g++-12 wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev + - mkdir build && cd build + - g++-12 --version + - /usr/local/cuda/bin/nvcc --version + - CUDACXX=/usr/local/cuda/bin/nvcc cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=61 -DCMAKE_CUDA_HOST_COMPILER=g++-12 -DCMAKE_CUDA_FLAGS="-allow-unsupported-compiler" + - cmake --build . --parallel 2 || cmake --build . 
--verbose + - ctest || ctest --rerun-failed --output-on-failure || echo "ctest failed, probably due to lack of drivers" + needs: ["cuda"] + +rocm: + stage: build + image: rocm/dev-ubuntu-22.04 + allow_failure: false + tags: + - non-shared + - docker + - high-bandwidth + interruptible: true + script: + - apt-get -qq update + - apt-get install --no-install-recommends -y cmake wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev wget gpg + - apt-get install --no-install-recommends -y rocthrust-dev hipblas-dev hipfft-dev rocm-device-libs + - /opt/rocm/bin/hipconfig --full + - HIPCC_VERBOSE=1 /opt/rocm/bin/hipcc --version + - cmake --version + - wget https://github.com/Kitware/CMake/releases/download/v3.27.0-rc3/cmake-3.27.0-rc3-linux-x86_64.sh --no-verbose + - sh ./cmake-3.27.0-rc3-linux-x86_64.sh --skip-license --prefix=/usr + - cmake --version + - mkdir build && cd build + - export PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:${PATH} + - export CMAKE_PREFIX_PATH=/opt/rocm:${CMAKE_PREFIX_PATH} + - export ROCM_PATH=/opt/rocm + - export HIP_PATH=/opt/rocm + - cmake .. -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ -DCMAKE_CXX_FLAGS="-DMULTI_USE_HIP" -DCMAKE_HIP_FLAGS="-DMULTI_USE_HIP" -DENABLE_HIP=1 -DCMAKE_HIP_ARCHITECTURES=gfx90a # TODO(correaa) add CMAKE_BUILD_TYPE + - cmake --build . --parallel 2 || cmake --build . --verbose --parallel 1 + - ctest || ctest --rerun-failed --output-on-failure || echo "ctest failed, probably due to lack of hardware" + needs: ["clang++", "g++"] + +# clang++-fedora c++23: +# stage: build +# image: fedora:rawhide # clang 17.0.6 as of Dec 2023 +# script: # clang 17 doesn't work with gcc 13 libstd +# - dnf install --setopt=install_weak_deps=False -y ca-certificates blas-devel boost-devel clang cmake fftw-devel make pkg-config +# - mkdir build && cd build +# - CXX=clang++ cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_STANDARD=23 # -DBoost_INCLUDE_DIR=../boost_1_84_0 -DBoost_LIBRARY_DIR=../boost_1_84_0/stage/lib -DBoost_NO_SYSTEM_PATHS=ON +# - cmake --build . --parallel 2 || cmake --build . --verbose +# - ctest --output-on-failure +# needs: ["clang++-testing c++20"] + +circle: + stage: build + tags: + - non-shared + - docker + # interruptible: false + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates clang cmake g++-11 gzip libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libelf1 libfftw3-dev make pkg-config tar wget + - mkdir -p build_latest ; cd build_latest + - wget https://www.circle-lang.org/linux/build_200.tgz --no-verbose + - tar -zxvf build_???.tgz - cd .. - ls - ./build_latest/circle --version - mkdir build && cd build - - CXX=`pwd`/../build_latest/circle cmake .. -DBOOST_LIBRARYDIR=/usr/lib/x86_64-linux-gnu/ -DENABLE_CIRCLE=1 - - make VERBOSE=1 - - ctest --output-on-failure - needs: ["clang++"] + - CXX=`pwd`/../build_latest/circle cmake .. -DCMAKE_BUILD_TYPE=Debug -DENABLE_CIRCLE=1 + - cmake --build . 
--parallel 2 || make VERBOSE=1 + - ctest -j 2 --output-on-failure -icpc: # https://github.com/oneapi-src/oneapi-ci/blob/master/.gitlab-ci.yml - allow_failure: false - image: debian:stable # icpc 2022.1.2 is incompatible with gcc 12 lib - stage: build - script: - - apt-get update && apt-get install --no-install-recommends -y --quiet g++ ca-certificates cmake curl libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev liblapack-dev libfftw3-dev make pkg-config - - curl --output webimage.sh --url https://registrationcenter-download.intel.com/akdlm/irc_nas/18479/l_HPCKit_p_2022.1.2.117_offline.sh --retry 5 --retry-delay 5 - - chmod +x webimage.sh - - ./webimage.sh -x -f webimage_extracted --log extract.log - - rm -rf webimage.sh - - ls -1 webimage_extracted/ - - WEBIMAGE_NAME=$(ls -1 webimage_extracted/) - - webimage_extracted/"$WEBIMAGE_NAME"/bootstrapper -s --action install --components=intel.oneapi.lin.dpcpp-cpp-compiler-pro:intel.oneapi.lin.mpi.devel --eula=accept --log-dir=. - - rm -rf webimage_extracted - - . /opt/intel/oneapi/setvars.sh - - ln --symbolic --force . ../multi - - export CC=icc; export CXX=icpc; - - $CXX -v - - mkdir build.icpc && cd build.icpc - - cmake .. - - make VERBOSE=1 - - ctest --output-on-failure - needs: ["g++"] +circle-187: + stage: build + tags: + - non-shared + - docker + interruptible: true + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates clang cmake g++-11 gzip libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libelf1 libfftw3-dev make pkg-config tar wget + - mkdir -p build_latest ; cd build_latest + - wget https://www.circle-lang.org/linux/build_187.tgz --no-verbose + - tar -zxvf build_???.tgz + - cd .. + - ls + - ./build_latest/circle --version + - mkdir build && cd build + - CXX=`pwd`/../build_latest/circle cmake .. -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DENABLE_CIRCLE=1 + - cmake --build . 
--parallel 2 || make VERBOSE=1 + - ctest -j 2 --output-on-failure + needs: ["circle"] -icpx: # https://github.com/oneapi-src/oneapi-ci/blob/master/.gitlab-ci.yml - stage: build - script: - - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates curl - - curl --output webimage.sh --url https://registrationcenter-download.intel.com/akdlm/irc_nas/18487/l_BaseKit_p_2022.1.2.146_offline.sh --retry 5 --retry-delay 5 - - chmod +x webimage.sh - - ./webimage.sh -x -f webimage_extracted --log extract.log - - rm -rf webimage.sh - - ls -1 webimage_extracted/ - - WEBIMAGE_NAME=$(ls -1 webimage_extracted/) - - webimage_extracted/"$WEBIMAGE_NAME"/bootstrapper -s --action install --components=`#intel.oneapi.lin.mkl.devel:`intel.oneapi.lin.dpcpp-cpp-compiler:intel.oneapi.lin.dpl --eula=accept --log-dir=. - - rm -rf webimage_extracted - - apt-get update && apt-get install --no-install-recommends -y --quiet g++ cmake libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev liblapack-dev libfftw3-dev make pkg-config - - . /opt/intel/oneapi/setvars.sh - - ln --symbolic --force . ../multi - - export CC=icx; export CXX=icpx; - - $CXX -v - - mkdir build.icpx && cd build.icpx - - cmake .. #cmake --build . -- --quiet --no-print-directory - - make VERBOSE=1 - - export MKL_VERBOSE=1 - - ctest --output-on-failure +circle-latest: + stage: build + allow_failure: true + tags: + - non-shared + - docker + interruptible: true + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates clang cmake g++-11 gzip libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libelf1 libfftw3-dev make pkg-config tar wget + - mkdir -p build_latest ; cd build_latest + - wget https://www.circle-lang.org/linux/build_latest.tgz --no-verbose + - tar -zxvf build_*.tgz - cd .. 
- needs: ["clang++"] + - ls + - ./build_latest/circle --version + - mkdir build && cd build + - CXX=`pwd`/../build_latest/circle cmake .. -DCMAKE_BUILD_TYPE=Debug -DENABLE_CIRCLE=1 + - cmake --build . --parallel 2 || make VERBOSE=1 + - ctest -j 2 --output-on-failure + needs: ["circle"] -variables: - msbuild: 'C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin\MSBuild.exe' - mscmake: 'C:\Program Files\CMake\bin\cmake.exe' - -#build_windows: -# stage: build -# script: -# - choco install --no-progress -y cmake -# - choco install --no-progress -y boost-msvc-14.2 -# - '& "$msbuild" -help' -# - '& "$msbuild" -version' -# - mkdir build -# - cd build -# - '& "$mscmake" --version' -# - '& "$mscmake" ..' -# - '& "$mscmake" --build . --verbose' -# - dir -# tags: -# - shared-windows -# - windows -# - windows-1809 - -#g++-codecov-runner: -# stage: test -# allow_failure: true -# tags: -# - intel_compiler -# script: -# - g++ --version -# - mkdir build && cd build -# - cmake --version -# - CXX="g++" CXXFLAGS="-fprofile-arcs -ftest-coverage --coverage" cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXE_LINKER_FLAGS="-fprofile-arcs -ftest-coverage -lgcov --coverage" -# - cmake --build . -j 12 -# - ctest --output-on-failure -T Test -T Coverage # - gcovr --xml-pretty --exclude-unreachable-branches --print-summary -o coverage.xml --root ${CI_PROJECT_DIR} -# - lcov --directory . 
--capture --output-file coverage.info -# - lcov --remove coverage.info '/usr/*' --output-file coverage.info -# - lcov --list coverage.info -# - bash <(curl -s https://codecov.io/bash) -t 999feb5b-a599-4d02-b9c5-46d977247f3a || echo "Codecov did not collect coverage reports" -# needs: ["g++"] - -#g++ pkg-config cmake make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev - -cobertura: - stage: build - script: - - apt-get update && apt-get -qq install --no-install-recommends -y --quiet cmake g++ gcovr make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev liblapack-dev libfftw3-dev make pkg-config - - mkdir build && cd build - - CXXFLAGS="--coverage" cmake .. - - cmake --build . - - ctest - - gcovr --xml-pretty --exclude-unreachable-branches --print-summary -o coverage.xml --root ${CI_PROJECT_DIR} - coverage: /^\s*lines:\s*\d+.\d+\%/ - artifacts: - name: ${CI_JOB_NAME}-${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHA} - expire_in: 2 days - reports: - coverage_report: - coverage_format: cobertura - path: build/coverage.xml +inq: + stage: test + image: debian:stable + tags: + - non-shared + - large-memory-space + interruptible: true + script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y ca-certificates cmake g++ git gfortran libopenmpi-dev libblas-dev libboost-filesystem-dev libboost-iostreams-dev libboost-serialization-dev libfftw3-dev libhdf5-dev liblapack-dev make ninja-build pkg-config python3-dev + - export PREFIX=`mktemp -d` + - git clone --recurse-submodules https://gitlab.com/npneq/inq.git + - cd inq + - git submodule update + - cd external_libs/multi + - git checkout $CI_COMMIT_SHA # check that multi repo is mirrored correctly + - cd ../.. + - mkdir build && cd build + - cmake .. -G Ninja --install-prefix=$PREFIX -DCMAKE_BUILD_TYPE=Release + - cmake --build . || cmake --build . --parallel 1 + - cmake --install . 
+ - export OMPI_ALLOW_RUN_AS_ROOT=1 + - export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 + - export OMPI_MCA_btl_vader_single_copy_mechanism=none + - export OMPI_MCA_rmaps_base_oversubscribe=1 + - ctest -j 2 --output-on-failure --timeout 600 + - INQ_EXEC_ENV="mpirun --oversubscribe -n 4" ctest --output-on-failure --timeout 600 needs: ["g++"] -#icpc-nomkl-runner: -# stage: test -# tags: -# - intel_compiler -# script: -# - export CXX="/opt/intel/oneapi/compiler/2021.2.0/linux/bin/intel64/icpc" -# - $CXX --version -# - mkdir build && cd build -# - cmake .. -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -# - cmake --build . -j 12 -# - export MKL_VERBOSE=1 -# - ctest --output-on-failure - -#icpc-runner: # image: intel/oneapi-basekit -# stage: test -# tags: -# - intel_compiler -# script: -# - . /opt/intel/oneapi/setvars.sh -# - export CXX="icpc" -# - $CXX --version -# - mkdir build && cd build -# - cmake .. -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -# - cmake --build . -j 12 -# - export MKL_VERBOSE=1 -# - ctest --output-on-failure - -#nvcc-runner: -# stage: test -# tags: -# - intel_compiler -# script: -# - mkdir build && cd build -# - /usr/local/cuda/bin/nvcc --version -# - CUDACXX=/usr/local/cuda/bin/nvcc CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda cmake .. -DCMAKE_CXX_STANDARD=17 -DCMAKE_BUILD_TYPE=Release -# - cmake --build . -j 12 -# - ctest -j 1 --output-on-failure - -#nvcc-cuda-runner: -# stage: test -# tags: -# - intel_compiler -# script: -# - mkdir build && cd build -# - /usr/local/cuda/bin/nvcc --version -# - CUDACXX=/usr/local/cuda/bin/nvcc CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda cmake .. -DCMAKE_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=72 -DCMAKE_CUDA_FLAGS="-gencode arch=compute_72,code=sm_72" -# - cmake --build . 
-j 12 || make VERBOSE=1 -# - ctest -j 1 --output-on-failure - -qmcpack-g++: +inq cuda: + allow_failure: false stage: test + image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04 + tags: + - non-shared + - nvidia-gpu + interruptible: true + before_script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y ca-certificates cmake git gfortran libopenmpi-dev libblas-dev libboost-filesystem-dev libboost-iostreams-dev libboost-serialization-dev libfftw3-dev libhdf5-dev liblapack-dev pkg-config python3-dev script: - - export DEBIAN_FRONTEND=noninteractive + - nvidia-smi + # - export CUDA_VISIBLE_DEVICES=0,1 + - __nvcc_device_query + - export PREFIX=`mktemp -d` + - git clone --recurse-submodules https://gitlab.com/npneq/inq.git + - cd inq + - cd external_libs/multi + - git checkout $CI_COMMIT_SHA + - cd ../.. + - rm -f cmake/FindNCCL.cmake # disable NCCL workaround + - mkdir build && cd build + - /usr/local/cuda/bin/nvcc --version + - CUDACXX=/usr/local/cuda/bin/nvcc cmake .. --install-prefix=$PREFIX -DENABLE_CUDA=1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=61 # =75 # =80 + - cmake --build . --parallel 4 || cmake --build . --parallel 1 + - cmake --install . 
+ - export OMPI_ALLOW_RUN_AS_ROOT=1 + - export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 + - export OMPI_MCA_btl_vader_single_copy_mechanism=none + - export OMPI_MCA_rmaps_base_oversubscribe=1 + - ctest -j 2 --output-on-failure --timeout 2400 + - INQ_EXEC_ENV="mpirun --oversubscribe -n 4" ctest --output-on-failure --timeout 2400 + timeout: 2 hours 30 minutes + needs: ["cuda", "inq"] + +inq rocm: + stage: test + image: rocm/dev-ubuntu-22.04 + allow_failure: false + tags: + - non-shared + - large-disk-space + interruptible: true + script: + - apt-get -qq update + - apt-get -qq install --no-install-recommends -y ca-certificates cmake git gfortran gpg hipblas-dev hipfft-dev libopenmpi-dev libblas-dev libboost-filesystem-dev libboost-iostreams-dev libboost-serialization-dev libfftw3-dev libhdf5-dev liblapack-dev make pkg-config python3-dev rocthrust-dev rocm-device-libs wget + - /opt/rocm/bin/hipconfig --full + - HIPCC_VERBOSE=1 /opt/rocm/bin/hipcc --version + - cmake --version + - wget https://github.com/Kitware/CMake/releases/download/v3.27.0-rc3/cmake-3.27.0-rc3-linux-x86_64.sh --no-verbose + - sh ./cmake-3.27.0-rc3-linux-x86_64.sh --skip-license --prefix=/usr + - cmake --version + - export PREFIX=`mktemp -d` + - git clone --recurse-submodules https://gitlab.com/npneq/inq.git + - cd inq + - cd external_libs/multi + - git checkout $CI_COMMIT_SHA + - cd ../.. + - mkdir build && cd build + - export PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:${PATH} + - export CMAKE_PREFIX_PATH=/opt/rocm:${CMAKE_PREFIX_PATH} + - export ROCM_PATH=/opt/rocm + - export HIP_PATH=/opt/rocm + - cmake .. -DCMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ --install-prefix=$PREFIX -DENABLE_HIP=1 -DCMAKE_HIP_ARCHITECTURES=gfx90a + - cmake --build . --parallel 4 || cmake --build . --parallel 1 + - cmake --install . 
+ - export OMPI_ALLOW_RUN_AS_ROOT=1 + - export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 + - export OMPI_MCA_btl_vader_single_copy_mechanism=none + - export OMPI_MCA_rmaps_base_oversubscribe=1 + - INQ_EXEC_ENV="mpirun --oversubscribe -n 4" ctest --output-on-failure --timeout 2400 || echo "ctest failed, probably due to lack of hardware" + timeout: 2 hours 30 minutes + needs: ["rocm", "inq"] + +qmcpack: + stage: test + image: debian:latest + tags: + - non-shared + - docker + - high-bandwidth + interruptible: true + before_script: - apt-get -qq update && apt-get -qq install --no-install-recommends -y ca-certificates cmake g++ git gfortran libblas-dev libboost-serialization-dev libfftw3-dev libhdf5-dev liblapack-dev libopenmpi-dev make numdiff pkg-config python3 python3-h5py python3-numpy python3-mpi4py python3-scipy libxml2-dev - - git clone https://github.com/QMCPACK/qmcpack.git - - cp -r qmcpack qmcpack.noupdate + script: + - git clone --depth=1 https://github.com/QMCPACK/qmcpack.git # --branch fix_afqmc_pointer_traits - cd qmcpack - git config --global user.email "alfredo.correa@gmail.com" && git config --global user.name "Alfredo Correa" - git rm -r external_codes/boost_multi/multi && git commit -m "remove multi subtree" - - echo $CI_REPOSITORY_URL - - echo $CI_COMMIT_BRANCH - - git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_BRANCH || git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL # e.g. https://gitlab.com/correaa/boost-multi.git - - cd ../qmcpack.noupdate - - cd build - - cd ../../qmcpack + - git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_SHA || git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL # e.g. 
https://gitlab.com/correaa/boost-multi.git - cd build - cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_MIXED_PRECISION=1 -DCMAKE_BUILD_TYPE=Debug -DMPIEXEC_PREFLAGS="--allow-run-as-root;--bind-to;none" .. - make ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance @@ -726,284 +918,138 @@ qmcpack-g++: - ctest -R afqmc --output-on-failure needs: ["g++"] -#qmcpack-clang++: -# stage: test -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - apt-get -qq update && apt-get -qq install --no-install-recommends -y clang libblas-dev liblapack-dev libfftw3-dev libboost-serialization-dev libopenmpi-dev gfortran pkg-config cmake make git ca-certificates numdiff python3 python3-numpy python3-h5py python3-mpi4py python3-scipy libxml2-dev libhdf5-dev libomp-dev -# - git clone https://github.com/QMCPACK/qmcpack.git -# - cp -r qmcpack qmcpack.noupdate -# - cd qmcpack -# - git config --global user.email "alfredo.correa@gmail.com" && git config --global user.name "Alfredo Correa" -# - git rm -r external_codes/boost_multi/multi && git commit -m "remove multi subtree" -# - echo $CI_REPOSITORY_URL -# - echo $CI_COMMIT_BRANCH -# - git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_BRANCH || git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL # e.g. https://gitlab.com/correaa/boost-multi.git -# - cd ../qmcpack.noupdate -# - cd build -# - export OMPI_CC=clang -# - export OMPI_CXX=clang++ -# - cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_MIXED_PRECISION=1 -DCMAKE_BUILD_TYPE=Debug -DMPIEXEC_PREFLAGS="--allow-run-as-root;--bind-to;none" .. 
-# - make ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance -# - ctest -R ppconvert --output-on-failure -# - ctest -R afqmc --output-on-failure -# - cd ../../qmcpack -# - cd build -# - OMPI_CC=clang OMPI_CXX=clang++ cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_MIXED_PRECISION=1 -DCMAKE_BUILD_TYPE=Debug -DMPIEXEC_PREFLAGS="--allow-run-as-root;--bind-to;none" .. -# - make ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance -# - ctest -R ppconvert --output-on-failure -# - ctest -R afqmc --output-on-failure -# needs: ["clang++"] - -#qmcpack-cuda-11.6-compileonly: -# allow_failure: true -# stage: test -# script: -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates -# - echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | tee /etc/apt/sources.list.d/nvhpc.list -# - apt-get update -y -# - apt-get install --no-install-recommends -y nvhpc-22-3 gfortran g++ git wget make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev libhdf5-dev liblapack-dev libopenmpi-dev libxml2-dev make numdiff pkg-config python3 python3-h5py python3-mpi4py python3-numpy python3-scipy -# - g++ --version -# - /opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6/bin/nvcc --version -# - wget https://cmake.org/files/v3.22/cmake-3.22.4-linux-x86_64.sh --no-verbose # following https://askubuntu.com/a/865294/15943 -# - mkdir /opt/cmake -# - sh cmake-3.22.4-linux-x86_64.sh --skip-license --prefix=/opt/cmake -# - ln -s 
/opt/cmake/bin/cmake /usr/local/bin/cmake -# - cmake --version -# - git clone https://github.com/QMCPACK/qmcpack.git -# - cd qmcpack -# - git config --global user.email "alfredo.correa@gmail.com" && git config --global user.name "Alfredo Correa" -# - git rm -r external_codes/boost_multi/multi && git commit -m "remove multi subtree" -# - echo $CI_REPOSITORY_URL -# - echo $CI_COMMIT_BRANCH -# - git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_BRANCH || git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL # e.g. https://gitlab.com/correaa/boost-multi.git -# - cd build -# - CUDACXX=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6/bin/nvcc -DCUDA_TOOLKIT_ROOT_DIR=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6 .. -# - make ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance -# - ctest -R ppconvert --output-on-failure - -#qmcpack-cuda-11.6-gcc9-compileonly: -# allow_failure: true -# image: debian:stable-backports -# stage: test -# script: -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates -# - echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | tee /etc/apt/sources.list.d/nvhpc.list -# - apt-get update -y -# - apt-get install --no-install-recommends -y nvhpc-22-3 gfortran g++ g++-9 git wget make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev libhdf5-dev liblapack-dev libopenmpi-dev libxml2-dev make numdiff pkg-config python3 python3-h5py python3-mpi4py python3-numpy python3-scipy 
-# - g++-9 --version -# - /opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6/bin/nvcc --version -# - wget https://cmake.org/files/v3.22/cmake-3.22.4-linux-x86_64.sh --no-verbose # following https://askubuntu.com/a/865294/15943 -# - mkdir /opt/cmake -# - sh cmake-3.22.4-linux-x86_64.sh --skip-license --prefix=/opt/cmake -# - ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake -# - cmake --version -# - git clone https://github.com/QMCPACK/qmcpack.git -# - cd qmcpack -# - git config --global user.email "alfredo.correa@gmail.com" && git config --global user.name "Alfredo Correa" -# - git rm -r external_codes/boost_multi/multi && git commit -m "remove multi subtree" -# - echo $CI_REPOSITORY_URL -# - echo $CI_COMMIT_BRANCH -# - git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_BRANCH || git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL # e.g. https://gitlab.com/correaa/boost-multi.git -# - cd build -# - CUDACXX=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6/bin/nvcc -DCMAKE_CUDA_HOST_COMPILER=g++-9 -DCUDA_TOOLKIT_ROOT_DIR=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6 .. -# - make ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance -# - ctest -R ppconvert --output-on-failure - -#qmcpack-icpc-runner: -# stage: test -# tags: -# - intel_compiler -# script: -# - . 
/opt/intel/oneapi/setvars.sh -# - export CXX="icpc" -# - $CXX --version -# - cmake --version -# - git clone https://github.com/QMCPACK/qmcpack.git -# - cd qmcpack -# - git config --global user.email "alfredo.correa@gmail.com" && git config --global user.name "Alfredo Correa" -# - git rm -r external_codes/boost_multi/multi && git commit -m "remove multi subtree" -# - echo $CI_REPOSITORY_URL -# - echo $CI_COMMIT_BRANCH -# - git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_BRANCH || git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL # e.g. https://gitlab.com/correaa/boost-multi.git master -# - cd build -# - cmake -DCMAKE_C_COMPILER=mpiicc -DCMAKE_CXX_COMPILER=mpiicpc -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 .. -# - make -output-sync=recurse -j 16 ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance -# - ctest -R ppconvert --output-on-failure - -qmcpack-cuda-runner: - allow_failure: false +# qmcpack-cuda: +# stage: test +# image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04 +# tags: +# - nvidia-gpu +# before_script: +# - apt-get -qq update && apt-get -qq install --no-install-recommends -y ca-certificates cmake git libopenmpi-dev cmake g++ git gfortran libblas-dev libboost-serialization-dev libfftw3-dev libhdf5-dev liblapack-dev libopenmpi-dev make numdiff pkg-config python3 python3-h5py python3-numpy python3-mpi4py python3-scipy libxml2-dev +# script: +# - nvidia-smi +# - git clone --depth 1 https://github.com/QMCPACK/qmcpack.git +# # - git clone https://github.com/correaa/qmcpack.git --branch fix_afqmc_pointer_traits +# - cd qmcpack +# - git config --global user.email "alfredo.correa@gmail.com" && git config --global user.name "Alfredo Correa" +# - git rm -r 
external_codes/boost_multi/multi && git commit -m "remove multi subtree" +# - git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_SHA # e.g. https://gitlab.com/correaa/boost-multi.git +# - cd build +# - nvcc --version +# - __nvcc_device_query +# - CUDACXX=/usr/local/cuda/bin/nvcc cmake .. -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_HOST_COMPILER=g++ -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" -DCMAKE_CUDA_ARCHITECTURES=80 +# - make -j 4 ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance +# - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest -R ppconvert --output-on-failure +# - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest -R afqmc --output-on-failure +# needs: ["qmcpack","cuda"] + +qmcpack cuda-12.3.1: stage: test + image: nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04 tags: - - intel_compiler + - non-shared + - nvidia-gpu + - high-bandwidth + interruptible: true + before_script: + - apt-get -qq update && apt-get -qq install --no-install-recommends -y ca-certificates cmake git libopenmpi-dev cmake g++ git gfortran libblas-dev libboost-serialization-dev libfftw3-dev libhdf5-dev liblapack-dev libopenmpi-dev make numdiff pkg-config python3 python3-h5py python3-numpy python3-mpi4py python3-scipy libxml2-dev script: - - cmake --version - - git clone https://github.com/QMCPACK/qmcpack.git - - cp -r qmcpack qmcpack.noupdate + - nvidia-smi + - git clone --depth 1 https://github.com/QMCPACK/qmcpack.git # --branch fix_afqmc_pointer_traits - cd qmcpack - git config --global user.email "alfredo.correa@gmail.com" && git config --global user.name 
"Alfredo Correa" - git rm -r external_codes/boost_multi/multi && git commit -m "remove multi subtree" - - git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_BRANCH # e.g. https://gitlab.com/correaa/boost-multi.git - - cd ../qmcpack.noupdate - - cd build - - CUDACXX=/usr/local/cuda/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_HOST_COMPILER=g++-9 -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" .. - - make ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance - - ctest -R ppconvert --output-on-failure - - ctest -R afqmc --output-on-failure - - cd ../../qmcpack + - git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_SHA # e.g. https://gitlab.com/correaa/boost-multi.git - cd build - - CUDACXX=/usr/local/cuda/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_HOST_COMPILER=g++-9 -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" .. 
- - make -j4 ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance - - ctest -R ppconvert --output-on-failure - - ctest -R afqmc --output-on-failure - needs: ["qmcpack-g++"] - -#qmcpack-cuda-11.8-compileonly: -# allow_failure: false -# image: debian:stable -# stage: test -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates cmake curl g++ git gfortran gnupg libblas-dev libboost-serialization-dev libfftw3-dev libhdf5-dev liblapack-dev libopenmpi-dev libxml2-dev make numdiff pkg-config python3 python3-h5py python3-numpy python3-mpi4py python3-scipy software-properties-common -# - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/3bf863cc.pub -# - add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/ /" -# - add-apt-repository contrib -# - apt-get update -# - apt-get -y install cuda -# - apt-get install --no-install-recommends -y cmake g++ g++-9 git gfortran make libfftw3-dev libblas-dev libboost-serialization-dev libboost-test-dev libboost-timer-dev libboost-filesystem-dev liblapack-dev libopenmpi-dev pkg-config wget -# - cmake --version -# - git clone https://github.com/QMCPACK/qmcpack.git -# - cp -r qmcpack qmcpack.noupdate -# - cd qmcpack -# - git config --global user.email "alfredo.correa@gmail.com" && git config --global user.name "Alfredo Correa" -# - git rm -r external_codes/boost_multi/multi && git commit -m "remove multi subtree" -# - git subtree add --squash -P external_codes/boost_multi/multi $CI_REPOSITORY_URL $CI_COMMIT_BRANCH # e.g. 
https://gitlab.com/correaa/boost-multi.git -# - cd ../qmcpack.noupdate -# - cd build -# - CUDACXX=/usr/local/cuda/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_HOST_COMPILER=g++-9 -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" .. -# - make ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance -# - cd ../../qmcpack -# - cd build -# - CUDACXX=/usr/local/cuda/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_HOST_COMPILER=g++-9 -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" .. -# - make ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance -# needs: ["cuda-11.8"] - -#inq-clang++-latest: -# stage: test -# script: -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates clang cmake gfortran git libblas-dev libboost-filesystem-dev libboost-serialization-dev libfftw3-dev liblapack-dev libopenmpi-dev make pkg-config -# - git clone --recurse-submodules https://gitlab.com/npneq/inq.git -# - cd inq -# - cd external_libs/multi -# - git fetch -# - git checkout $CI_COMMIT_BRANCH -# - cd ../.. 
-# - mkdir build && cd build -# - CXX=clang++ ../configure --prefix=$HOME --disable-debug -# - make -# - make install -# - ctest --output-on-failure -# needs: ["clang++"] - -#cuda-11.6: -# allow_failure: false -# stage: build -# script: -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates -# - echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | tee /etc/apt/sources.list.d/nvhpc.list -# - apt-get update -y -# - apt-get install --no-install-recommends -y cmake nvhpc-22-3 wget pkg-config make libboost-test-dev libboost-serialization-dev libboost-timer-dev libblas-dev libfftw3-dev -# - cmake --version -# - apt-cache madison nvhpc -# - /opt/nvidia/hpc_sdk/Linux_x86_64/2022/cuda/bin/nvcc --version -# - mkdir build && cd build -# - cmake .. -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6/bin/nvcc -DCUDA_TOOLKIT_ROOT_DIR=/opt/nvidia/hpc_sdk/Linux_x86_64/22.3/cuda/11.6 -DCMAKE_CUDA_ARCHITECTURES=61 -DCMAKE_CUDA_FLAGS="-gencode arch=compute_61,code=sm_61" -# - cmake --build . 
|| make VERBOSE=1 -# - ctest --output-on-failure - - -#inq-cuda-11.8-compileonly: -# allow_failure: false -# image: debian:stable -# stage: test -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates gnupg software-properties-common -# - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/3bf863cc.pub -# - add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/ /" -# - add-apt-repository contrib -# - apt-get update -# - apt-get -y install cuda -# - apt-get install --no-install-recommends -y cmake g++ g++-9 git gfortran make libfftw3-dev libblas-dev libboost-serialization-dev libboost-test-dev libboost-timer-dev libboost-filesystem-dev liblapack-dev libopenmpi-dev pkg-config wget -# - git clone --recurse-submodules https://gitlab.com/npneq/inq.git -# - cd inq -# - cd external_libs/multi -# - git fetch -# - git checkout $CI_COMMIT_BRANCH -# - cd ../.. 
-# - mkdir build && cd build -# - g++ --version -# - /usr/local/cuda-11.8/bin/nvcc --version -# - CUDACXX=/usr/local/cuda-11.8/bin/nvcc ../configure --prefix=$PREFIX --enable-cuda --with-cuda-prefix=/usr/local/cuda --pass-thru -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-11.8 -DCMAKE_CUDA_ARCHITECTURES=70 -# - make silicon -# needs: ["cuda-11.8"] - -#inq-cuda-11.4-compileonly: -# allow_failure: true -# image: nvidia/cuda:11.4.0-devel -# stage: test -# script: -# - export DEBIAN_FRONTEND=noninteractive -# - apt-get update && apt-get install --no-install-recommends -y --quiet libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-serialization-dev libopenmpi-dev gfortran g++ pkg-config make git ca-certificates wget -# - wget https://cmake.org/files/v3.21/cmake-3.21.3-linux-x86_64.sh --no-verbose >/dev/null -# - mkdir /opt/cmake -# - sh cmake-3.21.3-linux-x86_64.sh --skip-license --prefix=/opt/cmake -# - ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake -# - cmake --version -# - git clone --recurse-submodules https://gitlab.com/npneq/inq.git -# - cd inq -# - cd external_libs/multi -# - git fetch -# - git checkout $CI_COMMIT_BRANCH -# - cd ../.. -# - mkdir build && cd build -# - /usr/local/cuda/bin/nvcc -V -# - CUDACXX=/usr/local/cuda/bin/nvcc ../configure --prefix=$PREFIX --enable-cuda --with-cuda-prefix=/usr/local/cuda --pass-thru -DCMAKE_CUDA_ARCHITECTURES=70 -# - make silicon - -#inq-g++-latest: -# stage: test -# script: -# - apt-get update && apt-get install --no-install-recommends -y --quiet libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-serialization-dev libopenmpi-dev gfortran g++ pkg-config cmake make git ca-certificates -# - git clone --recurse-submodules https://gitlab.com/npneq/inq.git -# - cd inq -# - cd external_libs/multi -# - git fetch -# - git checkout $CI_COMMIT_BRANCH -# - cd ../.. 
-# - mkdir build && cd build -# - ../configure --prefix=$HOME --disable-debug -# - make -# - make install -# - ctest --output-on-failure -# needs: ["g++"] - -inq-nvcc-ompi-runner: - stage: test + - nvcc --version + - __nvcc_device_query + - CUDACXX=/usr/local/cuda/bin/nvcc cmake .. -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_HOST_COMPILER=g++ -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" -DCMAKE_CUDA_ARCHITECTURES=75 # =80 + - make -j 4 ppconvert afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance + - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest -R ppconvert --output-on-failure + - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest -R afqmc --output-on-failure + needs: ["qmcpack","cuda"] + +# sonar cloud +# from instructions +# another example here: https://github.com/sonarsource-cfamily-examples/linux-cmake-gitlab-ci-sc/blob/main/.gitlab-ci.yml + +# get-sonar-binaries: +# stage: .pre +# cache: +# policy: push +# key: "${CI_COMMIT_SHORT_SHA}" +# paths: +# - build-wrapper/ +# - sonar-scanner/ +# script: +# - apt-get -qq update && apt-get -qq install curl unzip +# # Download sonar-scanner +# - curl -sSLo ./sonar-scanner.zip 'https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-5.0.1.3006-linux.zip' +# - unzip -o sonar-scanner.zip +# - mv sonar-scanner-5.0.1.3006-linux sonar-scanner +# # Download build-wrapper +# - curl -sSLo ./build-wrapper-linux-x86.zip "${SONAR_HOST_URL}/static/cpp/build-wrapper-linux-x86.zip" +# - unzip -oj build-wrapper-linux-x86.zip -d ./build-wrapper +# only: +# - merge_requests +# - master +# - develop + +sonar: + stage: build + 
allow_failure: true + only: + refs: + - master tags: - - intel_compiler + - non-shared + - docker + # cache: + # policy: pull-push + # key: "${CI_COMMIT_SHORT_SHA}" + # paths: + # - build-wrapper/ + # - sonar-scanner/ + # - bw-output/ + interruptible: true script: - - export PREFIX=`mktemp -d` - - git clone --recurse-submodules https://gitlab.com/npneq/inq.git - - cd inq - - cd external_libs/multi - - git checkout $CI_COMMIT_BRANCH - - cd ../.. - - mkdir build && cd build - - CUDACXX=/usr/local/cuda/bin/nvcc ../configure --prefix=$PREFIX --enable-cuda --disable-debug --pass-thru -DCMAKE_CUDA_ARCHITECTURES=70 `#--with-cuda-prefix=/usr/local/cuda` - - make -j4 -output-sync=recurse - - make -j4 install - - ctest -j2 --output-on-failure --timeout 600 - - cd src; INQ_EXEC_ENV="mpirun --oversubscribe -np 4" ctest --output-on-failure --timeout 600; cd .. - - rm -rf $PREFIX - needs: ["cuda-11.8"] + # Run the build inside the build wrapper + - apt-get -qq update && apt-get -qq install --no-install-recommends -y --quiet ca-certificates cmake curl unzip g++ make libboost-test-dev libboost-timer-dev libboost-serialization-dev libblas-dev libfftw3-dev pkg-config gcovr lcov + - g++ --version + - mkdir build + # Download sonar-scanner + - curl -sSLo ./sonar-scanner.zip 'https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-5.0.1.3006-linux.zip' + - unzip -o sonar-scanner.zip + - mv sonar-scanner-5.0.1.3006-linux sonar-scanner + # Download build-wrapper + - curl -sSLo ./build-wrapper-linux-x86.zip "${SONAR_HOST_URL}/static/cpp/build-wrapper-linux-x86.zip" + - unzip -oj build-wrapper-linux-x86.zip -d ./build-wrapper + - cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Debug -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_FLAGS="--coverage -O0 -fno-inline -fno-inline-small-functions -fno-default-inline" -DCMAKE_EXE_LINKER_FLAGS="--coverage" + - build-wrapper/build-wrapper-linux-x86-64 --out-dir bw-output cmake --build build/ --verbose + - cd build + - ctest -j 1 --output-on-failure -T Test + - ctest -j 1 --output-on-failure -T Coverage + - gcovr --xml-pretty --exclude-unreachable-branches --print-summary -o coverage.xml --root ${CI_PROJECT_DIR} + - cd .. + - sonar-scanner/bin/sonar-scanner -Dsonar.host.url="${SONAR_HOST_URL}" -Dsonar.token="${SONAR_TOKEN}" -Dsonar.cfamily.build-wrapper-output=bw-output -Dsonar.cfamily.gcov.reportsPath="build/" + # only: + # - merge_requests + # - master + # - main + # - develop + # needs: ["g++"] + +# sonarcloud-check: +# stage: .post +# cache: +# policy: pull +# key: "${CI_COMMIT_SHORT_SHA}" +# paths: +# - build-wrapper/ +# - sonar-scanner/ +# - bw-output/ +# script: +# - sonar-scanner/bin/sonar-scanner -Dsonar.host.url="${SONAR_HOST_URL}" -Dsonar.token="${SONAR_TOKEN}" -Dsonar.cfamily.build-wrapper-output=bw-output +# only: +# - merge_requests +# - master +# - develop diff --git a/external_codes/boost_multi/multi/.readthedocs.yaml b/external_codes/boost_multi/multi/.readthedocs.yaml new file mode 100644 index 0000000000..71849fbf91 --- /dev/null +++ b/external_codes/boost_multi/multi/.readthedocs.yaml @@ -0,0 +1,32 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "19" + # rust: "1.64" + # golang: "1.19" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub 
+# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: docs/requirements.txt diff --git a/external_codes/boost_multi/multi/CMakeLists.txt b/external_codes/boost_multi/multi/CMakeLists.txt index 5fafbae195..10ada484f6 100644 --- a/external_codes/boost_multi/multi/CMakeLists.txt +++ b/external_codes/boost_multi/multi/CMakeLists.txt @@ -1,96 +1,207 @@ -# -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -# Copyright 2018-2022 Alfredo A. Correa - -cmake_minimum_required(VERSION 3.13) -# for reference: Ubuntu 20.04 ships cmake 3.16.3, Ubuntu 21.10 -> 3.18.4, Fedora 31 -> 3.17.4, (Gitlab CI) debian:stable-backports ships cmake 3.13.4, debian:testing -> 3.18.4 - -# to install this project: cmake .. -DCMAKE_INSTALL_PREFIX:PATH=$HOME cmake --build . --config Release --target test --target install -- -j $(nproc) -# to use this project do: project("Your project") find_package(boost-multi CONFIG REQUIRED) add_executable(${PROJECT_NAME} src/your_main.cpp) target_link_libraries(${PROJECT_NAME} boost-multi::boost-multi) - -message("CMake version: ${CMAKE_VERSION}") -message("current build directory: ${CMAKE_CURRENT_BINARY_DIR}") - -project( - multi - VERSION 0.79.0 - DESCRIPTION "A header only C++ library that provides multidimensional array access to contiguous or regularly contiguous memory (or ranges)." 
- HOMEPAGE_URL "https://gitlab.com/correaa/boost-multi" - LANGUAGES CXX -) - -include(GNUInstallDirs) - -add_library(${PROJECT_NAME} INTERFACE) - -target_include_directories(${PROJECT_NAME} INTERFACE $ $) - -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - -# target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_17) -# $<$,$>: -# https://youtrack.jetbrains.com/issue/CPP-25608 -target_compile_features(${PROJECT_NAME} INTERFACE $<$>:cxx_std_17>) -target_compile_options (${PROJECT_NAME} INTERFACE $<$: -std=c++17>) - -target_compile_options ( - ${PROJECT_NAME} - INTERFACE - $<$: - -Xcompiler=-Werror,-Wall,-Wextra,-Wcast-align,-Wcast-qual,-Wno-double-promotion,-Wduplicated-branches,-Wduplicated-cond,-Wformat-truncation,-Wformat=2,-Wlogical-op,-Wmisleading-indentation,-Wno-missing-include-dirs,-Wnon-virtual-dtor,-Wno-missing-declarations,-Wnon-virtual-dtor,-Wnull-dereference,-Woverloaded-virtual,-Wpointer-arith,-Wno-redundant-decls,-Wno-shadow,-Wno-switch-enum,-Wno-unknown-pragmas,-Wtrampolines,-Wuninitialized,-Wunused,-Wunused-but-set-variable,-Wunused-result,-Wno-zero-as-null-pointer-constant - --expt-relaxed-constexpr --extended-lambda --Werror=cross-execution-space-call -Xcudafe=--display_error_number -Xcudafe=--diag_error=incompatible_assignment_operands -Xcudafe=--diag_error=returning_ptr_to_local_variable -Xcudafe=--diag_error=subscript_out_of_range -Xcudafe=--diag_error=used_before_set -Xcudafe=--diag_error=undefined_preproc_id -Xcudafe=--diag_error=implicit_func_decl -Xcudafe=--diag_error=implicit_return_from_non_void_function -Xcudafe=--diag_error=missing_type_specifier - > - $<$,$>: # EDG diagnostics list: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg - --display_error_number 
--diag_error=incompatible_assignment_operands,returning_ptr_to_local_variable,subscript_out_of_range,used_before_set,undefined_preproc_id,implicit_func_decl,implicit_return_from_non_void_function,missing_type_specifier - > -) - -install( - TARGETS ${PROJECT_NAME} - EXPORT ${PROJECT_NAME}_Targets - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} -) - -include(CMakePackageConfigHelpers) -write_basic_package_version_file( - "${PROJECT_NAME}ConfigVersion.cmake" - VERSION ${PROJECT_VERSION} - COMPATIBILITY SameMajorVersion -) - -install( - EXPORT ${PROJECT_NAME}_Targets - FILE ${PROJECT_NAME}Targets.cmake - NAMESPACE boost::multi:: - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake -) - -if (NOT MSVC) - install(FILES ${PROJECT_SOURCE_DIR}/include/multi/array_ref.hpp ${PROJECT_SOURCE_DIR}/include/multi/array.hpp DESTINATION include/multi) - install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/multi/detail DESTINATION include/multi) - install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/multi/adaptors DESTINATION include/multi) -endif() +# Copyright 2018-2024 Alfredo A. Correa +# Copyright 2024 Matt Borland +# Distributed under the Boost Software License, Version 1.0. +# https://www.boost.org/LICENSE_1_0.txt + +cmake_minimum_required(VERSION 3.16) + +# Library doesn't require installation, to still install this project: +# ~~~ +# $ cmake .. --install-prefix=$HOME && cmake --build . 
--config Release --target test --target install -- -j $(nproc) +# ~~~ +# to use this library in another CMake project +# ~~~ +# project("Your project") +# find_package(boost-multi CONFIG REQUIRED) +# add_executable(${PROJECT_NAME} src/your_main.cpp) +# target_link_libraries(${PROJECT_NAME} boost-multi::boost-multi) +# ~~~ + +if (DEFINED BOOST_SUPERPROJECT_VERSION AND NOT DEFINED BOOST_MULTI_STANDALONE) + + project(boost_multi VERSION "${BOOST_SUPERPROJECT_VERSION}" LANGUAGES CXX) + + add_library(boost_multi INTERFACE) -#find_program(MEMORYCHECK_COMMAND valgrind) -#set(VALGRIND_COMMAND_OPTIONS "-q --tool=memcheck --leak-check=yes --num-callers=51 --trace-children=yes --leak-check=full --track-origins=yes --gen-suppressions=all") -#set(MEMORYCHECK_COMMAND_OPTIONS "-q --tool=memcheck --leak-check=yes --num-callers=52 --trace-children=yes --leak-check=full --track-origins=yes --gen-suppressions=all") # must go before `include(CTest)` -#set(MEMORYCHECK_SUPPRESSIONS_FILE "${PROJECT_SOURCE_DIR}/.valgrind-suppressions") # must go before `include(CTest)` + add_library(Boost::multi ALIAS boost_multi) -include(CTest) + target_include_directories(boost_multi INTERFACE include) -find_package(Boost COMPONENTS unit_test_framework) + target_compile_features(boost_multi INTERFACE cxx_std_14) -if(Boost_FOUND) - enable_testing() - add_subdirectory(test) + message(STATUS "Boost.Multi: standalone mode OFF") + + if(BUILD_TESTING AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt") + + add_subdirectory(test) + + endif() - add_subdirectory(include/multi/adaptors/blas ) - add_subdirectory(include/multi/adaptors/fftw ) - add_subdirectory(include/multi/adaptors/cuda ) - add_subdirectory(include/multi/adaptors/thrust) else() - message (WARNING "Cannot find Boost, library will not be tested. 
If you want this feature install Boost.Test, for example please run:\n sudo apt install libboost-test-dev") -endif() + project( + multi + HOMEPAGE_URL "https://gitlab.com/correaa/boost-multi" + DESCRIPTION "A header-only modern C++ library that provides access and manipulation of data in multidimensional arrays." + VERSION 0.80.1 + LANGUAGES CXX) + + message(STATUS "Boost.Multi: standalone mode ON") + + message("current binary directory: ${CMAKE_CURRENT_BINARY_DIR}") + + include_directories(${PROJECT_SOURCE_DIR}/include) # workaround for vscode to detect headers https://stackoverflow.com/a/68139743/225186 + + include(CMakePackageConfigHelpers) + include(CMakeDependentOption) + include(GNUInstallDirs) + + find_program(MEMORYCHECK_COMMAND valgrind) + set(VALGRIND_COMMAND_OPTIONS "-q --tool=memcheck --leak-check=yes --num-callers=51 --trace-children=yes --leak-check=full --track-origins=yes --gen-suppressions=all") + + # must go before `include(CTest)` + set(MEMORYCHECK_COMMAND_OPTIONS "-q --tool=memcheck --leak-check=yes --num-callers=52 --trace-children=yes --leak-check=full --track-origins=yes --gen-suppressions=all") + set(MEMORYCHECK_SUPPRESSIONS_FILE "${PROJECT_SOURCE_DIR}/.valgrind-suppressions") + + include(CTest) + + option(MULTI_BUILD_PACKAGE "Build package files as well" ON) + + cmake_dependent_option(MULTI_BUILD_TESTS "Enable multi tests" ON "BUILD_TESTING" OFF) + + cmake_dependent_option(MULTI_BUILD_PACKAGE_DEB "Create a DEB" ON "MULTI_BUILD_PACKAGE" OFF) + + add_library(multi INTERFACE) + #target_compile_features(multi PUBLIC cxx_std_17) + + target_include_directories(multi INTERFACE $ $) + target_compile_options(multi INTERFACE $<$: --expt-relaxed-constexpr --extended-lambda>) + + if(NOT CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) + add_library(correaa::multi ALIAS multi) + endif() + + # https://youtrack.jetbrains.com/issue/CPP-25608 + target_compile_features(${PROJECT_NAME} INTERFACE $<$>:cxx_std_17>) + target_compile_options(${PROJECT_NAME} 
INTERFACE $<$:-std=c++17>) + + # this makes CM FetchContent friendly https://www.foonathan.net/2022/06/cmake-fetchcontent/ + if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + return() + endif() + + # Installation help + configure_package_config_file("${PROJECT_SOURCE_DIR}/cmake/multi-config.cmake.in" "${PROJECT_BINARY_DIR}/multi-config.cmake" INSTALL_DESTINATION "share/cmake/multi") + + write_basic_package_version_file("${PROJECT_BINARY_DIR}/multi-config-version.cmake" COMPATIBILITY SameMajorVersion ARCH_INDEPENDENT) + + message("current install prefix directory: ${CMAKE_INSTALL_PREFIX}") + + install( + TARGETS multi + EXPORT multi-targets + INCLUDES + DESTINATION "${CMAKE_INSTALL_DATADIR}") + + install( + EXPORT ${PROJECT_NAME}-targets + DESTINATION "${CMAKE_INSTALL_DATADIR}/cmake/${PROJECT_NAME}" + NAMESPACE multi:: + FILE "${PROJECT_NAME}-targets.cmake") + + install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake" "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" DESTINATION "${CMAKE_INSTALL_DATADIR}/cmake/${PROJECT_NAME}") + + install(DIRECTORY "include/" TYPE INCLUDE) + + add_subdirectory(test) + + # if(MULTI_BUILD_TESTS) + find_package(Boost 1.65 COMPONENTS unit_test_framework) # 1.65 needed for BOOST_TEST_GLOBAL_FIXTURE, you can use your own Boost and use `cmake -DBOOST_ROOT=$HOME/local` + if(NOT Boost_FOUND) + message(WARNING "Cannot find Boost 1.65+, Multi library will not be fully tested.") + else() + enable_testing() + + add_subdirectory(include/boost/multi/adaptors/blas) + # add_subdirectory(include/boost/multi/adaptors/complex) + add_subdirectory(include/boost/multi/adaptors/cuda) + add_subdirectory(include/boost/multi/adaptors/fftw) + + find_package(LAPACK) + if(LAPACK_FOUND) + add_subdirectory(include/boost/multi/adaptors/lapack) + endif() + + add_subdirectory(include/boost/multi/adaptors/thrust) + if(ENABLE_CUDA) + add_subdirectory(include/boost/multi/adaptors/cufft) + endif() + if(ENABLE_HIP) + 
add_subdirectory(include/boost/multi/adaptors/hipfft) + add_subdirectory(include/boost/multi/adaptors/hipthrust/test) + endif() + endif() + # endif() + + if(MULTI_BUILD_PACKAGE) + list(APPEND source-generators TBZ2 TGZ TXZ ZIP) + + if(CMAKE_HOST_WIN32) + list(APPEND binary-generators "WIX") + endif() + + if(MULTI_BUILD_PACKAGE_DEB) + list(APPEND binary-generators "DEB") + endif() + + if(MULTI_BUILD_RPM) + list(APPEND binary-generators "RPM") + endif() + + set(CPACK_PACKAGE_NAME ${PROJECT_NAME} + CACHE STRING "The resulting package name" + ) + set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "all") + set(CPACK_PACKAGE_ARCHITECTURE "all") + set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Multidimensional arrays for C++" + CACHE STRING "Package description for the package metadata" + ) + + set(CPACK_PACKAGE_VENDOR "alfredo.correa@gmail.com") + # set(CPACK_PACKAGE_INSTALL_DIRECTORY ${CPACK_PACKAGE_NAME}) + # SET(CPACK_OUTPUT_FILE_PREFIX "${CMAKE_SOURCE_DIR}/_packages") + # set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/some")#/${CMAKE_PROJECT_VERSION}") + + # set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR}) + # set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR}) + # set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH}) + + set(CPACK_PACKAGE_CONTACT "alfredo.correa@gmail.com") + set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Alfredo A. 
Correa") + + set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") + set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md") + + set(CPACK_DEBIAN_FILE_NAME "multi_all.deb") + # set(CPACK_COMPONENTS_GROUPING ALL_COMPONENTS_IN_ONE)#ONE_PER_GROUP) + # set(CPACK_DEB_COMPONENT_INSTALL YES) + + set(CPACK_SOURCE_GENERATOR ${source-generators}) + set(CPACK_GENERATOR ${binary-generators}) + + # set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION}") + # set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}") + + list( + APPEND + CPACK_SOURCE_IGNORE_FILES + /.git/ + /.build*/ + /build/ + .gitignore + .DS_Store) + + include(CPack) + endif() + +endif() diff --git a/external_codes/boost_multi/multi/CPPLINT.cfg b/external_codes/boost_multi/multi/CPPLINT.cfg new file mode 100644 index 0000000000..7bf7ea8b9c --- /dev/null +++ b/external_codes/boost_multi/multi/CPPLINT.cfg @@ -0,0 +1,17 @@ +set noparent +root=./include/ + +linelength=240 + +filter=-build/include_order + +filter=-readability/alt_tokens +filter=-readability/nolint + +filter=-runtime/references + +filter=-whitespace/braces +filter=-whitespace/operators +filter=-whitespace/parens +filter=-whitespace/semicolon +filter=-whitespace/tab diff --git a/external_codes/boost_multi/multi/LICENSE b/external_codes/boost_multi/multi/LICENSE new file mode 100644 index 0000000000..36b7cd93cd --- /dev/null +++ b/external_codes/boost_multi/multi/LICENSE @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this 
entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/external_codes/boost_multi/multi/README.md b/external_codes/boost_multi/multi/README.md index 2bc571e7ae..dc2f5032d7 100644 --- a/external_codes/boost_multi/multi/README.md +++ b/external_codes/boost_multi/multi/README.md @@ -3,147 +3,397 @@ --> # [Boost.]Multi -(not an official Boost library) +> **Disclosure: This is not an official or accepted Boost library and is unrelated to the std::mdspan proposal.** -_© Alfredo A. Correa, 2018-2022_ +_© Alfredo A. Correa, 2018-2024_ -`Multi` provides multidimensional array access to contiguous or regularly contiguous memory in modern C++. +_Multi_ is a modern C++ library that provides access and manipulation of data in multidimensional arrays, for both CPU and GPU memory. -This library aims to offer manipulation of array data in arbritrary dimension with well behaved value semantics, offering total compatibility with the Standard (STL) Algorithms, special memory (e.g. GPU), and following modern C++ design principles. -It requires at least C++17. (It is C++20 ready.) 
+Multidimensional array data structures are fundamental to several branches of computing, such as data analysis, image processing, and scientific simulations, and in combination with GPUs to Artificial Intelligence and Machine Learning. +This library offers array containers and subarrays in arbitrary dimensions with well-behaved value semantics, +featuring logical access recursively across dimensions and to elements through indices and iterators. -Some features: +The data structure is stride-based, which makes it compatible with low-level C-libraries. -* Value semantics of multi-dimensional array container -* Well defined referential semantics of subarray (view) types -* Interoperability with other libraries, STL, ranges, thrust, Boost, and C-libraries. -* Fast access of elements and subarrays (views) types +The library interface is designed to be compatible with standard algorithms and ranges (STL) and special memory (including GPUs) and follows modern C++ design principles. + +The library's primary concern is with the storage and logic structure of data; +it doesn't make algebraic or geometric assumptions about the arrays and their elements. +In this sense, it is instead a building block to implement algorithms to represent mathematical operations, specifically on numeric data. +Although most of the examples use numeric elements for conciseness, the library is designed to hold general types (e.g. non-numeric, non-trivial types, like `std::string`, other containers or, in general, user-defined value-types.) 
+ +Some features of this library: + +* Value semantics of multidimensional array containers +* Well-defined referential semantics of subarray (also called "view") types +* Interoperability with other libraries, STL, ranges, thrust (CUDA and AMD GPUs), Boost, and C-libraries +* Fast access to elements and subarrays (views) types * Arbitrary pointer types (fancy pointers, memory spaces) -* Simplified implementation (~4000 lines) -(Do not confuse this library with Boost.MultiArray or Boost.MultiIndex. -It shares the goals of [Boost.MultiArray](https://www.boost.org/doc/libs/1_69_0/libs/multi_array/doc/index.html), -although the code is completely independent and with important semantic differences.) +Do not confuse this library with [Boost.MultiArray](https://www.boost.org/doc/libs/1_69_0/libs/multi_array/doc/index.html) +or with the standard MDSpan proposal `std::mdspan`. +This library shares some of their goals and is compatible with them, but it is otherwise designed at a different level of generality. +The code is entirely independent and has fundamental implementation and semantics differences. + +It requires, at least, C++17. ## Contents [[_TOC_]] ## Using the library, installation and tests -`Multi` doesn't require instalation, single file `#include` is enough to use the full core library. -`Multi`'s _only_ dependecy is the standard C++ library. - -It is important to compile programs that use the library with some level of optimization, specially if element-access is intensively used. -For example, when testing speed, please make sure that you are compiling in release mode (`-DNDEBUG`) and with optimizations (`-O3`). +You can try the library [online](https://godbolt.org/z/dvacqK8jE) before using it. -Testing the library requires CMake, a build system (e.g. make), and the Boost.Test library. +_Multi_ doesn't require installation; a single header `#include ` is enough to use the entire core library. 
+_Multi_ has no dependencies (except for the standard C++ library) and can be used immediately after downloading. ```bash -sudo apt install cmake libboost-test-dev make +git clone https://gitlab.com/correaa/boost-multi.git ``` -A CMake build system is provided to automatically run basic tests. (Test do depend on the Boost.Test library.) +Although installation is not necessary, the library can still be installed with CMake. +The header (and cmake) files will typically end up in `/usr/local/include/multi` and `/usr/local/share/multi`. ```bash -git clone https://gitlab.com/correaa/boost-multi.git -cd multi +cd boost-multi mkdir -p build && cd build -cmake .. +cmake .. # --install-prefix=$HOME/.local +cmake --install . # or sudo ... +``` + +_Testing_ the library requires Boost.Test library, installed for example via `sudo apt install cmake git g++ libboost-test-dev make` or `sudo dnf install boost-devel cmake gcc-c++ git`. +A CMake build system is provided to compile and run basic tests. + +```bash cmake --build . ctest ``` -### Dependecies and compiler requirements +Once installed, other CMake projects (targets) can depend on Multi by adding a simple `add_subdirectory(my_multi_path)` or by `find_package`: + +```cmake +find_package(multi) # see https://gitlab.com/correaa/boost-multi#using-the-library-installation-and-tests +``` + +Alternatively to `find_package` the library can be fetched on demand: +```cmake +include(FetchContent) +FetchContent_Declare(multi GIT_REPOSITORY https://gitlab.com/correaa/boost-multi.git) +FetchContent_MakeAvailable(multi) +... 
+target_link_libraries(my_target PUBLIC multi) +``` + +The code requires compilers with standard C++17 support; for reference any of: +LLVM's `clang` [(5.0+)](https://godbolt.org/z/51E1hjfnn) (`libc++` and `libstdc++`), +GNU's `g++` [(7.1+)](https://godbolt.org/z/1nGEbKc5a), +Nvidia's [`nvcc`](https://godbolt.org/z/abdT73PqM) (11.4+) and `nvc++` (22.7+), +Intel's `icpc` (2021.2.0+) and `icpx` (2022.0.0+), +Baxter's [`circle`](https://www.circle-lang.org/) (build 187+), +[Zig](https://zig.news/kristoff/compile-a-c-c-project-with-zig-368j) in [c++ mode (v0.9.0+)](https://godbolt.org/z/cKGebsWMG), and +Microsoft's [MSVC](https://visualstudio.microsoft.com/vs/features/cplusplus/) ([+14.2](https://godbolt.org/z/vrfh1fxWK)). + +(Multi code inside CUDA kernels can be compiled with `nvcc` and with [`clang` (in CUDA mode)](https://godbolt.org/z/7dTKdPTxc). +Inside HIP code, it can be compiled with AMD's clang rocm (5.0+).) + +Optional "adaptor" sublibraries (included in `multi/adaptors/`) have specific dependencies: fftw, blas, lapack, thrust, or CUDA +(all can be installed with `sudo apt install libfftw3-dev libblas64-dev liblapack64-dev libthrust-dev nvidia-cuda-dev` or `sudo dnf install blas-devel fftw-devel`.) + +## Reference of types + +The library interface presents several closely related C++ types (classes) representing arrays. +The most important types represent multidimensional containers (called `array`), references that can refer to subsets of these containers (called `subarray`), and iterators. +In addition, there are other classes for advanced uses, such as multidimensional views of existing buffers (called `array_ref`) and non-resizable owning containers (called `static_array`).
+ +When using the library, it is simpler to start from `array`, and other types are rarely explicitly used, especially if using `auto`; +however, it is convenient for documentation to present the classes in a different order since the classes `subarray`, `array_ref`, `static_array`, and `array` have a *is-a* relationship (from left to right). +For example, `array_ref` has all the methods available to `subarray`, and `array` has all the operations of `array_ref`. + +### class `multi::subarray` + +An instance of this class represents a part (or a whole) of another `subarray` (including an `array`). +These have reference semantics, and in essence, they behave like language-references. +As references, they cannot be rebound or resized; assignments are always "deep". +They are characterized by a size that does not change. +They are usually the result of indexing over other `subarray`s and `array`s (generally of higher dimensions); therefore, the library doesn't expose constructors for this class. +The whole object can be invalidated if the original array is destroyed. + +| Member types | | +|--- |--- | +| `value_type` | `multi::array` or, for `D == 1`, `iterator_traits<P>::value_type` (usually `T`) +| `size_type` | `multi::size_t` (usually signed size) +| `difference_type` | `multi::diffptr_t` (usually signed size) +| `reference` | `multi::subarray` or, for `D == 1`, `pointer_traits<P>::reference` (usually `T&`) +| `const_reference` | `multi::const_subarray` or, for `D == 1`, `pointer_traits<P>::rebind<T const>::reference` (usually `T const&`) +| `pointer` | `multi::subarray_ptr` or, for `D == 1`, `P` (usually `T*`) +| `const_pointer` | `multi::const_subarray_ptr` or, for `D == 1`, `pointer_traits<P>::rebind<T const>` (usually `T const*`) +| `iterator` | `multi::array_iterator_t` +| `const_iterator` | `multi::const_array_iterator_t` + +| Member functions | | +|--- |--- | +| (constructors) | Not exposed; copy constructor is not available since the instances are not copyable; destructors are trivial since it doesn't own the elements. | +| `operator=` | assigns the elements from the source, sizes must match. + +It is important to note that assignments in this library are always "deep," and reference-like types cannot be rebound after construction. +(Reference-like types have corresponding pointer-like types that provide an extra level of indirection and can be rebound (just like language pointers); +these types are `multi::array_ptr` and `multi::subarray_ptr` corresponding to `multi::array_ref` and `multi::subarray` respectively.) + +| Relational functions | | +|--- |--- | +| `operator==`/`operator!=` | Tells if elements of two `subarray` are equal (and if extensions of the subarrays are the same) +| `operator<`/`operator<=` | Less-than lexicographical comparison (requires elements to be comparable) +| `operator>`/`operator>=` | Greater-than lexicographical comparison (requires elements to be comparable) + +It is important to note that, in this library, comparisons are always "deep". + +| Element access | | +|--- |--- | +|`operator[]` | access specified element by index, returns a `reference` (see above), for `D > 1` it can be used recursively | +|`front` | access first element (undefined result if array is empty). +|`back` | access last element (undefined result if array is empty). +|`operator()` | When used with zero arguments, it returns a `subarray` representing the whole array. When used with one argument, access a specified element by index (return a `reference`) or by range (return a `subarray` of equal dimension).
For more than one, arguments are positional and reproduce expected array access syntax from Fortran or Matlab: | + +- `subarray::operator()(i, j, k, ...)`, as in `S(i, j, k)` for indices `i`, `j`, `k` is a synonym for `S[i][j][k]`, the number of indices can be lower than the total dimension (e.g., `S` can be 4D). +Each index argument lowers the dimension by one. +- `subarray::operator()(ii, jj, kk)`, the arguments can be indices or ranges. +This function allows positional-aware ranges. Each index argument lowers the rank by one. +A special range is given by `multi::_`, which means "the whole range" (also spelled `multi::all`). +For example, if `S` is 3D, `S(3, {2, 8}, {3, 5})` gives a reference to a 2D array where the first index is fixed at 3, with sizes `6` by `2` referring to the subblock in the second and third dimension. Note that `S(3, {2, 8}, {3, 5})` is not equivalent to `S[3]({2, 8})({3, 5})`. +- `operator()()` (no arguments) gives the same array but always as a subarray (for consistency), `S()` is equivalent to `S(S.extension())` and, in turn, to `S(multi::_)` or `S(multi::all)`. + +| Structure access | (Generally used for interfacing with C-libraries) | +|--- |--- | +| `base` | direct access to underlying memory pointer (`S[i][j]... == S.base() + std::get<0>(S.strides())*i + std::get<1>(S.strides())*j + ...`) +| `stride` | returns the stride value of the leading dimension, e.g., `(&A[1][0][0]... 
- &A[0][0]...)` +| `strides` | returns a tuple with the strides defining the internal layout +| `layout` | returns a single layout object with stride and size information | + +| Iterators | | +|--- |--- | +| `begin/cbegin` | returns (const) iterator to the beginning +| `end/cend` | returns (const) iterator to the end + +| Capacity | | +|--- |--- | +| `sizes` | returns a tuple with the sizes in each dimension +| `extensions` | returns a tuple with the extensions in each dimension +| `size` | returns the number of subarrays contained in the first dimension | +| `extension` | returns a contiguous range describing the set of valid indices +| `num_elements` | returns the total number of elements + +| Creating views | (these operations do not copy elements or allocate) | +|--- |--- | +| `broadcasted` | returns an infinite view of the array of higher dimensions obtained by repeating elements. This returns a special kind of subarray with a degenerate layout and no size operation +| `dropped` | returns a subarray with the first n-elements (in the first dimension) dropped from the original subarray. This doesn't remove or destroy elements or resize the original array +| `element_transformed` | creates a view of the array, where each element is transformed according to a function | +| `elements` | a flattened view of all the elements rearranged in a canonical way. `A.elements()[0] -> A[0][0]`, `A.elements()[1] -> A[0][1]`, etc. The type of the result is not a subarray but a special kind of range. +| `rotated/unrotated` | a view (`subarray`) of the original array with indices (un)rotated from right to left (left to right), for `D = 1` returns the same `subarray`. For given `i`, `j`, `k`, `A[i][j][k]` gives the same element as `A.rotated()[j][k][i]` and, in turn, the same as `A.unrotated()[k][i][j]`. Preserves dimension. The function is cyclic; `D` applications will give the original view. 
| +| `transposed` (same as `operator~`) | a view (`subarray`) of the original array with the first two indices exchanged, only available for `D > 1`; for `D = 2`, `rotated`, `unrotated` and `transposed` give the same view | +| `sliced` | returns a subarray with elements from index `a` to index `b` (non-inclusive) `{S[a], ... S[b-1]}`. Preserves the dimension. +| `strided` | returns a subarray skipping `s` elements. Preserves the dimension. + +| Creating views by pointer manipulation | | +|--- |--- | +| `static_cast_array(args...)` | produces a view where the underlying pointer is constructed by `P2{A.base(), args...}`. Usually, `args...` is empty. Non-empty arguments are useful for stateful fancy pointers, such as transformer iterators. +| `reinterpret_cast_array` | underlying elements are reinterpreted as type T2, element sizes (`sizeof`) have to be equal; `reinterpret_cast_array(n)` produces a view where the underlying elements are interpreted as an array of `n` elements of type `T2`. + +| Creating arrays | | +|--- |--- | +| `decay` (same as prefix `operator+`) | creates a concrete independent `array` with the same dimension and elements as the view. Usually used to force a copy of the elements or in combination with `auto` (e.g., `auto A2_copy = + A[2];`). + +A reference `subarray` can be invalidated when its origin array is invalidated or destroyed. +For example, if the `array` from which it originates is destroyed or resized. + +### class `multi::array_ref` + +A D-dimensional view of a contiguous pre-existing memory buffer. +This class doesn't manage the elements it contains, and it has reference semantics (it can't be rebound, assignments are deep, and have the same size restrictions as `subarray`). + +Since `array_ref` is-a `subarray`, it inherits all the class methods and types described before; in addition, it defines these members below. + +| Member types | same as for `subarray` | +|--- |--- | + +| Member functions | same as for `subarray` plus ... 
| +|--- |--- | +| (constructors) | `array_ref::array_ref({e1, e2, ...}, p)` constructs a D-dimensional view of the contiguous range starting at p and ending at least after the size of the multidimensional array (product of sizes). The default constructor and copy constructor are not exposed. Destructor is trivial since elements are not owned or managed. | + +| Element access | same as for `subarray` | +|--- |--- | + +| Structure access | same as for `subarray` | +|--- |--- | + +| Iterators | same as for `subarray` | +|--- |--- | + +| Capacity | same as for `subarray` | +|--- |--- | + +| Creating views | same as for `subarray` | +|--- |--- | + +| Creating arrays | same as for `subarray` | +|--- |--- | + +| Relational functions | same as for `subarray` | +|--- |--- | + +An `array_ref` can be invalidated if the original buffer is deallocated. + +### class `multi::static_array, ...>` + +A D-dimensional array that manages an internal memory buffer. +This class owns the elements it contains; it has restricted value semantics because assignments are restricted to sources with equal sizes. +Memory is requested by an allocator of type Alloc (standard allocator by default). +It supports stateful and polymorphic allocators, which are the default for the special type `multi::pmr::static_array`. + +The main feature of this class is that its iterators, subarrays, and pointers do not get invalidated unless the whole object is destroyed. +In this sense, it is semantically similar to a C-array, except that elements are allocated from the heap. +It can be useful for scoped uses of arrays and multi-threaded programming and to ensure that assignments do not incur allocations. +The C++ Core Guidelines proposed a similar (albeit one-dimensional) class, called [`gsl::dyn_array`](http://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#gslowner-ownership-pointers). + +For most uses, a `multi::array` should be preferred instead. 
+ +| Member types | same as for `array_ref` | +|--- |--- | + +| Member functions | same as for `array_ref` plus ... | +|--- |--- | +| (constructors) | `static_array::static_array({e1, e2, ...}, T val = {}, Alloc = {})` constructs a D-dimensional array by allocating elements. `static_array::static_array(std::initializer_list<...>)` constructs the array with elements initialized from a nested list. +| (destructor) | Destructor deallocates memory and destroys the elements | +| `operator=` | assigns the elements from the source, sizes must match. + +| Element access | same as for `array_ref` | +|--- |--- | + +| Structure access | same as for `array_ref` | +|--- |--- | + +| Iterators | same as for `array_ref` | +|--- |--- | + +| Capacity | same as for `array_ref` | +|--- |--- | + +| Creating views | same as for `array_ref` | +|--- |--- | + +| Creating arrays | same as for `array_ref` | +|--- |--- | + +| Relational functions | same as for `array_ref` | +|--- |--- | + +### class `multi::array, ...>` + +An array of integer positive dimension D has value semantics if element type T has value semantics. +It supports stateful and polymorphic allocators, the default for the special type `multi::pmr::static_array`. + +| Member types | same as for `static_array` | +|--- |--- | + +| Member functions | | +|--- |--- | +| (constructors) | `array::array({e1, e2, ...}, T val = {}, Alloc = {})` constructs a D-dimensional array by allocating elements; `array::array(It first, It last)` and `array::array(Range const& rng)`, same for a range of subarrays. `static_array::static_array(std::initializer_list<...>, Alloc = {})` constructs the array with elements initialized from a nested list. +| (destructor) | Destructor deallocates memory and destroys the elements | +| `operator=` | assigns from a source `subarray`, or from another `array`. 
`array`s can be moved | + +| Element access | same as for `static_array` | +|--- |--- | + +| Structure access | same as for `static_array` | +|--- |--- | -The core of the library doesn't have dependencies (other than the standard library). +| Iterators | same as for `static_array` | +|--- |--- | -Compiling and running the tests depends on Boost.Test -(which can be installed with `sudo apt install libboost-test-dev` in Debian-like systems.) +| Capacity | same as for `static_array` | +|--- |--- | -"Adaptor" sublibraries (included in `multi/adaptors/`) have specific dependencies, Boost.Serialization, fftw, blas, lapack, thurst, CUDA -(which can be installed with `sudo apt install libboost-serialization-dev libfftw3-dev libblas64-dev liblapack64-dev libthrust-dev libcudart11.0` or indiviudually.) +| Creating views | same as for `static_array` | +|--- |--- | -The code is developed for several compilers with standard C++17 support, for reference: -LLVM's `clang` (5.0+) (`libc++` and `libstdc++`), -GNU's `g++` (7.1+), -Nvidia's `nvcc` (11.3+) and `nvc++` (20.7-21.3+), -Intel's `icpc` (2021.2.0+) and `icpx` (2022.0.0+), -Baxter's [`circle`](https://www.circle-lang.org/) (build 168+), and Microsoft's [MSVC](https://visualstudio.microsoft.com/vs/features/cplusplus/) (+19.14 in [conformant mode](https://godbolt.org/z/vrfh1fxWK)). +| Creating arrays | same as for `static_array` | +|--- |--- | -## Types +| Relational functions | same as for `static_array` | +|--- |--- | -* `multi::array`: -Array of integer dimension `D`, it has value semantics if element type `T` has value semantics. -Memory is requested by allocator of type `A`, supports stateful and polymorphic allocators. -* `multi::array_ref`: -Array interpretation of a random access range, usually a contiguous memory block. -It has reference semantics. -Thanks to (non-virtual) inheritance an `array` is-a `array_ref`. 
-* Other derived "unspecified types" fulfill a `MultiArrayView` concept, for example by taking partial indices or rotations (transpositions). -These reference types cannot be stored except through life-time extensions `auto&&` or `auto const&`, -and they can decay to value types. -* `MultiArrayView::(const_)iterator`: -Iterator to subarrays of dimension `D - 1`. For `D == 1` this is an iterator to an element. This types are generated by `begin` and `end` functions. -* `MultiArrayView::(const_)reference`: -Reference to subarrays of dimension `D - 1`. For `D > 1` this are not true C++-references but types emulate them (with reference semantics). -For `D == 1` this is a true language reference to an element type (`T&`). These types are generated by dereferencing iterators, e.g. `*begin(MA)`. +| Manipulation | | +|--- |--- | +| `clear` | Erases all elements from the container. The array is resized to zero size. | +| `reextent` | Changes the size of the array to new extensions. `reextent({e1, e2, ...})` elements are preserved when possible. New elements are initialized with `reextent({e1, e2, ...}, val)`. + +### class `multi::subarray::(const_)iterator` + +A random-access iterator to subarrays of dimension `D - 1`, generally used to interact with or implement algorithms. +They can be default constructed but do not expose other constructors since they are generally created from `begin` or `end`, manipulated arithmetically, `operator--`, `operator++` (pre and postfix), or random jumps `operator+`/`operator-` and `operator+=`/`operator-=`. +They can be dereferenced by `operator*` and index access `operator[]`, returning objects of lower dimension `subarray::reference` (see above). +Note that this is the same type for all related arrays, for example, `multi::array::(const_)iterator`. + +`iterator` can be invalidated when its original array is invalidated, destroyed or resized. 
+An `iterator` that stems from `static_array` becomes invalid only if the original array was destroyed or went out of scope. ## Basic Usage -The following code declares an array by specifying the element type and the dimension; -indiviudual elements can be initialized from a nested rectangular list. +The following code declares an array by specifying the element type and the dimensions; +individual elements can be initialized from a nested rectangular list. ```cpp multi::array A = { - {1., 2., 3.} - {4., 5., 6.} + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, }; -assert( A.size() == 2 ); -assert( A.num_elements() == 6 ); +auto const [n, m] = A.sizes(); -assert( std::get<0>(A.sizes()) == 2 ); -assert( std::get<1>(A.sizes()) == 3 ); +assert( n == 2 ); // or std::get<0>(A.sizes()) == 2 +assert( m == 3 ); // or std::get<1>(A.sizes()) == 3 + +assert( A.size() == 2 ); // size in first dimension, same as std::get<0>(A.sizes()) +assert( A.num_elements() == 6 ); // total number of elements ``` The value of an array can be copied, (moved,) and compared; -copies are equal but independent (value semantics). +copies are equal but independent (disjoint). ```cpp std::array B = A; -assert( extensions(B) == extensions(A) ); -assert( B == A ); -assert( B[0][1] == A[0][1] ); -assert( &B[0][1] != &A[0][1] ); +assert( B == A ); // copies are equal +assert( extensions(B) == extensions(A) ); // extensions (sizes) are equal +assert( B[0][1] == A[0][1] ); // all elements are equal +assert( &B[0][1] != &A[0][1] ); // elements are independent (different addresses) ``` -Individual elements can be accessed by the multidimensional indices, either with square bracket (one index at a time) or with parenthesis. +Individual elements can be accessed by the multidimensional indices, either with square brackets (one index at a time, as above) or with parentheses (comma-separated). 
-``` -assert( A(1, 2) == A[1][2] ); +```cpp +assert( &A(1, 2) == &A[1][2] ); ``` -Arrays can be initialized from its sizes alone, in which case the element values are default constructed: +An array can be initialized from its sizes alone, in which case the element values are defaulted (possibly uninitialized): ```cpp multi::array C({3, 4, 5}); -assert( num_elements(C) == 3*4*5 ); // 60 elements +assert( num_elements(C) == 3*4*5 ); // 60 elements with unspecified values ``` Arrays can be passed by value or by reference. -Most of the time, arguments should be passed through generic parameters to allow functions to work with parts (subblocks, slices, etc.) of an array. -Usually, the most useful functions are those that work on the concept of array rather than on a concrete type. +Most of the time, arguments should be passed through generic parameters to also allow functions work with parts (subblocks, slices, etc.) of an array. +Usually, the most useful functions work on the _concept_ of an array rather than on a concrete type, for example: ```cpp -template // instead of the over specific argument std::array +template // instead of the overspecific argument std::array auto element_1_1(ArrayDouble2D const& m) -> double const& {return m[1][1];} ... -assert( element_1_1(A) == A[1][1] ); +assert( &element_1_1(A) == &A[1][1] ); ``` -The function obviously expect any kind of array or subarray of dimension 2 and element type `double`. +The function expects any array or subarray of dimension 2 and return an element with type `double`. -The generic function template arguments that are not intended to be modified are passed by `const&`; otherwise pass by forward-reference `&&`. -In this way the functions can be called on subblocks of larger matrices. +The generic function template arguments that are not intended to be modified are passed by `const&`; otherwise, they are passed by forward-reference `&&`. +In this way, the functions can be applied on subblocks of larger matrices. 
```cpp assert( &element_1_1(C3D[0]) == &C3D[0][1][1] ); @@ -154,33 +404,31 @@ assert( &element_1_1(C3D[0]) == &C3D[0][1][1] ); We can create a static C-array of `double`s, and refer to it via a bidimensional array `multi::array_ref`. ```cpp -#include "../array.hpp" +#include "multi/array.hpp" #include // for sort #include // for print namespace multi = boost::multi; -using std::cout; int main() { double d_data[20] = { - 150., 16., 17., 18., 19., - 30., 1., 2., 3., 4., - 100., 11., 12., 13., 14., - 50., 6., 7., 8., 9. + 150.0, 16.0, 17.0, 18.0, 19.0, + 30.0, 1.0, 2.0, 3.0, 4.0, + 100.0, 11.0, 12.0, 13.0, 14.0, + 50.0, 6.0, 7.0, 8.0, 9.0 }; // block of 20 elements ... - multi::array_ref d2D_ref{&d_data[0], {4, 5}}; // interpreted as a 4 by 5 array + multi::array_ref d2D_ref{&d_data[0], {4, 5}}; // .. interpreted as a 4 by 5 array ... ``` -Note that the syntax of creating a reference array involves passing the pointer to a memory block (20 elements here) and the logical dimensions of that memory block (4 by 5 here). - -Next we print the elements in a way that corresponds to the logical arrangement: +Next, we print the elements in a way that corresponds to the logical arrangement: ```cpp ... - auto [ix, jx] = d2D_ref.extensions(); + auto [is, js] = d2D_ref.extensions(); for(auto i : is) { + using std::cout; for(auto j : js) { cout<< d2D_ref[i][j] <<' '; } @@ -191,15 +439,17 @@ Next we print the elements in a way that corresponds to the logical arrangement: This will output: -> ```cpp -> 150 16 17 18 19 -> 30 1 2 3 4 -> 100 11 12 13 14 +> ``` +> 150 16 17 18 19 +> 30 1 2 3 4 +> 100 11 12 13 14 > 50 6 7 8 9 > ``` -It is sometimes said (by Sean Parent) that the whole of STL algorithms can be seen as intermediate pieces to implement `std::stable_sort`. -Pressumably if one can sort over a range, one can perform any other standard algorithm. +The arrays provide iterator-based access, which allows it to interface with algorithms and implement new ones. 
+ +It is sometimes said (by Sean Parent) that the whole of STL algorithms can be seen as intermediate pieces to implement `std::stable_sort`. +Presumably, if one can sort over a range, one can perform any other standard algorithm. ```cpp ... @@ -207,23 +457,22 @@ Pressumably if one can sort over a range, one can perform any other standard alg ... ``` -If we print this we will get +If we print the result, we will get: -> ```cpp -> 30 1 2 3 4 -> 50 6 7 8 9 -> 100 11 12 13 14 +> ``` +> 30 1 2 3 4 +> 50 6 7 8 9 +> 100 11 12 13 14 > 150 16 17 18 19 > ``` - The array has been changed to be in row-based lexicographical order. Since the sorted array is a reference to the original data, the original C-array has changed. -(Note that `std::*sort` cannot be applied directly to a multidimensional C-array or to Boost.MultiArray types. -The arrays implemented by this library are, to the best of my knowledge, the only ones that support STL algorithms directly.) +(Note that `std::sort` cannot be applied directly to a multidimensional C-array or to other libraries, such as Boost.MultiArray. +The arrays implemented by this library are, to the best of my knowledge, the only ones that support all STL algorithms directly.) -If we want to order the matrix in a per-column basis we need to "view" the matrix as range of columns. +If we want to order the matrix in a per-column basis, we need to "view" the matrix as range of columns. This is done in the bidimensional case, by accessing the matrix as a range of columns: ```cpp @@ -232,16 +481,18 @@ This is done in the bidimensional case, by accessing the matrix as a range of co } ``` -Which will transform the (original) matrix into: +The `rotate` operation rotates indices, providing a new logical view of the original array without modifying it. 
+ +In this case, the original array will be transformed by sorting the matrix into: -> ```cpp +> ``` > 1 2 3 4 30 > 6 7 8 9 50 > 11 12 13 14 100 > 16 17 18 19 150 > ``` -In other words, a matrix of dimension `D` can be viewed simultaneously as `D` different ranges of different "transpositions" (rotation/permutation of indices.) +By combining index rotations and transpositions, an array of dimension `D` can be viewed simultaneously as `D!` (D-factorial) different ranges of different "transpositions" (rotation/permutation of indices.) ## Initialization @@ -254,186 +505,192 @@ multi::array_ref B({2, 6}, dp); ... delete[] dp; ``` -Array references do not own memory and can not be resized or "reseated" to refer to a different location. + +Array references do not own memory and, just as language references, can not be rebinded (resized or "reseated") to refer to a different memory location. Since `array_ref` is an array reference, it can "dangle" if the the original memory is deallocated. -Array objects own the elements it contains and can be resized; +Array objects (`multi::array`), in constrast, own the elements it contains and can be resized; `array` is initialized by specifying the index extensions (and optionally a default value). ```cpp -multi::array A1({3} , 11.); // {11., 11., 11.} +multi::array A1({3} , 11.0); // {11.0, 11.0, 11.0} -multi::array A2({2, 3} , 22.); // { {22., 22., 22.}, {22., 22., 22.} } +multi::array A2({2, 3} , 22.0); // { {22.0, 22.0, 22.}, {22.0, 22.0, 22.0} } -multi::array A3({3, 2, 2}, 33.); // { { { 33., ...}, { ... }, ... } } +multi::array A3({3, 2, 2}, 33.0); // { { { 33., ...}, { ... }, ... } } ``` ... or alternatively from a rectangular list. 
```cpp -multi::array A1 = {1., 2., 3.}; +multi::array A1 = {1.0, 2.0, 3.0}; assert( num_elements(A1)==3 ); multi::array A2 { - { 1., 2., 3.}, - { 4., 5., 6.} + { 1.0, 2.0, 3.0}, + { 4.0, 5.0, 6.0} }; assert( num_elements(A2) == 2*3); multi::array const A3 = { - {{ 1.2, 0.}, { 2.4, 1.}}, - {{11.2, 3.}, {34.4, 4.}}, - {{15.2, 99.}, {32.4, 2.}} + {{ 1.2, 0.0}, { 2.4, 1.0}}, + {{11.2, 3.0}, {34.4, 4.0}}, + {{15.2, 99.0}, {32.4, 2.0}} }; -assert( num_elements(A3) == 3*2*2 ); +assert( A3.num_elements() == 3 * 2 * 2 ); ``` -In all cases constness (`const` declaration) is honored in the expected way. +In all cases, constness (`const` declaration) is honored in the expected way. ## Copy and assigment The library offer value semantics for the `multi::array` family of classes. -Constructing or assignment from an existing array generates a copy of the original object, that is, and object that is independent but equal in value. +Constructing or assigning from an existing array generates a copy of the original object, that is, an object that is independent but equal in value. ```cpp -auto B2 = A2; // same as multi::array B3 = A3; -assert( B2 == A2 ); // copies have the same value (and also the same shape) -assert( B2.base() != A2.base() ); // but they are independent +auto B2 = A2; // same as multi::array B2 = A2; (A2 is defined above) + +assert( B2 == A2 ); // copies have the same value (and also the same shape) +assert( B2[0][0] == A2[0][0] ); +assert( &B2[0][0] != &A2[0][0] ); // but they are independent ``` -Any (mutable) array can be assigned at any moment, independent of the previous state or shape of the variable. +A (mutable) array can be assigned at any moment, independently of the previous state or shape (extensions). The dimensionalities must match. ```cpp -B2 = A2; // same as multi::array B3 = A3; +B2 = A2; // both have dimensionality 2 ``` -(The operation can fail there is no enough memory to hold a copy.)
- -Sometimes it is necessary to generate copies from views, the dimensionality must match. +Sometimes it is necessary to generate copies from views or subblocks. ```cpp multi::array C2 = A2( {0, 2}, {0, 2} ); ``` -or equivalently: +or, equivalently: ```cpp auto C2 = + A2( {0, 2}, {0, 2} ); ``` -Note the use of `+` as indicator that a copy must be created (it has no arithmetic implications), otherwise `C3` will still be a non-indepdent view of the original array. +Note the use of the prefix `+` as an indicator that a copy must be created (it has no arithmetic implications). +Due to limitations of the language, omitting the `+` will effectively create another reference, a non-independent view of the original array, which is generally undesired. -Subviews can also assigned but only if the shape of the left-hand side (LHS) and right-hand side (RHS) match. -Otherwise the behavior is undefined (in debug mode the program will fail an `assert`). +Subviews can also be assigned, but only if the shape of the left-hand side (LHS) and right-hand side (RHS) match. +Otherwise, the behavior is undefined (in debug mode, the program will fail with an `assert`). ```cpp -C2( {0, 2}, {0, 2} ) = A2( {0, 2}, {0, 2} ); // both are 2x2 views of arrays, *elements* are copies +C2( {0, 2}, {0, 2} ) = A2( {0, 2}, {0, 2} ); // both are 2x2 views of arrays, *elements* are copied ``` -Introducing the same or overlapping elements in the RHS and LHS produces undefined behavior in general (and the library doesn't check); -for example this instruction does not transpose the array, but produces an undefined result. +Using the same or overlapping arrays in the RHS and LHS of assignment produces undefined behavior in general (and the library doesn't check). +Notably, this instruction does not transpose the array but produces an undefined result: ```cpp -A2 = A2.transposed(); +A2 = A2.transposed(); ``` -...
while this does produce a transposition (at the cost of making a copy of the tranposed array first and assigning it back to the original array). +While this below instead does produce a transposition, at the cost of making one copy (implied by `+`) of the transposed array first and assigning (or moving) it back to the original array. ```cpp A2 = + A2.transposed(); ``` +In-place transposition is an active subject of research; +_optimal_ speed and memory transpositions might require specially designed libraries. + Finally, arrays can be efficiently moved by transferring ownership of the internal data. ```cpp auto B2 = std::move(A2); // A2 is empty after this ``` -Subarrays do not own the data therefore they cannot be moved in the same sense. -However, indivial elements of a view can be moved, this is particularly useful if the elements are expensive to copy. -A moved subview is simply another kind view of the elements. +Subarrays do not own the data; therefore they cannot be moved in the same sense. +However, individual elements of a view can be moved; this is particularly useful if the elements are expensive to copy. +A "moved" subview is simply another kind of view of the elements. ```cpp -multi::array, 2> A({10, 10}); +multi::array, 2> A({10, 10}, std::vector(1000)); multi::array, 2> B({10, 10}); ... -B[1] = A[2].moved(); // 10 *elements* of the third row of A is moved into the second row of B. +B[1] = A[2].element_moved(); // 10 *elements* of the third row of A are moved into the second row of B. A[2] still has 10 (moved-from) empty vectors ``` -## Changing extents (sizes) +## Change sizes (extents) -Arrays can change their size preserving elements with `reextents`. +Arrays can change their size while _preserving elements_ with the `reextent` method. ```cpp multi::array A { - {1., 2., 3.}, - {4., 5., 6.} + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0} }; -A.reextents({4, 4}); +A.reextent({4, 4}); -assert( A[0][0] = 1.
); +assert( A[0][0] == 1.0 ); ``` -Subarrays, or views cannot change their size. `A[1].reextents({4})`. -The main utility of `reextents` is element preservation. -If element preservation is not desired, simple (move) assignment from a new array expresses the intention better as it is more efficient. +Arrays can be emptied (to zero-size) with `.clear()` (equivalent to `.reextent({0, 0, ...})`). + +The main purpose of `reextent` is element preservation. +Allocations are not amortized; +except for trivial cases, all calls to `reextent` allocate and deallocate memory. +If element preservation is not desired, a simple assignment (move) from a new array expresses the intention better and it is more efficient since it doesn't need to copy preexisting elements. ```cpp -A = multi::array({4, 4}); +A = multi::array({4, 4}); // like A.reextent({4, 4}) but elements are not preserved. ``` +An alternative syntax, `.reextent({...}, value)`, sets _new_ (not preexisting) elements to a specific value. + +Subarrays or views cannot change their size or be emptied (e.g. `A[1].reextent({4})` or `A[1].clear()` will not compile). +For the same reason, subarrays cannot be assigned from an array or another subarray of a different size. + +Changing the size of arrays by `reextent`, `clear`, or assignment generally invalidates existing iterators and ranges/views. + ## Iteration -Accessing arrays by iterators (`begin`/`end`) enables the use of many iterator based algorithms (see the sort example above). +Accessing arrays by iterators (`begin`/`end`) enables the use of many iterator-based algorithms (see the sort example above). `begin(A)/end(A)` (or equivalently `A.begin()/A.end()`) gives iterators that are linear and random access in the leading dimension. -`cbegin/cend` give constant (read-only access). - Other non-leading dimensions can be obtained by "rotating" indices first.
- `A.rotated().begin()/.end()` gives access to a range of subarrays in second dimension number (first dimension is put at the end). -For example in a three dimensional array, - -```cpp - (cbegin(A)+1)->operator[](1).begin()[0] = 342.4; // error, read-only - ( begin(A)+1)->operator[](1).begin()[0] = 342.4; // assigns to A[1][1][0] - assert( ( begin(A)+1)->operator[](1).begin()[0] == 342.4 ); -``` +`cbegin/cend` give constant (read-only) access. As an example, this function allows printing arrays of arbitrary dimension into a linear comma-separated form. ```cpp -void print(double const& d){cout< -void print(MultiArray const& ma) { // note the recursion in the template function `print` - cout<<"{"; +void flat_print(double const& d) { cout< +void flat_print(Array const& ma) { + cout << "{"; if(not ma.empty()) { - print(*cbegin(ma)); - std::for_each(cbegin(ma)+1, cend(ma), [](auto&& e) {cout<<","; print(e);}); + flat_print(*cbegin(ma)); // first element + std::for_each(cbegin(ma)+1, cend(ma), [](auto&& e) { cout<<", "; flat_print(e);}); // rest } - cout<<"}"; + cout << "}"; } ... print(A); ``` > ``` -> {{{1.2,1.1},{2.4,1}},{{11.2,3},{34.4,4}},{{15.2,99},{32.4,2}}} +> {{{1.2, 1.1}, {2.4, 1}}, {{11.2, 3}, {34.4, 4}}, {{15.2, 99}, {32.4, 2}}} > ``` - -Except for those corresponding to the one-dimensional case, derreferencing iterators generally produce "proxy"-references (i.e. objects that behave in a large degree like language references). +Except for those corresponding to the one-dimensional case, dereferencing iterators generally produce "proxy"-references (i.e. objects that behave in a large degree like language references). These references can be given a name; using `auto` can be misleading since the resulting variable does not have value semantics. 
```cpp auto row = *begin(A); // accepted by the language but misleading, row is not an independent value ``` -In my experience, however, the following usage patter a more consistent idiom for generating references (still without copying elements): +In my experience, however, the following usage pattern produces a more consistent idiom for generating references (still without copying elements): ```cpp auto&& row0 = * begin(A); // same as decltype(A):: reference row0 = * begin(A); auto const& crow0 = *cbegin(A); // same as decltype(A)::const_reference crow0 = *cbegin(A); -auto&& row0 = A [1]; // same as decltype(A):: reference row0 = A [1]; +auto&& row1 = A [1]; // same as decltype(A):: reference row1 = A [1]; auto const& crow1 = std::as_const(A)[1]; // same as decltype(A)::const_reference crow0 = std::as_const(A)[1]; ``` @@ -444,6 +701,26 @@ decltype(A)::value_type row = *begin(A); // there is a real copy of the row auto row = + *begin(A); // there is another copy, note the use of '+' (unary plus) ``` +### "Pointer" to subarray + +The library strongly relies on value-semantics, and it doesn't entertain the concept of "shallow" copy; however, it supports reference- and pointer-semantics. + +Subarrays (e.g., rows in a 2D array) are reference-like objects with a concrete address-like value that identifies them uniquely. +These addresses, which behave like pointers, can be helpful to "mark" subviews; these markers can be copied and stored in arrays. + +```cpp +auto A = multi::array({4, 4}); + +auto row2_ptr = &A[2]; // A[2] is a row of A (not an element) +assert( row2_ptr == &*(A.begin() + 2) ); +``` + +The expression `A[2]` above is technically a C++ temporary object, and therefore it doesn't have a C++ address (taking `std::addressof` gives a compilation error). +However, in the library's abstraction, `A[2]` references an existing part of the original array, i.e. it is a "library reference", whose "library address" can be obtained with operator `&`.
+The case is an illustration that, in the library, operator `&` is, for subarrays, different than the `std::addressof` operator; the latter may not be defined and even not compile for some expressions. + +Comparing these markers/pointers with different provenance, i.e., originating from different arrays, is generally undefined. + ## Indexing Arrays provide random access to elements or subviews. @@ -451,16 +728,16 @@ Many algorithms on arrays are oriented to linear algebra, which are ubiquitously implemented in terms of multidimensional index access. Iterator access and index access are two alternatives for accessing elements. -For example `*(begin(A) + n)` and `A[n]` are semantically equivalent -and the range defined by the pair `begin(A), end(A)` is `A(extension(A))` (even for multidimensional `A`). -The syntax can be combined in arbitrary ways, for example `*begin(A[n])` is equivalent to `A[n][0]` (if the dimensionality of `A` is equal or greater than two). +For example `*(begin(A) + n)` and `A[n]` are equivalent +and the range defined by the pair `begin(A), end(A)` is equivalent to `A(extension(A))` and, in turn, to `A()` (even for a multidimensional array, `D > 1`). +The syntax can be combined in arbitrary ways, for example `*begin(A[n])` is equivalent to `A[n][0]`. ### Element access and partial access Index access mimics that of C-fixed sizes arrays. For example, a 2-dimensional array will access to an element by specifying two indices `A[1][2]`, which can be used for direct write and read operations; -while _partial_ index arguments `A[1][2]` generate a view 1-dimensional object (reference). +while _partial_ index arguments `A[1]` generate a view 1-dimensional object (a reference). 
```cpp A // is a 2D value array @@ -468,8 +745,8 @@ A[0] // is a 1D "reference"/"view" array A[0][0] // is a an element reference, zero-D ``` -Transpositions are also multi-dimensional arrays views in which the index are *logically* rearranged, for example `rotated(m)[2][3][1] == m[1][2][3]`. -(`rotate` refers to the fact that the logical indices are _rotated_ to the left.) +Transpositions are also multidimensional arrays _views_ in which the index are *logically* rearranged, for example `rotated(m)[2][3][1] == m[1][2][3]`. +(`rotated`/`unrotated` refers to the fact that the logical _indices_ are rotated to the left/right.) As an illustration of an algorithm based on index access (as opposed to iterators), this example code implements Gauss Jordan Elimination without pivoting: @@ -501,8 +778,8 @@ auto gj_solve(Matrix&& A, Vector&& y) -> decltype(y[0]/=A[0][0], y) { This function can be applied to a `multi::array` container: ```cpp -multi::array A = {{-3., 2., -4.},{0., 1., 2.},{2., 4., 5.}}; -multi::array y = {12.,5.,2.}; //(M); assert(y.size() == M); iota(y.begin(), y.end(), 3.1); +multi::array A = {{-3.0, 2.0, -4.0},{0.0, 1.0, 2.0},{2.0, 4.0, 5.0}}; +multi::array y = {12.0, 5.0, 2.0}; // (M); assert(y.size() == M); iota(y.begin(), y.end(), 3.1); gj_solve(A, y); ``` @@ -516,16 +793,17 @@ gj_solve(A({1000, 4000}, {0, 3000}), y); ### Slices and strides -Given an array, a slice in the first dimension can be taken with the `sliced` function. `sliced` takes two arguments, the first index of the slice and the last index (not included) of the slice. For example, +Given an array, a slice in the first dimension can be taken with the `sliced` function. +`sliced` takes two arguments, the first index of the slice and the last index (not included) of the slice. 
For example, ```cpp multi::array A({4, 5}); // A is a value -assert( std::get<0>(A) == 2 ); -assert( std::get<1>(A) == 5 ); +assert( std::get<0>(A.sizes()) == 4 ); +assert( std::get<1>(A.sizes()) == 5 ); auto&& A_sliced = A.sliced(1, 3); // {{d2D[1], d2D[2]}} -assert( std::get<0>(A_sliced) == 2 ); -assert( std::get<1>(A_sliced) == 5 ); +assert( std::get<0>(A_sliced.sizes()) == 2 ); +assert( std::get<1>(A_sliced.sizes()) == 5 ); ``` The number of rows in the sliced matrix is 2 because we took only two rows, row 1 and row 2 (row 3 is excluded). @@ -543,22 +821,22 @@ Operations can be combined in a single line: ```cpp auto&& d2D_slicedstrided = d2D.sliced(1, 3).strided(2); // {{ d2D[1] }}; -assert( d2D_slicedstrided.size(0) == 1 and d2D_slicedstrided.size(1) == 5 ); +assert( std::get<0>(d2D_slicedstrided.sizes()) == 1 and std::get<1>(d2D_slicedstrided.sizes()) == 5 ); ``` For convenience, `A.sliced(a, b, c)` is the same as `A.sliced(a, b).strided(c)`. -By combining `rotated`, `sliced` and `strided` one can take sub arrays at any dimension. +By combining `rotated`, `sliced` and `strided` one can take sub arrays at any dimension index. For example in a two dimensional array one can take a subset of columns by defining. ```cpp -auto&& subA = A.rotated().strided(1, 3).sliced(2).unrotated(); +auto&& subA = A.rotated().sliced(1, 3).strided(2).unrotated(); ``` -Other notations are available, but when in doubt, the `rotated/strided/sliced/rotated` and combinations of them provides the most control over the subview operations. -(At the moment the `strided` argument has to divide the total size of the slice (or matrix), otherwise the behavior is undefined.) +Other notations are available, for example this is equivalent to `A(multi::_ , {1, 3, /*every*/2})` or `~(~A)({1, 3, 2})`. +The `rotated/strided/sliced/rotated` and combinations of them provides the most control over the subview operations. 
-Blocks (slices) in multidimensions can be obtained but pure index notation using parentheses `()` (`.operator()`): +Blocks (slices) in multidimensions can be obtained by pure index notation using parentheses `()` (`.operator()`): ```cpp auto A = multi::array({6, 7}); // 2D value array @@ -566,12 +844,11 @@ auto A = multi::array({6, 7}); // 2D value array auto&& A_block1 = A({1, 4}, {2, 4}); // 2D subarray reference (modifiable) auto const& A_block2 = A({1, 4}, {2, 4}); // 2D subarray reference (non-modifiable) -auto A_block3 = A({1, 4}, {2, 4}); // disabled +auto A_block3 = A({1, 4}, {2, 4}); // works but it can be confusing, use `auto&&` instead ``` -Note that the last case gives a compilation error, the library prevents the use of this references as if they are were values. -Some times copies are necessary, specifically from a subarray block, this can be done by constructing a new array. -The value array can be deduces by using `auto` and the `decay` member, which in turn is equivalent to the unary `+` operator. +Sometimes copies are necessary, specifically from a subarray block, this can be done by constructing a new array. +The value array can be deduced by using `auto` and the `decay` member, which in turn is equivalent to the prefix `+` operator. ```cpp multi::array block_value_1 = A({1, 4}, {2, 4}) ; @@ -579,22 +856,74 @@ auto block_value_2 = A({1, 4}, {2, 4}).decay(); auto block_value_3 = + A({1, 4}, {2, 4}) ; ``` +Any parenthesis argument can be either a range (with or without stride) or an index. +Range argument can be substituted by `multi::all` to obtain the whole range. + +## Conversions + +Conversion between arrays of distinct types is possible if the underlying elements allow it. +The result is as if elements are converted one by one; array sizes (extensions) are preserved. +Allowed conversions can be implicit or explicit and reflect the behavior of the element types. + +```cpp +// implicit conversions from real to complex is allowed ... 
+double d = 5.0; std::complex z = d; +// ... therefore it is also allowed from array of reals to arrays of complex +multi::array D({10, 10}); multi::array, 2> Z = D; +// (implicit or explicit) conversions from complex to real are disallowed (compilation error) +// multi::array D = Z; // or D{Z}; +``` + +Another case is illustrated by `std::complex` and `std::complex`; +in one direction, the conversion can be implicit, while in the other, it can only be explicit. +This behavior is reflected in the corresponding arrays: +```cpp +multi::array> C; +multi::array> Z = C; // implicit conversion ok +multi::array> C2{Z}; // explicit conversion is allowed +// multi::array> C3 = Z; // implicit conversion is disallowed (compilation error) +``` + +Implicit conversions are generally considered harmful, but inconsistent conversions are worse; therefore, the library allows them when appropriate. +The main drawback of implicit conversions in this context is that they might incur unexpected (e.g. costly) data conversions when passing arguments to functions. + +```cpp +void fun(multi::array> Z) { ... }; +... +multi::array D({10, 10}); +fun(D); // real elements are converted to complex silently here +``` +In many instances, especially in generic code, it might still be a desirable behavior. + +To prevent implicit conversions, use element types with no implicit conversions when possible. + +Finally, arrays of unrelated element types are prevented from producing direct conversions, resulting in compilation errors. +Element-wise transformations can be used instead. +For example, to convert an array of integers to an array of text strings: + +```cpp + multi::array const A = {{1, 2}, {3, 4}}; + + auto to_string = [](int e) {return std::to_string(e);}; + multi::array B = A.element_transformed(to_string); + assert( B[1][1] == "4" ); +``` + ## Const-correctness -The library goes to great lenghts to ensure const-correctness.
-Const-correctness refers to the property of an object, or parts of it, of not accepting mutation. -It is not only important to avoid bugs and typos but also to ensure compatibility with thread safety. +Const-correctness refers to the property of a program to disallow mutation of certain objects when it is undesired or logically incorrect. +Honoring the const-ness declaration is fundamental not only to avoid bugs and typos but also for thread safety and generic programming. +The library goes to great lengths to ensure const-correctness for the whole or parts of any object. -An array can be declared to be constant using the keyword `const`. -A reference array (`array_ref`) is never resizable (or reassignable) but their elements are mutable unless the reference is declared with `const`. +Arrays are resizable, and their elements can be mutated unless declared constant (using the keyword `const`). -The design ensures that constness of references and values is propagated to subsets (views) and to elements. -Any subarray (views) will propagate the constness of the original array. +A reference array or subarray is never resizable, but its elements are mutable if not declared `const`. +The design ensures that the const-ness of references and values propagates to subarrays (views) and, ultimately, their elements. ```cpp template void print(Array1D const& coll) { -// *coll.begin() = 99; // doesn't compile "assignment of read-only location" +// *coll.begin() = 99; // doesn't compile, "assignment of read-only location" for(auto const& e : coll) {std::cout<< e <<", ";} std::cout << std::endl; @@ -608,16 +937,15 @@ int main() { } ``` -As a general rule, in functions take generic array arguments that are _not_ going to be mutated as `Array const&` (in the context of `template`). -If mutation is expected take them as `Array&&` (note the double ampersand, i.e. universal/forwarding reference). -Views can be *named* into "constant language references" by using `auto const&`. 
-If mutation is desired use `auto&&`. -Normal references `Array&` or `auto&` in general doesn't have the expected behavior for views, +As a general rule for passing generic arrays as arguments, pass them as `Array const&` (in the context of `template`); +unless mutation is expected, in which case take arguments as `Array&&` (note the double ampersand, i.e., universal/forwarding reference). +Analogously, subarrays can be locally *named* into "constant language references" using `auto const&` and, if mutation is desired, `auto&&` should be used. +Regular references `Array&` or `auto&` in general do not have the expected behavior for views. ```cpp template void fill_99(Array1D&& coll) { - for(auto& e : coll) {e = 99;} + for(auto& e : coll) { e = 99; } } int main() { @@ -638,34 +966,185 @@ int main() { } ``` +## Compile-time evaluation (constexpr-all-the-things) + +With certain limitations imposed by the language, arrays can be declared in contexts with compile-time evaluation. + +```cpp +constexpr auto trace() { + multi::array arr = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + arr[2][2] = 10; + return std::accumulate(arr.diagonal().begin(), arr.diagonal().end(), 0); +} + +static_assert( trace() == 1 + 5 + 10 ); +``` +https://godbolt.org/z/Porre3z8s + +## Broadcast (infinite views) + +Broadcasting is a technique by which arrays are reinterpreted as having a higher dimension by repeating elements. +The technique allows the reuse of operations designed for high dimensionality and effectively apply them to arrays of lower dimensionality. +The result is generally an economy in the number of distinct operations that need to be provided in exchange for understanding how and where to exploit the broadcast operations. + +Broadcasting is popular in array-based languages, such as Julia and NumPy, and the broadcast is generally applied automatically to match the dimension expected by the operation and other operation inputs.
+The library provides a basic form of broadcasting with certain limitations. + +Here is an example of an algorithm designed for two 2D arrays to obtain the row-by-row inner product. + +```cpp +auto row_by_row_dot = [](auto const& A2D, auto const& B2D, auto& results) { + std::transform(A2D.begin(), A2D.end(), B2D.begin(), results.begin(), + [](auto const& Arow, auto const& Brow) {return std::inner_product(Arow.begin(), Arow.end(), Brow.begin(), 0);} + ); +}; + +auto A = multi::array{{ 0, 1}, { 2, 3}, { 4, 5}}; +auto B = multi::array{{10, 11}, {12, 13}, {14, 15}}; + +auto dots = multi::array({A.size()}); + +row_by_row_dot(A, B, dots); +``` + +If, for some reason, we want to obtain the inner product against a _single_ right-hand vector instead of several (a single 1D array of two elements), we would need to (re)write the function (or copy the repeated vector into the 2D `B` array, which is not ideal.) +Broadcasting can help reuse the same function without changes. + +```cpp +multi::array b = {10, 11}; + +row_by_row_dot(A, b.broadcasted(), dots); +``` + +The alternative, not using broadcast, is to write a very similar function, + +```cpp +auto row_fixed_dot = [](auto const& A2D, auto const& b1D, auto& results) { + std::transform(A2D.begin(), A2D.end(), results.begin(), + [&b1D](auto const& Arow) {return std::inner_product(Arow.begin(), Arow.end(), b1D.begin(), 0);} + ); +}; + +row_fixed_dot(A, b, dots3); +``` +(https://godbolt.org/z/9ndvfKqhc) + +Broadcasted arrays do not behave like normal array views in several aspects: +First, broadcasted arrays are infinite in the broadcasted dimension; iteration will never reach the end position, and calling `.size()` is undefined behavior. +Explicit loops or algorithms that depend on reaching `.end()` from `.begin()` will effectively be non-terminating. +Second, these array views are strictly read-only and alias their element addresses, e.g. 
`&b.broadcasted()[1][0] == &b.broadcasted()[2][0]` (since internal layouts' strides can be zero). + +For illustration purposes only, `fill` here is replaced by `copy`; problematic uses are highlighted: + +```cpp +multi::array B({10, 2}); +std::fill (B.begin(), B.end(), b); // canonical way +std::fill_n(B.begin(), B.size(), b); // canonical way + +std::copy_n(b.broadcasted().begin(), B.size(), B.begin()); // equivalent, using broadcast + +std::copy_n(b.broadcasted().begin(), b.broadcasted().size(), B.begin()); // incorrect, undefined behavior, no useful size() +std::copy (b.broadcasted().begin(), b.broadcasted().end(), B.begin()); // incorrect, undefined behavior, non-terminating loop (end is not reachable) +B = b.broadcasted(); // incorrect, undefined behavior, B would be of infinite allocated size +``` + +Unlike in popular array languages, broadcasting is not automatic in the library and is applied to the leading dimension only, one dimension at a time. +Broadcasting in non-leading dimensions can be achieved by transpositions and index rotation. + +Abuse of broadcast can make it harder to reason about operations; its primary use is to reuse existing efficient implementations of algorithms when implementations for specific lower dimensions are not available. +These algorithms need to be compatible with broadcasted views (e.g., no explicit use of `.size()` or infinite loops stemming from problematic use of `.begin()/end()`).
+ +As a final example, consider a function that computes the element-by-element product of two 2D arrays, + +```cpp +auto hadamard = [](auto const& A, auto const& B, auto&& C) { + auto const [is, js] = C.extensions(); + for(auto i : is) for(auto j : js) C[i][j] = A[i][j]*B[i][j]; +}; +``` + +As it is, this function can be reused to calculate the outer product of two 1D arrays: + +```cpp +auto outer = [&](auto const& a, auto const& b, T&& C) { + return hadamard(~(a.broadcasted()), b.broadcasted(), std::forward(C)); +}; +``` +(https://godbolt.org/z/5o95qGdKz) + +Note that the function `hadamard`, acting on 2D arrays, doesn't use the undefined (infinite) sizes (second dimension of `A` and first dimension of `B`). + +## Uninitialized vs. initialized elements + +The library can take advantage of trivial initialization if it is available for specific element types. +Such types can be primitive types or user-defined types with a trivial default constructor. +These types are characterized by having trivial default construction, i.e., a constructor that doesn't define or perform any operation, not even setting values. + +When used on the stack, these types can be declared with no initialization (e.g. `double x;`, initial value is not well defined or partially-formed) or with initialization (e.g. `double x{};`, same as `double x = 0.0;`). +Analogously, `multi::array` does not initialize individual elements of these kinds of types, unless specified. + +For example, after this construction of the array, the values of the six elements of this array are unspecified (partially-formed). +```cpp +multi::array A2({2, 3}); +``` + +No behavior of the program should depend on these values. +(Address sanitizers and memory checkers can detect this.) +This design is a slight departure from the STL's design, which [eagerly initializes elements in containers](https://lemire.me/blog/2012/06/20/do-not-waste-time-with-stl-vectors/).
+ +If trivial construction is unavailable, the library uses the default initialization. +For types that afford these partially formed states, elements can be later specified via assignment or assigning algorithms (e.g., copy or transform destination). + +Initialization can be enforced by passing a single value argument after the extensions. +```cpp +multi::array A2({2, 3}, 0); // generically multi::array({2, 3}, T{}); or multi::array({2, 3}, {}) +``` + +This design is particularly advantageous for *numeric* types for which external low-level libraries can fill values. +(or when data sits in GPUs, where the initialization step would require an expensive kernel launch and subsequent synchronization). + +Unfortunately, regarding the numeric types, STL's `std::complex` was standardized as not-trivially constructible. +A workaround is possible by forcing a particular flag on the client code in global scope, for example, immediately after including the library: +```cpp +#include + +template<> inline constexpr +bool multi::force_element_trivial_default_construction> = true; // should be defined as early as possible +``` + +With this line, `std::complex` elements inside arrays will be left uninitialized unless a value is specified. +The rule will only apply to this library's containers (`multi::array`, etc.), and not to other containers (such as `std::vector`) or individual `std::complex` variables. + ## Type Requirements -The design tries to impose the minimum possible requirements over the types that parameterize the arrays. +The library design tries to impose the minimum possible requirements over the types that parameterize the arrays. Array operations assume that the contained type (element type) are regular (i.e. different element represent disjoint entities that behave like values). Pointer-like random access types can be used as substitutes of built-in pointers. (Therefore pointers to special memory and fancy-pointers are supported.)
### Linear Sequences: Pointers -An `array_ref` can reference to an arbitrary random access sequence (e.g. memory block defined by pointer and size). -This way, any linear (random access) sequence (e.g. `raw memory`, `std::vector`, `std::queue`) can be efficiently arranged as a multidimensional array. +An `array_ref` can reference an arbitrary random access linear sequence (e.g. memory block defined by pointer and size). +This way, any linear sequence (e.g. `raw memory`, `std::vector`, `std::queue`) can be efficiently arranged as a multidimensional array. ```cpp std::vector buffer(100); multi::array_ref A({10, 10}, buffer.data()); -A[1][1] = 9; +A[1][1] = 9.0; -assert(buffer[11]==9); // the target memory is affected +assert( buffer[11] == 9.0 ); // the target memory is affected ``` -Since `array_ref` does not manage the memory associated with it, the reference can be simply dangle if the `buffer` memory is reallocated (e.g. by `resize` in this case). +Since `array_ref` does not manage the memory associated with it, the reference can be simply dangle if the `buffer` memory is reallocated (e.g. by vector-`resize` in this case). -### Special Memory: Allocators and Fancy Pointers +### Special Memory: Pointers and Views -`array`'s manages its memory behind the scenes through allocators, which can be specified at construction. +`array`s manage their memory behind the scenes through allocators, which can be specified at construction. It can handle special memory, as long as the underlying types behave coherently, these include [fancy pointers](https://en.cppreference.com/w/cpp/named_req/Allocator#Fancy_pointers) (and fancy references). Associated fancy pointers and fancy reference (if any) are deduced from the allocator types. Another use of fancy pointer is to create by-element "projections". +#### Allocators and Fancy Pointers + Specific uses of fancy memory are file-mapped memory or interprocess shared memory. 
This example illustrates memory persistency by combining with Boost.Interprocess library. The arrays support their allocators and fancy pointers (`boost::interprocess::offset_ptr`). @@ -675,14 +1154,14 @@ The arrays support their allocators and fancy pointers (`boost::interprocess::of using namespace boost::interprocess; using manager = managed_mapped_file; template using mallocator = allocator; -decltype(auto) get_allocator(manager& m){return m.get_segment_manager();} +decltype(auto) get_allocator(manager& m) {return m.get_segment_manager();} template using marray = multi::array>; -int main(){ +int main() { { manager m{create_only, "mapped_file.bin", 1 << 25}; - auto&& arr2d = *m.construct>("arr2d")(std::tuple{1000, 1000}, 0.0, get_allocator(m)); + auto&& arr2d = *m.construct>("arr2d")(marray::extensions_type{1000, 1000}, 0.0, get_allocator(m)); arr2d[4][5] = 45.001; } // imagine execution restarts here, the file "mapped_file.bin" persists @@ -698,6 +1177,8 @@ int main(){ (See also, examples of interactions with the CUDA Thrust library to see more uses of special pointer types to handle special memory.) +#### Transformed views + Another kind of fancy-pointer is one that transforms the underlying values. These are useful to create "projections" or "views" of data elements. In the following example a "transforming pointer" is used to create a conjugated view of the elements. @@ -723,8 +1204,8 @@ auto hermitized(Array2D const& arr) { int main() { using namespace std::complex_literals; multi::array A = { - { 1. + 2.i, 3. + 4.i}, - { 8. + 9.i, 10. + 11.i} + { 1.0 + 2.0i, 3.0 + 4.0i}, + { 8.0 + 9.0i, 10.0 + 11.0i} }; auto const& Ah = hermitized(A); @@ -733,6 +1214,41 @@ int main() { } ``` +To simplify this boilerplate, the library provides the `.element_transformed(F)` method that will apply a transformation `F` to each element of the array. +In this example the original arrays is transformed into a transposed array with duplicated elements. 
+ +```cpp + multi::array A = { + {1.0, 2.0}, + {3.0, 4.0}, + }; + + auto const scale = [](auto x) { return x * 2.0; }; + + auto B = + A.rotated().element_transformed(scale); + assert( B[1][0] == A[0][1] * 2 ); +``` + +([live](https://godbolt.org/z/b7E56Mjc8)) + +Since the result of `element_transformed` is a reference (transformed view) to the original data, it is important to understand the semantics of evaluation and possible allocations incurred. +As mentioned in other sections, using `auto` and/or `+` appropriately can lead to simple and efficient expressions. + +| Construction | Allocation of `T`s | Initialization (of `T`s) | Evaluation (of `fun`) | Notes | +| -------- | ------- | ------- | ------- | ------- | +| `multi::array const B = A.element_transformed(fun);` | Yes | No | Yes | Implicit conversion to `T` if result is different, dimensions must match. B can be mutable. | +| `multi::array const B = + A.element_transformed(fun);` | Yes (and move, or might allocate twice if types don't match) | No | Yes | Not recommended | +| `multi::array const B{A.element_transformed(fun)};` | Yes | No | Yes | Explicit conversion to `T` if result is different, dimensions must match | +| `auto const B = + A.element_transformed(fun);` | Yes | No | Yes | Types and dimension are deduced, result is contiguous, preferred | +| `auto const B = A.element_transformed(fun);` | No | No | No (delayed) | Result is effectively a reference, may dangle with `A`, usually `const`, not recommended | +| `auto const& B = A.element_transformed(fun);` | No | No | No (delayed) | Result is effectively a reference, may dangle with `A`. Preferred way. | +| `multi::array B(A.extensions()); B = A.element_transformed(fun);` | Yes | Yes (during construction) | Yes | "Two-step" construction. `B` is mutable.
Not recommended | + +| Assignment | Allocation of `T`s | Initialization (of `T`s) | Evaluation (of `fun`) | Notes | +| -------- | ------- | ------- | ------- | ------- | +| `B = A.element_transformed(fun);` | No, if sizes match | Possibly (when `B` was initialized) | Yes | `B` can't be declared `const`, it can be a writable subarray, preferred | +| `B = + A.element_transformed(fun);` | Yes | Possibly (when `B` was initialized) | Yes | Not recommended. | + # Interoperability with other software ## STL (Standard Template Library) @@ -744,14 +1260,129 @@ Along with STL itself, the library tries to interact with other existing quality ### Ranges (C++20) -Although no exhaustive test has been performed, the library is expected to work with STL ranges. -The library works well with Ranges-v3 which is approximately a superset of STL ranges (see example below). +[Standard ranges](https://en.cppreference.com/w/cpp/ranges) extend standard algorithms, reducing the need for iterators, in favor of more composability and a less error-prone syntax. + +In this example, we replace the values of the first row for which the sum of the elements is odd: + +```cpp + static constexpr auto accumulate = [](auto const& R) {return std::ranges::fold_left(R, 0, std::plus<>{});}; + + auto arr = multi::array{ + {2, 0, 2, 2}, + {2, 7, 0, 2}, // this row adds to an odd number + {2, 2, 0, 4}, + }; + + auto const row = std::ranges::find_if(arr, [](auto const& r) { return accumulate(r) %2 == 1; }); + if(row != arr.end()) std::ranges::fill(*row, 9); + + assert(arr[1][0] == 9 ); +``` +[(live)](https://godbolt.org/z/cT9WGffM3) + +Together with the array constructors, the ranges library enables a more functional programming style; +this allows us to work with immutable variables in many cases.
+ +```cpp + multi::array const A = {{...}}; + multi::array const V = {...}; + + multi::array const R = std::views::zip_transform(std::plus<>{}, A[0], V); + + // Alternative imperative mutating code: + // multi::array R(V.size()); // R is created here... + // for(auto i : R.extension()) {R[i] = A[0][i] + V[i];} // ...and then mutated here +``` +[(live)](https://godbolt.org/z/M84arKMnT) + + +The "pipe" (`|`) notation of standard ranges allows one-line expressions. +In this example, the expression will yield the maximum value of the rows sums: +[`std::ranges::max(arr | std::views::transform(accumulate))`](https://godbolt.org/z/hvqnsf4xb) + +Like in classic STL, standard range algorithms acting on sequences operate in the first dimension by default, +for example, lexicographical sorting on rows can be performed with the `std::ranges::sort` algorithm. + +```cpp + auto A = multi::array{ + {'S', 'e', 'a', 'n', ' ', ' '}, + {'A', 'l', 'e', 'x', ' ', ' '}, + {'B', 'j', 'a', 'r', 'n', 'e'}, + }; + assert(not std::ranges::is_sorted(A)); + + std::ranges::sort(A); + + assert( std::ranges::is_sorted(A)); + + assert( + A == multi::array{ + {'A', 'l', 'e', 'x', ' ', ' '}, + {'B', 'j', 'a', 'r', 'n', 'e'}, + {'S', 'e', 'a', 'n', ' ', ' '}, + } + ); +``` + +To operate on the second dimension (sort by columns), use `std::ranges::sort(~A)` (or `std::ranges::sort(A.transposed())`). + +### Execution policies (parallel algorithms) + +Multi's iterators can exploit parallel algorithms by specifying execution policies. +This code takes every row of a two-dimensional array and sums its elements, putting the results in a one-dimensional array of compatible size. +The execution policy (`par`) selected is passed as the first argument. 
+ +```cpp + multi::array const A = ...; + multi::array v(size(A)); + + std::transform(std::execution::par, A.begin(), A.end(), v.begin(), [](auto const& row) {return std::reduce(row.begin(), row.end());} ); +``` +[(live)](https://godbolt.org/z/63jEdY7zP) + +For an array of 10000x10000 elements, the execution time decreases to 0.0288 sec, compared to 0.0526 sec for the non-parallel version (i.e. without the `par` argument). + +Note that parallelization is, in this context, inherently one-dimensional. +For example, parallelization happens for the transformation operation, but not for the summation. + +The optimal way to parallelize specific operations strongly depends on the array's size and shape. +Generally, straightforward parallelization without exploiting the n-dimensional structure of the data has a limited pay-off; +and nesting parallelization policies usually doesn't help either. + +Flattening the n-dimensional structure for certain algorithms might help, but such techniques are beyond the scope of this documentation. + +Some member functions internally perform algorithms that can benefit from execution policies; +in turn, some of these functions have the option to pass a policy. +For example, this copy construction can initialize elements in parallel from the source: + +```cpp + multi::array const A = ...; + multi::array const B(std::execution::par, A); // copies A into B, in parallel, same effect as multi::array const B(A); or ... B = A; +``` + +Execution policies are not limited to STL; +Thrust and oneAPI also offer execution policies that can be used with the corresponding algorithms.
+ +Execution policies and ranges can be mixed (`x` and `y` can be 1D dimensional arrays, of any arithmetic element type) +```cpp +template +auto dot_product(X1D const& x, Y1D const& y) { + assert(x.size() == y.size()); + auto const& z = std::ranges::views::zip(x, y) + | std::ranges::views::transform([](auto const& ab) { auto const [a, b] = ab; + return a * b; + }) + ; + return std::reduce(std::execution::par_unseq, z.begin(), z.end()); +} +``` +[(live)](https://godbolt.org/z/cMq87xPvb) ### Polymorphic Memory Resources -The library is compatible with C++17's polymorphic memory resources (PMR) which allows using preallocated buffers as described in this example. -This enables the use of stack memory, with many performance advantaneges. -For example, this code uses a buffer to allocate memory for two arrays, we will see how this buffer ends up containing the data of the arrays `"aaaabbbbbbXX"`. +In addition to supporting classic allocators (`std::allocator` by default), the library is compatible with C++17's [polymorphic memory resources (PMR)](https://en.cppreference.com/w/cpp/header/memory_resource) which allows using advanced allocation strategies, including preallocated buffers. +This example code uses a buffer as memory for two arrays; +in it a predefined buffer ends up containing the data of the arrays, something like `"aaaabbbbbbXX"`. 
```cpp #include // for polymorphic memory resource, monotonic buffer @@ -760,85 +1391,80 @@ int main() { char buffer[13] = "XXXXXXXXXXXX"; // a small buffer on the stack std::pmr::monotonic_buffer_resource pool{std::data(buffer), std::size(buffer)}; - multi::array> A({2, 2}, 'a', &pool); - multi::array> B({3, 2}, 'b', &pool); + multi::pmr::array A({2, 2}, 'a', &pool); + multi::pmr::array B({3, 2}, 'b', &pool); - assert( buffer == std::string{"aaaabbbbbbXX"} ); + assert( buffer != std::string{"XXXXXXXXXXXX"} ); // overwritten w/elements, implementation-dependent (libstd consumes from left, and libc++, from the right) } ``` -The library supports classic allocators (`std::allocator` by default) and also allocators from other libraries (see Thurst section). - -### Comparison with `mdspan` (projected for C++23) - -C++23 will provide `mdspan`, a non-owning multidimensional array. -This is a good point to compare the two libraries. -Although the goals are similar, the two libraries differ in their generality and approach; in a few words: - -The Multi library concentrates in _well defined value- and reference-semantics of arbitrary memory types with regularly arranged elements_ (distributions described by strides and offsets) and _extreme compatibility with STL algorithms_ (via iterators) and other fundamental libraries. - -`mdspan` concetrates in _arbitrary layouts_ for non-owining memory of a single type (described by raw pointers). -Due to the priority of arbitrary layouts, the `mdspan` research team didn't find efficient ways to introduce iterators into the library and the compatibility with respect to the rest of the STL is therefore lacking. -The ultimate reason is that arbitrary layouts do not compose well across subdimensions. -This imposes certain limitations in the library such as ad-hoc slicing and subarray. 
- -Here it is a table with comparison, also the libraries can be compare [here](https://godbolt.org/z/5Pbrs5fEd) - -| | Multi | mdspan | -|--- | --- | --- | -| External Deps | no (Standard Library C++17) | no (Standard Library) | -| Non-owning view of data | **yes**, via `multi::array_ref(Tptr, {x1, x2, ..., xD})` | **yes**, via `mdspan m{T*, extents{x1, x2, ..., xD}};` | -| Arbritary number of dim | **yes**, via positive dimension (compile-time) parameter `D` | **yes**, same | -| Compile-time dim size | no | **yes**, via template paramaters `mdspan{T*, extent<16, dynamic_extents>{32} }` | | -| Array values (owning data) | **yes**, via `multi::array({x1, x2, ..., xD})` | no, (planned `mdarray`) | -| Value semantic (Regular) | **yes**, via cctor, mctor, assign, massign, auto decay of views | no, and not planned | -| const-propagation Reference semantic | **yes**, via `const` or `const&` | no, const mdspan elements are assignable! | -| References w/no-rebinding | **yes**, assignment is deep | no, assignment of mdspan rebinds! | -| Element access | **yes**, via `A(i, j, ...)` | **yes**, via `A(i, j, ...)` | -| Partial element access | **yes**, via `A[i]` or `A(i, multi::all)` | **yes**, via `submdspan(A, i, full_extent)` | -| Subarray views | **yes**, via `A({0, 2}, {1, 2})` or `A(1, {1, 2})` | **yes**, via `submdspan(A, std::tuple{0, 2}, std::tuple{0, 2})` | -| Subarray with lower dim | **yes**, via `A(1, {1, 2})` | **yes**, via `submdspan(A, 1, std::tuple{0, 2})` | -| Subarray w/well def layout | **yes** (strided layout) | no | -| Custom Alloctors | **yes**, via `multi::array` | no (no allocation or ownership) | -| PMR Alloctors | **yes**, via `multi::pmr::array` | no (no allocation or ownership) | -| Fancy pointers / references | **yes**, via `multi::array` or views | no | -| Strided Layout | **yes** | **yes** | -| Fortran-ordering | **yes**, for views, e.g. 
resulted from transposed views | **yes** (only views are supported) | -| Zig-zag / Hilbert ordering | no | **yes**, via arbitrary layouts (but no inverse or flattening) | -| Arbitrary layout | no | **yes**, possibly inneficient, no efficient slicing | -| Flattening of elements | **yes**, via `A.elements()` range (efficient representation) | **yes**, but via indices roundtrip (inefficient) | -| Iterators | **yes**, standard compliant, random-access-iterator | no, or very limited | -| Multidimensional iterators (cursors) | **yes** (experimental) | no | -| STL algorithms or Ranges | **yes** | no, limited via `std::cartesian_product` | -| Compatibility with Boost | **yes**, serialization, interprocess (see below) | no | -| Compatibility with Thrust | **yes**, via flatten views (ad-hoc loop fusion), fancy-pointers/-references | no | -| Used in production | [QMCPACK](https://qmcpack.org/), [INQ](https://gitlab.com/npneq/inq) | ? , experience from Kokkos incarnation | +`multi::pmr::array` is a synonym for `multi::array>`. +In this particular example, the technique can be used to avoid dynamic memory allocations of small local arrays. [(live)](https://godbolt.org/z/fP9P5Ksvb) + +The library also supports memory resources from other libraries, including those returning special pointer types (see [CUDA Thrust](#cuda-thrust) Thurst section, and Boost.Interprocess section). + +### Substitutability with standard vector and span + +The one-dimensional case `multi::array` is special and overlaps functionality with other dynamic array implementations, such as `std::vector`. +Indeed, both types of containers are similar and usually substitutable, with no or minor modifications. +For example, both can be constructed from a list of elements (`C c = {x0, x2, ...};`) or from a size `C c(size);`, where `C` is either type. + +Both values are assignable, have the same element access patterns and iterator interface, and implement all (lexical) comparisons. 
+ +They differ conceptually in their resizing operations: `multi::array` doesn't insert or push elements and resizing works differently. +The difference is that the library doesn't implement *amortized* allocations; therefore, these operations would be of a higher complexity cost than the `std::vector`. +For this reason, `resize(new_size)` is replaced with `reextent({new_size})` in `multi::array`, whose primary utility is for element preservation when necessary. + +In a departure from standard containers, elements are left uninitialized if they have a trivial default constructor. +So, while `multi::array A({N}, T{})` is equivalent to `std::vector V(N, T{})`, `multi::array A(N)` will leave elements `T` uninitialized if the type allows this (e.g. built-ins), unlike `std::vector V(N)` which will initialize the values. +RAII types (e.g. `std::string`) do not have a trivial default constructor; therefore, they are not affected by this rule. + +With the appropriate specification of the memory allocator, `multi::array` can refer to special memory not supported by `std::vector`. + +Finally, an array `A1D` can be copied by `std::vector v(A1D.begin(), A1D.end());` or `v.assign(A1D.begin(), A1D.end());` or vice versa. +Without copying, a reference to the underlying memory can be created `auto&& R1D = multi::array_ref(v.data(), v.size());` or conversely `std::span(A1D.data_elements(), A1D.num_elements());`. +(See examples [here](https://godbolt.org/z/n4TY998o4).) + +The `std::span` (C++20) does not have well-defined reference or pointer semantics; it doesn't respect `const` correctness in generic code. +This behavior is contrary to the goals of this library; +and for this reason, there is no single substitute for `std::span` for all cases. +Depending on how it is used, either `multi::array_ref [const& | &&]` or `multi::array_ptr` may replace the features of `std::span`. +The former typically works when using it as a function argument.
+ +Multidimensional arrays can interoperate with C++23's non-owning `mdspan`. +[Preliminarily](https://godbolt.org/z/aWW3vzfPj), Multi's subarrays (arrays) can be converted to (viewed as) `mdspan`. + +A detailed comparison with other array libraries (mdspan, Boost.MultiArray, Eigen) is explained in an Appendix. ## Serialization -The capability of serializing arrays is important to save/load data to/from disk and also to communicate values via streams or networks (including MPI). +The ability to serialize arrays is important to save/load data to/from disk and also to communicate values via streams or networks (including MPI). The C++ language does not give any facilities for serialization and unfortunately the standard library doesn't either. However there are a few libraries that offer a certain common protocol for serialization, such as [Boost.Serialization](https://www.boost.org/doc/libs/1_76_0/libs/serialization/doc/index.html) and [Cereal](https://uscilab.github.io/cereal/). -The Multi library is compatible with both of them, and yet it doesn't depend on any of them. -The user can choose one or the other, or none if serialization is not needed. +The Multi library is compatible with both of them (and yet it doesn't depend on any of them). +The user can choose one or the other, or none, if serialization is not needed. The generic protocol is such that variables are (de)serialized using the (`>>`)`<<` operator with the archive; operator `&` can be used to have single code for both. Serialization can be binary (efficient) or text-based (human readable). Here it is a small implementation of save and load functions for array to JSON format with Cereal. -The example can be easily adapted to other formats or libries (XML with Boost.Serialization are commented on the right). +The example can be easily adapted to other formats or libraries (XML with Boost.Serialization are commented on the right).
```cpp -#include // our library -#include // saving to files in example -#include // #include - // #include +#include // this library + +#include // or #include // #include + // #include // for serialization of array elements (in this case strings) -#include // #include -using input_archive = cereal::JSONInputArchive ; // boost::archive::xml_iarchive; -using output_archive = cereal::JSONOutputArchive; // boost::archive::xml_oarchive; -using cereal::make_nvp; // boost::serialization::make_nvp; +#include // #include + +#include // saving to files in example + +using input_archive = cereal::JSONInputArchive ; // or ::XMLInputArchive ; // or boost::archive::xml_iarchive; +using output_archive = cereal::JSONOutputArchive; // or ::XMLOutputArchive; // or boost::archive::xml_oarchive; + +using cereal::make_nvp; // or boost::serialization::make_nvp; namespace multi = boost::multi; @@ -862,19 +1488,20 @@ int main() { assert(A == B); } ``` +[(online)](https://godbolt.org/z/9j9avjh8M) These templated functions work for any dimension and element type (as long as the element type is serializable in itself; all basic types are serializable by default). -However note that it is responsibility of the user to make sure that data is serialized and deserialized into the same type and also assuming the same format. -This is because the underlying serialization library only do minimal consistency checks for efficiency reasons and doesn't try to second guess file formats or contained types. -Serialization is a relatively low level feature for which efficiency and economy of bytes is priority. -Cryptic errors and crashes can occur if serialization libraries, file formats or C++ types are mixed between writes and reads. -On top of serialization checks can be added by the user before and after loading a file. 
+However, note that the user must ensure that data is serialized and deserialized into the same type; +the underlying serialization libraries only do minimal consistency checks for efficiency reasons and don't try to second-guess file formats or contained types. +Serialization is a relatively low-level feature for which efficiency and economy of bytes is a priority. +Cryptic errors and crashes can occur if serialization libraries, file formats, or C++ types are mixed between writes and reads. +Some formats are human-readable, but not particularly pretty for showing as output (see section on Formatting on how to print to the screen). -References to subarrays can also be serialized, however, in such case size information is not saved. -The reason is that references to subarrays cannot be resized in their number of elements if there is size mismatch during deserialization. +References to subarrays (views) can also be serialized; however, size information is not saved in such cases. +The reasoning is that references to subarrays cannot be resized in their number of elements if there is a size mismatch during deserialization. +Therefore, array views should be deserialized as other array views, with matching sizes. -The output JSON file of the previous example looks like this. -(The XML would have a similar structure.) +The output JSON file created by Cereal in the previous example looks like this. ```json { @@ -902,6 +1529,7 @@ The output JSON file of the previous example looks like this. } } ``` +(The [Cereal XML](https://godbolt.org/z/de814Ycar) and Boost XML output would have a similar structure.) Large datasets tend to be serialized slowly for archives with heavy formatting. Here it is a comparison of speeds when (de)serializing a 134 MB 4-dimensional array of with random `double`s. @@ -921,7 +1549,7 @@ Here it is a comparison of speeds when (de)serializing a 134 MB 4-dimensional ar ## Range-v3 -The library works out of the box with Eric Niebler's Range-v3 library. 
+The library works out of the box with Eric Niebler's Range-v3 library, a precursor to the standard Ranges library (see above). The library helps removing explicit iterators (e.g. `begin`, `end`) from the code when possible. Every Multi array object can be regarded as range. @@ -1001,35 +1629,39 @@ int main(){ manager m{bip::open_only, "bip_mapped_file.bin"}; auto&& arr2d = *m.find>("arr2d").first; assert( arr2d[4][5] == 45.001 ); - m.destroy>("arr2d");// eliminate>(m, "arr2d");} + m.destroy>("arr2d");// eliminate>(m, "arr2d");} } } ``` (Similarly works with [LLNL's Meta Allocator](https://github.com/llnl/metall)) -## CUDA Thrust +## CUDA Thrust (and HIP Thrust) -The library works out-of-the-box in combination with the CUDA Thrust library. +The library works out-of-the-box in combination with the Thrust library. ```cpp -#include -#include +#include // this library + +#include // from CUDA or ROCm distributions namespace multi = boost::multi; + int main() { multi::array> A({10,10}); multi::array> B({10,10}); - A[5][0] = 50.; - thrust::copy(begin(rotated(A2)[0]), end(rotated(A2)[0]), begin(rotated(B2)[0])); - assert( B2[5][0] == 50. ); + A[5][0] = 50.0; + + thrust::copy(A.rotated()[0].begin(), A.rotated()[0].end(), B.rotated()[0].begin()); // copy row 0 + assert( B[5][0] == 50.0 ); } ``` +[(live)](https://godbolt.org/z/e7bjKqh69) -which uses the default Thrust backend (CUDA, OpenMP or TBB). -Universal memory (accessible from normal CPU code) can be used with `thrust::universal_allocator` instead. +which uses the default Thrust device backend (i.e. CUDA when compiling with `nvcc`, HIP/ROCm when compiling with a HIP/ROCm compiler, or OpenMP or TBB in other cases). +Universal memory (accessible from normal CPU code) can be used with `thrust::universal_allocator` (from ``) instead. 
-More specific allocators can be used to force certain Thrust backends, for example CUDA managed memory: +More specific allocators can be used to ensure CUDA backends, for example CUDA managed memory: ```cpp #include @@ -1037,26 +1669,263 @@ More specific allocators can be used to force certain Thrust backends, for examp multi::array> A({10,10}); ``` -Multi doesn't have a dependency on Thrust (or viseversa); +In the same way, to *ensure* HIP backends please replace the `cuda` namespace by the `hip` namespace, and in the directory name ``. +`` is provided by the ROCm distribution (in `/opt/rocm/include/thrust/system/hip/`), and not by the NVIDIA distribution. + +Multi doesn't have a dependency on Thrust (or vice versa); they just work well together, both in terms of semantics and efficiency. -Certain "patches" (to correct Thrust behavior) can be applied to Thrust to gain extra efficiency and achieve near native speed by adding the `#include`. +Certain "patches" (to improve Thrust behavior) can be applied to Thrust to gain extra efficiency and achieve near native speed by adding the `#include`. Multi can be used on existing memory in a non-invasive way via (non-owning) reference arrays: ```cpp - // assumes raw_pointer was allocated with cudaMalloc - using cuda_ptr = thrust::cuda::pointer; + // assumes raw_pointer was allocated with cudaMalloc or hipMalloc + using gpu_ptr = thrust::cuda::pointer; // or thrust::hip::pointer multi::array_ref Aref({n, n}, gpu_ptr{raw_pointer}); ``` -## TotalView +Finally, the element type of the device array has to be device-friendly to work correctly; +this includes all built-in types, and classes with basic device operations, such as construction, destruction, and assignment. +They notably do not include `std::complex`, which can be replaced by the device-friendly `thrust::complex`.
+ +### OpenMP via Thrust + +In an analogous way, Thrust can also handle OpenMP (omp) allocations and multi-threaded algorithms of arrays. +The OMP backend can be enabled by the compiler flags `-DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_BACKEND_OMP` or by using the explicit `omp` system types: + +```cpp +#include +#include + +namespace multi = boost::multi; + +int main() { + multi::array> A({10,10}); + multi::array> B({10,10}); + + A[5][0] = 50.0; + + // copy row 0 + thrust::copy(A.rotated()[0].begin(), A.rotated()[0].end(), B.rotated()[0].begin()); + + assert( B[5][0] == 50.0 ); + + auto C = B; // uses omp automatically for copying behind the scenes +} +``` +https://godbolt.org/z/e3cGbY87r + +Compilation might need to link to an omp library, `-fopenmp -lgomp`. + +### Thrust memory resources + +GPU memory is relatively expensive to allocate; therefore, any application that allocates and deallocates arrays often will suffer performance issues. +This is where special memory management is important, for example for avoiding real allocations when possible by caching and reusing memory blocks. + +Thrust implements both polymorphic and non-polymorphic memory resources via `thrust::mr::allocator`; +Multi supports both. + +```cpp +auto pool = thrust::mr::disjoint_unsynchronized_pool_resource( + thrust::mr::get_global_resource(), + thrust::mr::get_global_resource() +); + +// memory is handled by pool, not by the system allocator +multi::array> arr({1000, 1000}, &pool); +``` + +The associated pointer type for the array data is deduced from the _upstream_ resource; in this case, `thrust::universal_ptr`. + +As a quick way to improve performance in many cases, here is a recipe for a `caching_allocator` which uses a global (one per thread) memory pool that can replace the default Thrust allocator. +The requested memory resides in GPU (managed) memory (`thrust::cuda::universal_memory_resource`) while the cache _bookkeeping_ is held in CPU memory (`new_delete_resource`).
+ +```cpp +template>>> +struct caching_allocator : Base_ { + caching_allocator() : + Base_{&thrust::mr::tls_disjoint_pool( + thrust::mr::get_global_resource(), + thrust::mr::get_global_resource() + )} {} + caching_allocator(caching_allocator const&) : caching_allocator{} {} // all caching allocators are equal + template struct rebind { using other = caching_allocator; }; +}; +... +int main() { + ... + using array2D = multi::array>; + + for(int i = 0; i != 10; ++i) { array2D A({100, 100}); /*... use A ...*/ } +} +``` +https://godbolt.org/z/rKG8PhsEh + +In the example, most of the frequent memory requests are handled by reutilizing the memory pool avoiding expensive system allocations. +More targeted usage patterns may require locally (non-globally) defined memory resources. + +## CUDA C++ + +CUDA is a dialect of C++ that allows writing pieces of code directly for GPU execution, known as "CUDA kernels". +CUDA code is generally "low level" (less abstracted) but it can be used in combination with CUDA Thrust or the CUDA runtime library, specially to implement custom algorithms. +Although code inside kernels has certain restrictions, most Multi expressions can be used. +(Most functions in Multi, except those involving memory allocations, are marked `__device__` to allow this.) + +Calling kernels involves a special syntax (`<<< ... >>>`), and they cannot take arguments by reference (or by values that are not trivial, e.g. not entirely contained in the stack). +Since arrays are usually passed by reference (e.g. `multi::array&` or `Array&&`), a different idiom needs to be used. +(Large arrays are not passed by value to avoid copies, but even if a copy would be fine, kernel arguments cannot allocate memory themselves.) +Iterators (e.g. `.begin()/.end()`) and "cursors" (e.g. `.home()`) are "trivial to copy" and can be passed by value and represent a "proxy" to an array, including allowing the normal index syntax and other transformations. 
+ +Cursors are a generalization of iterators for multiple dimensions. +They are cheaply copied (like iterators) and they allow indexing. +Also, they have no associated `.size()` or `.extensions()`, but this is generally fine for kernels. + +Here it is an example implementation for matrix multiplication, in combination with Thrust and Multi, + +```cpp +#include // from https://gitlab.com/correaa/boost-multi +#include // for thrust::cuda::allocator + +template +__global__ void Kernel(ACursor A, BCursor B, CCursor C, int N) { + int x = threadIdx.x + blockIdx.x * blockDim.x; + int y = threadIdx.y + blockIdx.y * blockDim.y; + + typename CCursor::element_type value{0.0}; + for (int k = 0; k != N; ++k) { value += A[y][k] * B[k][x]; } + C[y][x] = value; +} + +namespace multi = boost::multi; + +int main() { + int N = 1024; + + // declare 3 square arrays + multi::array> A({N, N}); A[0][0] = ...; + multi::array> B({N, N}); B[0][0] = ...; + multi::array> C({N, N}); + + // kernel invocation code + assert(N % 32 == 0); + dim3 dimBlock(32, 32); + dim3 dimGrid(N/32, N/32); + Kernel<<>>(A.home(), B.home(), C.home(), N); + cudaDeviceSynchronize(); + + // now C = A x B +} +``` +[(live)](https://godbolt.org/z/eKbeosrWa) + +## SYCL + +The SYCL library promises the unify CPU, GPU and FPGA code. +At the moment, the array containers can use the Unified Shared Memory (USM) allocator, but no other tests have been investigated. + +```cpp + sycl::queue q; + + sycl::usm_allocator q_alloc(q); + multi::array data(N, 1.0, q_alloc); -TotalView visual debugger (commercial) can display arrays in human-readable form (for simple types, like `double` or `std::complex`). 
+ //# Offload parallel computation to device + q.parallel_for(sycl::range<1>(N), [=,ptr = data.base()] (sycl::id<1> i) { + ptr[i] *= 2; + }).wait(); +``` +https://godbolt.org/z/8WG8qaf4s + +Algorithms are expected to work with oneAPI execution policies as well (not tested) + +```cpp + auto policy = oneapi::dpl::execution::dpcpp_default; + sycl::usm_allocator alloc(policy.queue()); + multi::array vec(n, alloc); + + std::fill(policy, vec.begin(), vec.end(), 42); +``` + +## Formatting ({fmt} pretty printing) + +The library doesn't have a "pretty" printing facility to display arrays; +fortunately, it automatically works with the external library [{fmt}](https://fmt.dev/latest/index.html), both for arrays and subarrays. +The fmt library is not a dependency of the Multi library; they simply work well together using the "ranges" part of the formatting library. + +This example prints a 2-dimensional subblock of a larger array. + +```cpp +#include "fmt/ranges.h" +... + multi::array A2 = { + {1.0, 2.0, 3.0}, + /*-subblock-**/ + {3.0, 4.0, /**/ 5.0}, + {6.0, 7.0, /**/ 8.0}, + }; + + fmt::print("A2 subblock = {}", A2({1, 3}, {0, 2})); // second and third row, first and second column +``` +with the "flat" output `A2 subblock = [[3, 4], [6, 7]]` + +https://godbolt.org/z/EE5sqTdvf + +For 2 or more dimensions the output can be conveniently structured in different lines using the `fmt::join` facility: + +```cpp + fmt::print("{}\n", fmt::join(A2({1, 3}, {0, 2}), "\n")); // rows (first dimension) are printed on different lines +``` +with the output: + +> ``` +> [3, 4] +> [6, 7] +> ``` + +When saving arrays to files, consider using serialization (see section) instead. + +## Legacy libraries (C-APIs) + +Multidimensional array data structures exist in all languages, whether implicitly defined by their strided structure or at the language level. +Functions written in C tend to receive arrays by pointer arguments (e.g. to "first" element) and memory layout (sizes and strides).
+ +A C-function taking a 2D array with a concrete type might look like this in the general case: + +```c +void fun(double* data, int size1, int size2, int stride1, int stride2); +``` + +The function can be called from C++ using arguments derived from Multi arrays: + +```cpp +fun(arr.base(), std::get<0>(arr.sizes()), std::get<1>(arr.sizes()), std::get<0>(arr.strides()), std::get<1>(arr.strides())); +``` + +or + +```cpp +auto const [size1, size2] = arr.sizes(); +auto const [stride1, stride2] = arr.strides(); + +fun(arr.base(), size1, size2, stride1, stride2); +``` +Although the recipe can be applied straightforwardly, different libraries have various assumptions about memory layouts (e.g. BLAS 2D-arrays assume that the second stride is 1) and some might take stride information in a different way (e.g. FFTW doesn't use strides but stride-products). +Furthermore, some arguments may need to be permuted if the function expects arrays in column-major (Fortran) ordering. +For this reason the library is accompanied by a series of adaptor libraries to popular C-based libraries, that can be found in the `include/multi/adaptors/` directory. + +* BLAS +* Lapack +* FFTW/cuFFT + +* cuBLAS +* cuFFT + +* TotalView: visual debugger (commercial), popular in HPC environments, can display arrays in human-readable form (for simple types, like `double` or `std::complex`). To use it, simply `#include "multi/adaptors/totalview.hpp"` and link to the TotalView libraries, compile and run the code with the TotalView debugger. # Technical points -### What's up with the multiple bracket notation? +### What's up with the multiple bracket (vs. parenthesis) notation? The chained bracket notation (`A[i][j][k]`) allows to refer to elements and subarrays lower dimensional subarrays in a consistent and _generic_ manner and it is the recommended way to access the array objects.
It is a frequently raised question whether the chained bracket notation is good for performance, since it appears that each utilization of the bracket leads to the creation of a temporary object which in turn generates a partial copy of the layout. @@ -1076,42 +1945,89 @@ As a result, these two loops lead to the [same machine code](https://godbolt.org ``` Incidentally, the library also supports parenthesis notation with multiple indices `A(i, j, k)` for element or partial access, but it does so as part of a more general syntax to generate sub-blocks. -In any case `A(i, j, k)` is expanded to `A[i][j][k]` internally in the library when `i, j, k` are normal integer indices. -Additionally, array coordinates can be directly stored in tuple-like data structures, allowing this functional syntax: +In any case `A(i, j, k)` is expanded to `A[i][j][k]` internally in the library when `i`, `j`, `k` are normal integer indices. +For this reason, `A(i, j, k)`, `A(i, j)(k)`, `A(i)(j)(k)`, `A[i](j)[k]` are examples of equivalent expressions. -```cpp -std::array p = {2, 3, 4}; -std::apply(A, p) = 234; // same as A(2, 3, 4) = 234; and same as A[2][3][4] = 234; -``` - -### Customizing recursive operations: SCARY iterators - -A level of customization can be achieved by intercepting internal recursive algorithms. -Multi iterators are [SCARY](http://www.open-std.org/jtc1/sc22/WG21/docs/papers/2009/n2980.pdf). -SCARY means that they are independent of any container and can be accessed generically through their dimension and underlying pointer types: - -For example, `boost::multi::array_iterator it` is a row (or column) iterator of an array of dimension 2 or higher, whose underlying pointer type is `double*`. -This row (or column) and subsequent ones can be accessed by the normal iterator(pointer) notation `*it` and `it[n]` respectively. -Indirection `it->...` is supported (even for iterators if high dimension). 
-The base pointer, the strides and the size of the arrow can be accessed by `base(it)`, `stride(it)`, `it->size()`. +Sub-block notation, when at least one argument is an index ranges, e.g. `A({i0, i1}, j, k)` has no equivalent square-bracket notation. +Note also that `A({i0, i1}, j, k)` is not equivalent to `A({i0, i1})(j, k)`; their resulting sublocks have different dimensionality. -The template arguments of the iterator can be used to customize operations that are recursive (and possibly inefficient in certain context) in the library: +Additionally, array coordinates can be directly stored in tuple-like data structures, allowing this functional syntax: ```cpp -namespace boost{namespace multi{ -template // custom copy 1D (aka strided copy) -void copy(It first, It last, multi::array_iterator > dest){ - assert( stride(first) == stride(last) ); - std::cerr<<"1D copy(it1D, it1D, it1D) with strides "<< stride(first) <<" "<< stride(dest) < // custom copy 2D (aka double strided copy) -void copy(It first, It last, multi::array_iterator > dest){ - assert( stride(first) == stride(last) ); - std::cerr<<"2D copy(It, It, it2D) with strides "<< stride(first) <<" "<< stride(dest) <(ptr, {n1, n2, ..., nD})` | **yes**, via `mdspan m{T*, extents{n1, n2, ..., nD}};` | **yes**, via `boost::multi_array_ref(T*, boost::extents[n1][n2]...[nD])` | **yes**, via `Eigen::Map>(ptr, n1, n2)` | +| Compile-time dim size | no | **yes**, via template paramaters `mdspan{T*, extent<16, dynamic_extents>{32} }` | no | **yes**, via `Eigen::Array` | +| Array values (owning data) | **yes**, via `multi::array({n1, n2, ..., nD})` | no, (planned `mdarray`) | **yes**, via `boost::multi_array(boost::extents[n1][n2]...[nD])` | **yes**, via `Eigen::Array(n1, n2)` | +| Value semantic (Regular) | **yes**, via cctor, mctor, assign, massign, auto decay of views | no, and not planned | partial, assigment on equal extensions | **yes** (?) 
| +| Move semantic | **yes**, via mctor and massign | no | no (C++98 library) | **yes** (?) | +| const-propagation semantics | **yes**, via `const` or `const&` | no, const mdspan elements are assignable! | no, inconsistent | (?) | +| Element initialization | **yes**, via nested init-list | no | no | no, only delayed init via `A << v1, v2, ...;` | +| References w/no-rebinding | **yes**, assignment is deep | no, assignment of mdspan rebinds! | **yes** | **yes** (?) | +| Element access | **yes**, via `A(i, j, ...)` or `A[i][j]...` | **yes**, via `A(i, j, ...)` | **yes**, via `A[i][j]...` | **yes**, via `A(i, j)` (2D only) | +| Partial element access | **yes**, via `A[i]` or `A(i, multi::all)` | **yes**, via `submdspan(A, i, full_extent)` | **yes**, via `A[i]` | **yes**, via `A.row(i)` | +| Subarray views | **yes**, via `A({0, 2}, {1, 3})` or `A(1, {1, 3})` | **yes**, via `submdspan(A, std::tuple{0, 2}, std::tuple{1, 3})` | **yes**, via `A[indices[range(0, 2)][range(1, 3)]]` | **yes**, via `A.block(i, j, di, dj)` | +| Subarray with lower dim | **yes**, via `A(1, {1, 3})` | **yes**, via `submdspan(A, 1, std::tuple{1, 3})` | **yes**, via `A[1][indices[range(1, 3)]]` | **yes**, via `A(1, Eigen::placeholders::all)` | +| Subarray w/well def layout | **yes** (strided layout) | no | **yes** (strided layout) | **yes** (strided) | +| Recursive subarray | **yes** (layout is stack-based and owned by the view) | **yes** (?) | no (subarray may dangle layout, design bug?) | **yes** (?) (1D only) | +| Custom Alloctors | **yes**, via `multi::array` | no (no allocation or ownership) | **yes** (stateless?) | no | +| PMR Alloctors | **yes**, via `multi::pmr::array` | no (no allocation or ownership) | no | no | +| Fancy pointers / references | **yes**, via `multi::array` or views | no | no | no | +| Strided Layout | **yes** | **yes** | **yes** | **yes** | +| Fortran-ordering | **yes**, only for views, e.g. 
resulted from transposed views | **yes** (only views are supported) | **yes** | **yes** | +| Zig-zag / Hilbert ordering | no | **yes**, via arbitrary layouts (no inverse or flattening) | no | no | +| Arbitrary layout | no | **yes**, possibly inneficient, no efficient slicing | no | no | +| Flattening of elements | **yes**, via `A.elements()` range (efficient representation) | **yes**, but via indices roundtrip (inefficient) | no, only for allocated arrays | no, not for subblocks (?) | +| Iterators | **yes**, standard compliant, random-access-iterator | no, or very limited | **yes**, limited | no | +| Multidimensional iterators (cursors) | **yes** (experimental) | no | no | no | +| STL algorithms or Ranges | **yes** | no, limited via `std::cartesian_product` | **yes**, some do not work | no | +| Compatibility with Boost | **yes**, serialization, interprocess (see below) | no | no | no | +| Compatibility with Thrust or GPUs | **yes**, via flatten views (loop fusion), thrust-pointers/-refs | no | no | no | +| Used in production | [QMCPACK](https://qmcpack.org/), [INQ](https://gitlab.com/npneq/inq) | (?) , experience from Kokkos incarnation | **yes** (?) 
| [**yes**](https://eigen.tuxfamily.org/index.php?title=Main_Page#Projects_using_Eigen) | diff --git a/external_codes/boost_multi/multi/benchmark/array_ref_timing.cpp b/external_codes/boost_multi/multi/benchmark/array_ref_timing.cpp index e91d4dac21..f8f0b59345 100644 --- a/external_codes/boost_multi/multi/benchmark/array_ref_timing.cpp +++ b/external_codes/boost_multi/multi/benchmark/array_ref_timing.cpp @@ -6,7 +6,7 @@ clang++ -O3 -Ofast -std=c++14 -DNDEBUG -Wall -Wextra -Wpedantic -Werror $0 -o $0 #include -#include +#include #include #include #include @@ -16,7 +16,7 @@ namespace multi = boost::multi; int main(){ - assert(0); // check that NDEBUG is on + assert(0); // check that NDEBUG is off { std::ptrdiff_t NX = 20000; std::ptrdiff_t NY = 20000; @@ -25,12 +25,12 @@ int main(){ std::iota(begin(data), end(data), 0.); multi::array_cref data2D_cref({NX, NY}, data.data()); - cout << "size " << data.size()*sizeof(double)/1e6 << "MB\n"; + cout << "size " << static_cast(data.size()*sizeof(double))/1.0e6 << "MB\n"; iota(begin(data), end(data), 1.2); iota(begin(data), end(data), 10.1); data[1234] = 929.1; double sum_raw; { - double sum = 0.; + double sum = 0.0; boost::timer::auto_cpu_timer t{std::cerr, 3, "sum: raw %t seconds\n"}; for(auto const& e : data) sum += e; sum_raw = sum; @@ -38,12 +38,10 @@ int main(){ iota(begin(data), end(data), 1.2); iota(begin(data), end(data), 202.2); data[1234] = 399.1; double sum_2D; { - double sum = 0.; + double sum = 0.0; boost::timer::auto_cpu_timer t{std::cerr, 3, "sum: 2D %t seconds\n"}; auto ext = extensions(data2D_cref); for(auto i : std::get<0>(ext)){ - // auto const& data2D_crefi = data2D_cref[i]; // not necessary in any clang or gcc - // for(auto j : std::get<1>(ext)) sum += data2D_crefi[j]; for(auto j : std::get<1>(ext)) sum += data2D_cref(i, j); } sum_2D = sum; @@ -51,11 +49,11 @@ int main(){ iota(begin(data), end(data), 1.2); iota(begin(data), end(data), 2.21); data[1234] = 3299.1; double sum_2D_acc; { - double sum = 0.; + 
double sum = 0.0; boost::timer::auto_cpu_timer t{cerr, 3, "sum: 2D acc %t seconds\n"}; sum = std::accumulate( - begin(data2D_cref), end(data2D_cref), 0., - [](auto&& a, auto&& b){return a + std::accumulate(begin(b), end(b), 0.);} + begin(data2D_cref), end(data2D_cref), 0.0, + [](auto const& a, auto const& b){return a + std::accumulate(begin(b), end(b), 0.0);} ); sum_2D_acc = sum; } @@ -64,20 +62,18 @@ int main(){ { boost::timer::auto_cpu_timer t{cerr, 3, "sum: 2Dwrong acc %t seconds\n"}; sum_2Dwrong_acc = std::accumulate( - begin(rotated(data2D_cref)), end(rotated(data2D_cref)), 0., - [](auto&& a, auto&& b){return a + std::accumulate(begin(b), end(b), 0.);} + begin(rotated(data2D_cref)), end(rotated(data2D_cref)), 0.0, + [](auto const& a, auto const& b){return a + std::accumulate(begin(b), end(b), 0.0);} ); } iota(begin(data), end(data), 1.2); iota(begin(data), end(data), 11.2); data[1234] = 199.1; double sum_2Dwrong; { - double sum = 0.; + double sum = 0.0; boost::timer::auto_cpu_timer t{std::cerr, 3, "sum: 2Dwrong sum %t seconds\n"}; - auto ext = extensions(data2D_cref); - for(auto j : std::get<1>(ext)){ - // auto const& data2D_crefXj = data2D_cref.rotated(1)[j]; // not necessary in clang or gcc - for(auto i : std::get<0>(ext)){ - // sum += data2D_crefXj[i]; + auto const [is, js] = extensions(data2D_cref); + for(auto j : js){ + for(auto i : is){ sum += data2D_cref[i][j]; } } @@ -86,7 +82,7 @@ int main(){ iota(begin(data), end(data), 1.2); iota(begin(data), end(data), 10.112); data[1234] = 99.1; double sum_raw2; { - double sum = 0.; + double sum = 0.0; boost::timer::auto_cpu_timer t{cerr, 3, "sum: raw %t seconds\n"}; for(auto const& e : data) sum += e; sum_raw2 = sum; @@ -99,78 +95,86 @@ int main(){ } cout<< sum_2D + sum_2D_acc + sum_2Dwrong_acc + sum_2Dwrong + sum_raw + sum_raw2 + sum_raw_acc < v(NX*NY*NZ); - cout<<"3D data "<< v.size()*sizeof(double)/1e6 <<"MB\n"; + cout<<"3D data "<< static_cast(v.size()*sizeof(double))/1.0e6 <<"MB\n"; iota(begin(v), end(v), 
0.1); multi::array_cref v3D_cref({NX, NY, NZ}, v.data()); assert( num_elements(v3D_cref) == std::ptrdiff_t(v.size()) ); { - double sum = 0.; + double sum = 0.0; boost::timer::auto_cpu_timer t{std::cerr, 3, "sum: 3D raw %t seconds\n"}; for(auto const& e : v) sum += e; cout << sum << '\n'; } iota(begin(v), end(v), 1.2); { - double sum = 0.; + double sum = 0.0; boost::timer::auto_cpu_timer t{std::cerr, 3, "sum: 3D indexed %t seconds\n"}; - auto ext = extensions(v3D_cref); - for(auto i : std::get<0>(ext)){ + auto const [is, js, ks] = extensions(v3D_cref); + for(auto i : is) { auto const& v3D_crefi = v3D_cref[i]; - for(auto j : std::get<1>(ext)){ + for(auto j : js) { auto const& v3D_crefij = v3D_crefi[j]; - for(auto k : std::get<2>(ext)) + for(auto k : ks) { sum += v3D_crefij[k]; + } } } cout << sum << '\n'; } iota(begin(v), end(v), 4444.5); { - double sum = 0.; + double sum = 0.0; boost::timer::auto_cpu_timer t{std::cerr, 3, "sum: 3Dwrong indexed %t seconds\n"}; - for(auto k : v3D_cref.extension(2)) + for(auto k : v3D_cref.extension(2)) // TODO(correaa) this doesn't work anymore for(auto j : v3D_cref.extension(1)) for(auto i : v3D_cref.extension(0)) sum += v3D_cref[i][j][k]; cout << sum << '\n'; } } -{ - std::ptrdiff_t NX = 150, NY = 150, NZ = 150, NA = 150; +{ + std::ptrdiff_t NX = 150; + std::ptrdiff_t NY = 150; + std::ptrdiff_t NZ = 150; + std::ptrdiff_t NA = 150; + std::vector v(NX*NY*NZ*NA); multi::array_cref v4D_cref({NX, NY, NZ, NA}, v.data()); assert( v4D_cref.num_elements() == std::ptrdiff_t(v.size()) ); cout<<"4D data "<< num_elements(v4D_cref)*sizeof(double)/1e6 <<"MB\n"; iota(begin(v), end(v), 0.1); { - double sum = 0.; + double sum = 0.0; boost::timer::auto_cpu_timer t{std::cerr, 3, "sum: 4D raw %t seconds\n"}; for(auto const& e : v) sum += e; cout<< sum <<'\n'; } iota(begin(v), end(v), 1222.1); { - double sum = 0.; + double sum = 0.0; boost::timer::auto_cpu_timer t{std::cerr, 3, "sum: 4D indexed %t seconds\n"}; - auto ext = extensions(v4D_cref); - 
for(auto i : std::get<0>(ext)){ + auto const [is, js, ks, ls] = extensions(v4D_cref); + for(auto i : is) { // auto const& v4D_crefi = v4D_cref[i]; // not necessary in clang or gcc - for(auto j : std::get<1>(ext)){ + for(auto j : js) { // auto const& v4D_crefij = v4D_crefi[j]; // not necessary in clang or gcc - for(auto k : std::get<2>(ext)){ + for(auto k : ks) { // auto const& v4D_crefijk = v4D_crefij[k]; // not necessary in clang or gcc - for(auto l : std::get<3>(ext)) + for(auto l : ls) { sum += v4D_cref[i][j][k][l]; + } } } } cout<< std::to_string(sum)[0] <<'\n'; } } - return 0; } diff --git a/external_codes/boost_multi/multi/benchmark/assignment.cpp b/external_codes/boost_multi/multi/benchmark/assignment.cpp index 7ebeea0e5a..e372dce39e 100644 --- a/external_codes/boost_multi/multi/benchmark/assignment.cpp +++ b/external_codes/boost_multi/multi/benchmark/assignment.cpp @@ -12,7 +12,7 @@ namespace multi = boost::multi; using complex = std::complex; -MAYBE_UNUSED constexpr complex I{0, 1}; +[[maybe_unused]] constexpr complex I{0.0, 1.0}; using T = complex; constexpr std::size_t N = 1 << 24; diff --git a/external_codes/boost_multi/multi/benchmark/element_access.cpp b/external_codes/boost_multi/multi/benchmark/element_access.cpp index e841f354f1..4ddd8cc3ba 100644 --- a/external_codes/boost_multi/multi/benchmark/element_access.cpp +++ b/external_codes/boost_multi/multi/benchmark/element_access.cpp @@ -11,7 +11,7 @@ clang++-9 -std=c++17 -O3 -DNDEBUG -DBOOST_DISABLE_ASSERTS $0 -o $0x -lbenchmark const long X_SIZE = 4000; const long Y_SIZE = 4000; -typedef boost::multi_array ImageArrayType; +using ImageArrayType = boost::multi_array; static void MeasureNative2(benchmark::State& state) { @@ -151,4 +151,3 @@ static void MeasureMultiRaw(benchmark::State& state) { BENCHMARK(MeasureBoostMultiArrayRaw); BENCHMARK_MAIN(); - diff --git a/external_codes/boost_multi/multi/benchmark/tr_prod.cpp b/external_codes/boost_multi/multi/benchmark/tr_prod.cpp index 74770c363f..178adcebf6 
100644 --- a/external_codes/boost_multi/multi/benchmark/tr_prod.cpp +++ b/external_codes/boost_multi/multi/benchmark/tr_prod.cpp @@ -239,7 +239,7 @@ int main() { cout << tr_square_direct(A) << '\n'; cout << tr_square_block2(A) << '\n'; { - int const N = 32768/2; + multi::array::size_type const N = 32768/2; multi::array A({N, N}); std::iota(A.data_elements(), A.data_elements() + N*N, 1.11); double warm = tr_square_direct(A); cout << warm << '\n'; diff --git a/external_codes/boost_multi/multi/benchmark/traceAA.cpp b/external_codes/boost_multi/multi/benchmark/traceAA.cpp index ff1c4961c9..60239cde1b 100644 --- a/external_codes/boost_multi/multi/benchmark/traceAA.cpp +++ b/external_codes/boost_multi/multi/benchmark/traceAA.cpp @@ -170,10 +170,10 @@ int main(int, char*[]){ // cerr<<" test N ni nj\n"; // exit(1); // } - int const N = 32768;// atoi(argv[1]); + std::ptrdiff_t const N = 32768; // atoi(argv[1]); cout << "N = " << N << '\n'; - int const ni = 64; // atoi(argv[2]); - int const nj = 64; //atoi(argv[3]); + int const ni = 64; // atoi(argv[2]); + int const nj = 64; //atoi(argv[3]); { cout << "Miguel traceAA\n"; double* A = new double[N*N]; diff --git a/external_codes/boost_multi/multi/cmake/multi-config.cmake.in b/external_codes/boost_multi/multi/cmake/multi-config.cmake.in new file mode 100644 index 0000000000..a728a5a81f --- /dev/null +++ b/external_codes/boost_multi/multi/cmake/multi-config.cmake.in @@ -0,0 +1,3 @@ +@PACKAGE_INIT@ + +include("${CMAKE_CURRENT_LIST_DIR}/multi-targets.cmake") \ No newline at end of file diff --git a/external_codes/boost_multi/multi/codecov.yml b/external_codes/boost_multi/multi/codecov.yml new file mode 100644 index 0000000000..7a9c0ba9c5 --- /dev/null +++ b/external_codes/boost_multi/multi/codecov.yml @@ -0,0 +1,3 @@ +codecov: + notify: + after_n_builds: 2 diff --git a/external_codes/boost_multi/multi/conanfile.py b/external_codes/boost_multi/multi/conanfile.py new file mode 100644 index 0000000000..dce32ecda0 --- /dev/null +++ 
b/external_codes/boost_multi/multi/conanfile.py @@ -0,0 +1,29 @@ +from conan import ConanFile +from conan.tools.files import copy + + +class MultiConan(ConanFile): + name = "b-multi" + version = "0.80.1" + homepage = "https://gitlab.com/correaa/boost-multi" + description = "Multidimensional array access to contiguous or regularly contiguous memory. (Not an official Boost library)" + topics = ( + "array", + "multidimensional", + "library", + ) + license = "Boost" + url = "https://gitlab.com/correaa/boost-multi" + # No settings/options are necessary, this is header only + exports_sources = "include/*" + no_copy_source = True + + def package(self): + # This will also copy the "include" folder + copy(self, "*.hpp", self.source_folder, self.package_folder) + + def package_info(self): + # For header-only packages, libdirs and bindirs are not used + # so it's recommended to set those as empty. + self.cpp_info.bindirs = [] + self.cpp_info.libdirs = [] diff --git a/external_codes/boost_multi/multi/docs/Makefile b/external_codes/boost_multi/multi/docs/Makefile new file mode 100644 index 0000000000..d0c3cbf102 --- /dev/null +++ b/external_codes/boost_multi/multi/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/external_codes/boost_multi/multi/docs/conf.py b/external_codes/boost_multi/multi/docs/conf.py new file mode 100644 index 0000000000..a85c8e5738 --- /dev/null +++ b/external_codes/boost_multi/multi/docs/conf.py @@ -0,0 +1,35 @@ +# Configuration file for the Sphinx documentation builder. + +# -- Project information + +project = 'Multi' +copyright = '2023, Alfredo A. Correa' +author = 'Alfredo' + +release = '0.80.1' +version = '0.80.1' + +# -- General configuration + +extensions = [ + 'sphinx.ext.duration', + 'sphinx.ext.doctest', + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.intersphinx', +] + +intersphinx_mapping = { + 'python': ('https://docs.python.org/3/', None), + 'sphinx': ('https://www.sphinx-doc.org/en/master/', None), +} +intersphinx_disabled_domains = ['std'] + +templates_path = ['_templates'] + +# -- Options for HTML output + +html_theme = 'sphinx_rtd_theme' + +# -- Options for EPUB output +epub_show_urls = 'footnote' diff --git a/external_codes/boost_multi/multi/docs/requirements.txt b/external_codes/boost_multi/multi/docs/requirements.txt new file mode 100644 index 0000000000..fc32dacc85 --- /dev/null +++ b/external_codes/boost_multi/multi/docs/requirements.txt @@ -0,0 +1,3 @@ +sphinx==7.1.2 +sphinx-rtd-theme==1.3.0rc1 +breathe diff --git a/external_codes/boost_multi/multi/examples/CMakeLists.txt b/external_codes/boost_multi/multi/examples/CMakeLists.txt index 11bb0941ef..3184e2925a 100644 --- a/external_codes/boost_multi/multi/examples/CMakeLists.txt +++ b/external_codes/boost_multi/multi/examples/CMakeLists.txt @@ -10,12 +10,18 @@ cmake_minimum_required(VERSION 3.18) project( - boost-multi-test + boost-multi-examples VERSION 0.1 LANGUAGES CXX ) -find_package(Boost REQUIRED COMPONENTS serialization unit_test_framework) +include_directories(../include) + +#find_package(multi) # see 
https://gitlab.com/correaa/boost-multi#using-the-library-installation-and-tests +#... +#target_link_library(my_target PUBLIC multi) + +find_package(Boost REQUIRED COMPONENTS serialization unit_test_framework timer) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) @@ -27,3 +33,6 @@ add_executable(serialization.cpp.x serialization.cpp) target_link_libraries(serialization.cpp.x PRIVATE ${Boost_LIBRARIES} stdc++fs) add_test(NAME serialization.cpp.x COMMAND serialization.cpp.x) +add_executable(gj_solve.cpp.x gj_solve.cpp) +target_link_libraries(gj_solve.cpp.x PRIVATE ${Boost_LIBRARIES} stdc++fs) +add_test(NAME gj_solve.cpp.x COMMAND gj_solve.cpp.x) diff --git a/external_codes/boost_multi/multi/examples/bip_mapped_file.cpp b/external_codes/boost_multi/multi/examples/bip_mapped_file.cpp index 74b1ca1c74..79b8602c81 100644 --- a/external_codes/boost_multi/multi/examples/bip_mapped_file.cpp +++ b/external_codes/boost_multi/multi/examples/bip_mapped_file.cpp @@ -1,21 +1,22 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXX $CXXFLAGS -std=c++17 $0 -o $0x -lstdc++fs -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2019-2022 +#ifdef COMPILATION // clang-format off +${CXX:-c++} -std=c++17 $CXXFLAGS -I../include $0 -o $0.$X -lboost_unit_test_framework&&$0.$X&&rm $0.$X;exit +#endif // clang-format on +// Copyright 2019-2024 Alfredo A. 
Correa #define BOOST_TEST_MODULE "C++ Unit Tests for Multi interacting with Boost Interprocess" #define BOOST_TEST_DYN_LINK -#include -#include +#include -#include -#include +#include -#include -#include +#include +#include -#include +#include +#include + +#include namespace bip = boost::interprocess; @@ -25,86 +26,91 @@ using std::filesystem::path; using std::filesystem::remove; template using mallocator = bip::allocator; -static auto get_allocator(manager& m) {return m.get_segment_manager();} -static void mremove(path f) {remove(f);} +static auto get_allocator(manager& m) { return m.get_segment_manager(); } +static void mremove(path f) { remove(f); } -std::set candidates(manager& m) { +auto objects_directory(manager& m) { std::set ret; - for(auto it = get_allocator(m)->named_begin(); it != get_allocator(m)->named_end(); ++it) { - ret.insert(std::string(it->name(), it->name_length())); - } + std::transform( + get_allocator(m)->named_begin(), get_allocator(m)->named_end(), + std::inserter(ret, ret.end()), + [](auto const& e) { return e.name(); } + ); return ret; } -#include "../include/multi/array.hpp" +#include -#include // iota +#include // iota namespace multi = boost::multi; template using marray = multi::array>; -BOOST_AUTO_TEST_CASE(multi_test_bip) { +BOOST_AUTO_TEST_CASE(const multi_test_bip) { -path file = "bip_mapped_file.bin"; -mremove(file); -{ - manager m{bip::create_only, file.c_str(), 1 << 25}; - auto&& arr1d = - *m.construct>("arr1d")(multi::extensions_t<1>( 10 ), 99 , get_allocator(m)); - auto&& arr2d = - *m.construct>("arr2d")(multi::extensions_t<2>({10, 10 }), 0.0, get_allocator(m)); - auto&& arr3d = - *m.construct>("arr3d")(multi::extensions_t<3>({10, 10, 10}), 0u , get_allocator(m)); + path const file = "bip_mapped_file.bin"; + { + mremove(file); + manager m{bip::create_only, file.c_str(), 1 << 25}; // objects with same name produce boost::interprocess_exception::library_error + auto&& arr1d = *m.construct>("arr1d")(std::tuple{10}, 99, 
get_allocator(m)); + auto&& arr2d = *m.construct>("arr2d")(std::tuple{10, 10}, 0.0, get_allocator(m)); + auto&& arr3d = *m.construct>("arr3d")(std::tuple{10, 10, 10}, 0u, get_allocator(m)); - arr1d[3] = 33; - arr2d[4][5] = 45.001; + arr1d[3] = 33; + arr2d[4][5] = 45.001; - std::iota(arr3d[6][7].begin(), arr3d[6][7].end(), 100); + std::iota(arr3d[6][7].begin(), arr3d[6][7].end(), 100); -// m.flush(); // this produces uninitialized access in icpc 19.1 and might not be necessary -} -{ - manager m{bip::open_only, file.c_str()}; + auto const& arr3d_copy = *m.construct>("arr3d_copy")(arr3d, get_allocator(m)); + BOOST_REQUIRE( arr3d == arr3d_copy ); - auto s = candidates(m); - BOOST_REQUIRE( s.find("arr1d") != s.end() ); - BOOST_REQUIRE( s.find("arr2d") != s.end() ); - BOOST_REQUIRE( s.find("arr3d") != s.end() ); + // m.flush(); // this produces uninitialized access in icpc 19.1 and might not be necessary + } + { + manager m{bip::open_only, file.c_str()}; - auto&& arr1d = *m.find>("arr1d").first; - BOOST_REQUIRE(std::addressof(arr1d)); + auto const s = objects_directory(m); + BOOST_REQUIRE( s.find("arr1d") != s.end() ); + BOOST_REQUIRE( s.find("arr2d") != s.end() ); + BOOST_REQUIRE( s.find("arr3d") != s.end() ); - auto&& arr2d = *m.find>("arr2d").first; - BOOST_REQUIRE(std::addressof(arr2d)); + auto&& arr1d = *m.find>("arr1d").first; + BOOST_REQUIRE(std::addressof(arr1d)); - auto&& arr3d = *m.find>("arr3d").first; - BOOST_REQUIRE(std::addressof(arr3d)); + auto&& arr2d = *m.find>("arr2d").first; + BOOST_REQUIRE(std::addressof(arr2d)); - BOOST_REQUIRE( arr1d[5] == 99 ); - BOOST_REQUIRE( arr1d[3] == 33 ); + auto&& arr3d = *m.find>("arr3d").first; + BOOST_REQUIRE(std::addressof(arr3d)); - BOOST_REQUIRE( arr2d[7][8] == 0. 
); - BOOST_REQUIRE( arr2d[4][5] == 45.001 ); + BOOST_REQUIRE( arr1d[5] == 99 ); + BOOST_REQUIRE( arr1d[3] == 33 ); - BOOST_REQUIRE( arr3d[6][7][3] == 103 ); + BOOST_REQUIRE( arr2d[7][8] == 0.0 ); + BOOST_REQUIRE( arr2d[4][5] == 45.001 ); - m.destroy>("arr1d"); - m.destroy>("arr2d"); - m.destroy>("arr3d"); -} -mremove(file); + BOOST_REQUIRE( arr3d[6][7][3] == 103 ); + + auto&& arr3d_copy = *m.find>("arr3d_copy").first; + BOOST_REQUIRE(std::addressof(arr3d_copy)); + + BOOST_REQUIRE( arr3d == arr3d_copy ); + m.destroy>("arr1d"); + m.destroy>("arr2d"); + m.destroy>("arr3d"); + mremove(file); + } } template using alloc = bip::adaptive_pool< - T, bip::managed_shared_memory::segment_manager ->; + T, bip::managed_shared_memory::segment_manager>; -BOOST_AUTO_TEST_CASE(scoped_allocator_vector_of_arrays) { +BOOST_AUTO_TEST_CASE(const scoped_allocator_vector_of_arrays) { - using bipc_row = multi::array>; + using bipc_row = multi::array>; using bipc_matrix = std::vector>>; bip::shared_memory_object::remove("Demo"); @@ -113,24 +119,24 @@ BOOST_AUTO_TEST_CASE(scoped_allocator_vector_of_arrays) { bipc_matrix v(s.get_segment_manager()); - v.emplace_back( multi::extensions_t<1>( 3 ), 99. 
); - std::iota( v[0].begin(), v[0].end() , 42 ); + v.emplace_back(multi::extensions_t<1>(3), 99.); + std::iota(v[0].begin(), v[0].end(), 42); - assert( v[0][1] == 43 ); + assert(v[0][1] == 43); bip::shared_memory_object::remove("Demo"); } } -BOOST_AUTO_TEST_CASE(scoped_allocator_arrays_of_vector) { +BOOST_AUTO_TEST_CASE(const scoped_allocator_arrays_of_vector) { - using bipc_row = std::vector>; + using bipc_row = std::vector>; using bipc_matrix = multi::array>>; bip::shared_memory_object::remove("Demo"); { bip::managed_shared_memory s{bip::create_only, "Demo", 65536}; - bipc_matrix v(bipc_matrix::extensions_type(10), bipc_row{s.get_segment_manager()}, s.get_segment_manager()); + bipc_matrix v(bipc_matrix::extensions_type(10), bipc_row{s.get_segment_manager()}, s.get_segment_manager()); std::vector row(3, 99); v[0].assign(row.begin(), row.end()); @@ -139,25 +145,24 @@ BOOST_AUTO_TEST_CASE(scoped_allocator_arrays_of_vector) { } } -BOOST_AUTO_TEST_CASE(scoped_allocator_arrays_of_array) { +BOOST_AUTO_TEST_CASE(const scoped_allocator_arrays_of_array) { - using bipc_row = multi::array>; + using bipc_row = multi::array>; using bipc_matrix = multi::array>>; bip::shared_memory_object::remove("Demo"); { bip::managed_shared_memory s{bip::create_only, "Demo", 165536}; - bipc_matrix v(bipc_matrix::extensions_type(10), bipc_row{bipc_matrix::extensions_type(3), 5, s.get_segment_manager()}, s.get_segment_manager()); + bipc_matrix v(bipc_matrix::extensions_type(10), bipc_row{bipc_matrix::extensions_type(3), 5, s.get_segment_manager()}, s.get_segment_manager()); multi::array row = {97, 98, 99}; std::copy(row.begin(), row.end(), v[0].begin()); BOOST_REQUIRE( v[0][1] == 98 ); BOOST_REQUIRE( v[1][1] == 5 ); - v.reextent( bipc_matrix::extensions_type(12) , bipc_row{bipc_matrix::extensions_type(3), 5, s.get_segment_manager()} ); + v.reextent(bipc_matrix::extensions_type(12), bipc_row{bipc_matrix::extensions_type(3), 5, s.get_segment_manager()}); bip::shared_memory_object::remove("Demo"); 
} } - diff --git a/external_codes/boost_multi/multi/examples/boost_iterator_transform.cpp b/external_codes/boost_multi/multi/examples/boost_iterator_transform.cpp index 79a78d40a5..633796ae42 100644 --- a/external_codes/boost_multi/multi/examples/boost_iterator_transform.cpp +++ b/external_codes/boost_multi/multi/examples/boost_iterator_transform.cpp @@ -1,137 +1,124 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- +#ifdef COMPILATION // clang-format off ${CXX:-c++} -std=c++17 $CXXFLAGS -I../include $0 -o $0.$X&&$0.$X&&rm $0.$X;exit -#endif -// Copyright 2018-2022 Alfredo A. Correa +#endif // clang-format on +// Copyright 2018-2023 Alfredo A. Correa #include "./multi/array.hpp" -#include -#include +// #include +// #include -#include #include +#include #include -#include +#include -namespace multi = boost::multi; + namespace multi = boost::multi; -constexpr auto conj = [](auto const& c) -> auto const {return std::conj(c);}; +constexpr auto conj = [](auto const& c) -> auto const { return std::conj(c); }; template struct conjr : boost::transform_iterator { - template conjr(As const&... as) : boost::transform_iterator{as...} {} + template conjr(As const&... as) : boost::transform_iterator{as...} {} // TODO(correaa) not working here }; template auto hermitized(Array2D const& arr) { return arr - .transposed() // lazily tranposes the array + .transposed() // lazily tranposes the array .template static_array_cast>(conj) // lazy conjugate elements - ; -} - -int main(){ -{ - using namespace std::complex_literals; - multi::array A = { - { 1. + 2.i, 3. + 4.i}, - { 8. + 9.i, 10. + 11.i} - }; - - auto const& Ah = hermitized(A); - - assert( Ah[1][0] == std::conj(A[0][1]) ); -} - -{ - auto r = multi::make_range(5, 10); - auto f = [](auto x) {return x + 1;}; - std::vector v( - boost::make_transform_iterator(r.begin(), f), - boost::make_transform_iterator(r.end() , f) - ); - assert( v[1] == 7. 
); -} -{ - auto r = multi::make_range(5, 10); - auto f = [](auto x) {return x + 1;}; - multi::array v( - boost::make_transform_iterator(r.begin(), f), - boost::make_transform_iterator(r.end() , f) - ); - assert( v[1] == 7. ); -} -{ - multi::array v(10); - auto r = extension(v); - auto f = [](auto x) {return x * 2;}; - v.assign( - boost::make_transform_iterator(r.begin(), f), - boost::make_transform_iterator(r.end() , f) - ); - assert( v[1] == 2. ); -} -{ - multi::array v(10); - multi::array r = {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.}; - auto f = [](auto x) {return x * 2;}; - v.assign( - boost::make_transform_iterator(r.base() , f), - boost::make_transform_iterator(r.base() + r.size(), f) - ); - assert( v[1] == 4. ); -} -{ - auto r = multi::make_extension_t(10l); - auto f = [](auto x){ - std::size_t seed = 1234; - // boost::hash_combine(seed, ); - seed ^= boost::hash{}(x) + 0x9e3779b9 + (seed<<6) + (seed>>2); - return static_cast(seed)/static_cast(std::numeric_limits::max()); - }; - multi::array v( - boost::make_transform_iterator(r.begin(), f), - boost::make_transform_iterator(r.end() , f) - ); - - std::size_t seed = 12349l; - // boost::hash_combine(seed, ); -// seed ^= boost::hash{}(13) + 0x9e3779b9 + (seed<<6) + (seed>>2); - boost::hash_combine(seed, 13); - - assert( v.size() == r.size() ); - assert( v[1] >= 0. ); - assert( v[1] < 1. ); - assert( std::all_of(begin(v), end(v), [](auto x){ - return x >= 0. and x < 1.; - }) ); -} - -//struct conj_t : thrust::unary_function, std::complex> { -// std::complex operator()(std::complex const& e) const {return std::conj(e);} -//} conj; - -{ - using namespace std::complex_literals; - multi::array, 1> A = { 1. + 2.i, 3. + 4.i, 5. + 7.i}; - - auto const conj = [](auto e) {return std::conj(e);}; - - std::vector> v(thrust::make_transform_iterator(A.elements().begin(), conj), thrust::make_transform_iterator(A.elements().end(), conj)); - std::cout << v[1] << std::endl; - assert( v[1] == 3. 
- 4.i ); - -// using ittc = std::iterator_traits>>::iterator_category; - -// using rt = boost::result_of &(std::complex &)>::type; -// using conjugater = decltype(boost::make_transform_iterator(A.data_elements(), conj)); -// conjugater ll(A.data_elements(), conj); -// auto conjA = A.template static_array_cast, thrust::transform_iterator*> >(); -// A.static_array_cast, transformer, decltype(conj)> >(conj); - - -// return {this->layout(), P2{this->base(), std::forward(args)...}}; -// boost::multi::basic_array, 1, std::decay_t> bb{A.layout(), thrust::make_transform_iterator(A.base(), conj)}; + ; } +int main() { + { + using namespace std::complex_literals; + multi::array A = { + {1.0 + 2.0i, 3.0 + 4.0i}, + {8.0 + 9.0i, 10.0 + 11.0i}, + }; + + auto const& Ah = hermitized(A); + + assert(Ah[1][0] == std::conj(A[0][1])); + } + + { + auto r = multi::make_range(5, 10); + auto f = [](auto x) { return x + 1; }; + + std::vector v( + boost::make_transform_iterator(r.begin(), f), + boost::make_transform_iterator(r.end(), f) + ); + assert(v[1] == 7.); + } + { + auto r = multi::make_range(5, 10); + auto f = [](auto x) { return x + 1; }; + + multi::array v( + boost::make_transform_iterator(r.begin(), f), + boost::make_transform_iterator(r.end(), f) + ); + assert(v[1] == 7.0); + } + { + multi::array v(10); + + auto r = extension(v); + auto f = [](auto x) { return x * 2; }; + + v.assign( + boost::make_transform_iterator(r.begin(), f), + boost::make_transform_iterator(r.end(), f) + ); + assert(v[1] == 2.0); + } + { + multi::array v(10); + multi::array r = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; + + auto f = [](auto x) { return x * 2; }; + + v.assign( + boost::make_transform_iterator(r.base(), f), + boost::make_transform_iterator(r.base() + r.size(), f) + ); + assert(v[1] == 4.0); + } + { + auto r = multi::make_extension_t(10L); + auto f = [](auto x) { + std::size_t seed = 1234; + // boost::hash_combine(seed, ); + seed ^= boost::hash{}(x) + 0x9e3779b9 + (seed << 6) + (seed 
>> 2); + return static_cast(seed) / static_cast(std::numeric_limits::max()); + }; + + multi::array v( + boost::make_transform_iterator(r.begin(), f), + boost::make_transform_iterator(r.end(), f) + ); + + std::size_t seed = 12349L; + boost::hash_combine(seed, 13); + + assert(v.size() == r.size()); + assert(v[1] >= 0.0); + assert(v[1] < 1.0); + assert(std::all_of(begin(v), end(v), [](auto x) { + return x >= 0.0 and x < 1.0; + })); + } + { + using namespace std::complex_literals; + multi::array, 1> A = {1.0 + 2.0i, 3.0 + 4.0i, 5.0 + 7.0i}; + + auto const conj = [](auto e) { return std::conj(e); }; + + std::vector> v(thrust::make_transform_iterator(A.elements().begin(), conj), thrust::make_transform_iterator(A.elements().end(), conj)); + std::cout << v[1] << std::endl; + assert(v[1] == 3.0 - 4.0i); + } } diff --git a/external_codes/boost_multi/multi/examples/gj_solve.cpp b/external_codes/boost_multi/multi/examples/gj_solve.cpp index 3ea1b83746..1ce032ec4b 100644 --- a/external_codes/boost_multi/multi/examples/gj_solve.cpp +++ b/external_codes/boost_multi/multi/examples/gj_solve.cpp @@ -1,9 +1,9 @@ #ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*- $CXX -DNDEBUG $0 -o $0x -lboost_timer&&$0x&&rm $0x;exit #endif -// © Alfredo A. Correa 2019-2020 +// Copyright 2019-2023 Alfredo A. 
Correa -#include "../array.hpp" +#include #include #include @@ -14,42 +14,51 @@ namespace multi = boost::multi; using std::cout; template::difference_type> -auto gj_solve(Matrix&& A, Vector&& y)->decltype(y[0]/=A[0][0], y){ +auto gj_solve(Matrix&& A, Vector&& y) -> decltype(y[0] /= A[0][0], y) { idx Asize = size(A); - for(idx r = 0; r != Asize; ++r){ - auto&& Ar = A[r]; auto const& Arr = Ar[r]; - for(idx c = r + 1; c != Asize; ++c) Ar[c] /= Arr; + for(idx r = 0; r != Asize; ++r) { + auto&& Ar = A[r]; + auto const& Arr = Ar[r]; + for(idx c = r + 1; c != Asize; ++c) + Ar[c] /= Arr; auto const& yr = (y[r] /= Arr); - for(idx r2 = r + 1; r2 != Asize; ++r2){ - auto&& Ar2 = A[r2]; auto const& Ar2r = A[r2][r]; - for(idx c = r + 1; c != Asize; ++c) Ar2[c] -= Ar2r*Ar[c]; - y[r2] -= Ar2r*yr; + for(idx r2 = r + 1; r2 != Asize; ++r2) { + auto&& Ar2 = A[r2]; + auto const& Ar2r = A[r2][r]; + for(idx c = r + 1; c != Asize; ++c) + Ar2[c] -= Ar2r * Ar[c]; + y[r2] -= Ar2r * yr; } } - for(idx r = Asize - 1; r > 0; --r){ + for(idx r = Asize - 1; r > 0; --r) { auto const& yr = y[r]; - for(idx r2 = r-1; r2 >=0; --r2) y[r2] -= A[r2][r]*yr; + for(idx r2 = r - 1; r2 >= 0; --r2) + y[r2] -= A[r2][r] * yr; } return y; } template::difference_type> -auto gj_solve2(Matrix&& A, Vector&& y)->decltype(y[0]/=A[0][0], y){ +auto gj_solve2(Matrix&& A, Vector&& y) -> decltype(y[0] /= A[0][0], y) { idx Asize = size(A); - for(idx r = 0; r != Asize; ++r){ - auto&& Ar = A[r]; auto const& Arr = Ar[r]; - // std::transform(Ar.begin() + r + 1, Ar.end(), Ar.begin() + r + 1, [&](auto const& a){return a/Arr;}); - for(idx c = r + 1; c != Asize; ++c) Ar[c] /= Arr; + for(idx r = 0; r != Asize; ++r) { + auto&& Ar = A[r]; + auto const& Arr = Ar[r]; + // std::transform(Ar.begin() + r + 1, Ar.end(), Ar.begin() + r + 1, [&](auto const& a){return a/Arr;}); + for(idx c = r + 1; c != Asize; ++c) + Ar[c] /= Arr; auto const& yr = (y[r] /= Arr); - for(idx r2 = r + 1; r2 != Asize; ++r2){ - auto&& Ar2 = A[r2]; auto const& Ar2r = 
A[r2][r]; - std::transform(std::move(Ar2).begin() + r + 1, std::move(Ar2).end(), std::move(Ar).begin() + r + 1, std::move(Ar2).begin() + r + 1, [&](auto&& a, auto&& b){return a - Ar2r*b;}); - y[r2] -= Ar2r*yr; + for(idx r2 = r + 1; r2 != Asize; ++r2) { + auto&& Ar2 = A[r2]; + auto const& Ar2r = A[r2][r]; + std::transform(std::move(Ar2).begin() + r + 1, std::move(Ar2).end(), std::move(Ar).begin() + r + 1, std::move(Ar2).begin() + r + 1, [&](auto&& a, auto&& b) { return a - Ar2r * b; }); + y[r2] -= Ar2r * yr; } } - for(idx r = Asize - 1; r > 0; --r){ + for(idx r = Asize - 1; r > 0; --r) { auto const& yr = y[r]; - for(idx r2 = r-1; r2 >=0; --r2) y[r2] -= A[r2][r]*yr; + for(idx r2 = r - 1; r2 >= 0; --r2) + y[r2] -= A[r2][r] * yr; } return y; } @@ -58,15 +67,21 @@ auto gj_solve2(Matrix&& A, Vector&& y)->decltype(y[0]/=A[0][0], y){ int main(){ { - multi::array A = {{-3., 2., -4.},{0., 1., 2.},{2., 4., 5.}}; - multi::array y = {12.,5.,2.}; //(M); assert(y.size() == M); iota(y.begin(), y.end(), 3.1); + multi::array A = { + {-3.0, 2.0, -4.0}, + { 0.0, 1.0, 2.0}, + { 2.0, 4.0, 5.0}, + }; + multi::array y = {12.0, 5.0, 2.0}; //(M); assert(y.size() == M); iota(y.begin(), y.end(), 3.1); gj_solve(A, y); - cout << y[0] <<" "<< y[1] <<" "<< y[2] << std::endl; + cout << y[0] << " " << y[1] << " " << y[2] << std::endl; } { - multi::array A({6000, 7000}); std::iota(A.data(), A.data() + A.num_elements(), 0.1); - std::transform(A.data(), A.data() + A.num_elements(), A.data(), [](auto x){return x/=2.e6;}); - std::vector y(3000); std::iota(y.begin(), y.end(), 0.2); + multi::array A({6000, 7000}); + std::iota(A.data_elements(), A.data_elements() + A.num_elements(), 0.1); + std::transform(A.data_elements(), A.data_elements() + A.num_elements(), A.data_elements(), [](auto x) { return x /= 2.e6; }); + std::vector y(3000); + std::iota(y.begin(), y.end(), 0.2); { boost::timer::auto_cpu_timer t; gj_solve(A({1000, 4000}, {0, 3000}), y); @@ -74,9 +89,11 @@ int main(){ cout << y[45] << std::endl; 
} { - multi::array A({6000, 7000}); std::iota(A.data(), A.data() + A.num_elements(), 0.1); - std::transform(A.data(), A.data() + A.num_elements(), A.data(), [](auto x){return x/=2.e6;}); - std::vector y(3000); std::iota(y.begin(), y.end(), 0.2); + multi::array A({6000, 7000}); + std::iota(A.data_elements(), A.data_elements() + A.num_elements(), 0.1); + std::transform(A.data_elements(), A.data_elements() + A.num_elements(), A.data_elements(), [](auto x) { return x /= 2.e6; }); + std::vector y(3000); + std::iota(y.begin(), y.end(), 0.2); { boost::timer::auto_cpu_timer t; gj_solve2(A({1000, 4000}, {0, 3000}), y); diff --git a/external_codes/boost_multi/multi/examples/llnl_metall.cpp b/external_codes/boost_multi/multi/examples/llnl_metall.cpp index 4d55e9c9c5..bbd480a0f3 100644 --- a/external_codes/boost_multi/multi/examples/llnl_metall.cpp +++ b/external_codes/boost_multi/multi/examples/llnl_metall.cpp @@ -1,77 +1,85 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXX -std=c++17 -I~/https/github.com/LLNL/metall.git/include/ $0 -o $0x -lstdc++fs&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2019-2020 +#ifdef COMPILATION // clang-format off +${CXX:-c++} -std=c++17 $CXXFLAGS -I../include -I$HOME/metall/include $0 -o$0.$X&&$0.$X&&rm $0.$X;exit +#endif // clang-format on +// Copyright 2019-2023 Alfredo A. 
Correa -#include -#include // iota -#include +#include +#include +#include // for std::iota -#include +#include -#include "../../multi/array.hpp" +#include -template using mallocator = metall::manager::allocator_type; +template +using mallocator = metall::manager::allocator_type; namespace multi = boost::multi; -template +template using marray = multi::array>; using std::tuple; -int main(){ -std::filesystem::path dir = "llnl_metall_mapped_file.bin/"; -remove_all(dir); -{ - metall::manager m{metall::create_only, dir.c_str(), 1<<25}; - auto&& arr1d = - *m.construct>("arr1d")(tuple{10} , 99 , m.get_allocator()); - auto&& arr2d = - *m.construct>("arr2d")(tuple{1000, 1000}, 1.0, m.get_allocator()); - auto&& arr3d = - *m.construct>("arr3d")(tuple{10, 10, 10}, 1u , m.get_allocator()); - auto&& arr3d_cpy = - *m.construct>("arr3d_cpy")(tuple{0, 0, 0}, m.get_allocator()); - - assert( arr1d[3] == 99 ); - assert( arr2d[4][5] == 1.0 ); - assert( arr3d[2][3][4] == 1u ); - - arr1d[3] = 33; - arr2d[4][5] = 45.001; - std::iota(arr3d[6][7].begin(), arr3d[6][7].end(), 100); - - arr3d_cpy = arr3d; - assert( arr3d_cpy[6][7][8] == arr3d[6][7][8] ); - m.flush(); -} -{ - metall::manager m{metall::open_only, dir.c_str()}; - - auto&& arr1d = - *m.find>("arr1d").first; assert(std::addressof(arr1d)); - auto&& arr2d = - *m.find>("arr2d").first; assert(std::addressof(arr2d)); - auto&& arr3d = - *m.find>("arr3d").first; assert(std::addressof(arr3d)); - auto&& arr3d_cpy = - *m.find>("arr3d_cpy").first; assert(std::addressof(arr3d)); - - assert( arr1d[5] == 99 ); - assert( arr1d[3] == 33 ); - - assert( arr2d[7][8] == 1.0 ); - assert( arr2d[4][5] == 45.001 ); - - assert( arr3d[6][7][3] == 103 ); - assert( arr3d_cpy == arr3d ); - - m.destroy>("arr1d"); - m.destroy>("arr2d"); - m.destroy>("arr3d"); - m.destroy>("arr3d_cpy"); -} -remove_all(dir); -} +int main() { + std::filesystem::path const dir = "llnl_metall_mapped_file.bin/"; + remove_all(dir); + { + metall::manager m{metall::create_only, 
dir.c_str(), 1 << 25}; + auto&& arr1d = *m.construct>("arr1d")(std::tuple{10}, 5, m.get_allocator()); + auto&& arr2d = *m.construct>("arr2d")(std::tuple{10, 10}, 1.0, m.get_allocator()); + auto&& arr3d = *m.construct>("arr3d")(std::tuple{10, 10, 10}, 1u, m.get_allocator()); + + auto&& arr3d_copy = *m.construct>("arr3d_copy")(arr3d, m.get_allocator()); + + assert(arr1d[3] == 5); + assert(arr2d[4][5] == 1.0); + assert(arr3d[2][3][4] == 1U); + + arr1d[3] = 33; + arr2d[4][5] = 45.001; + std::iota(arr3d[6][7].begin(), arr3d[6][7].end(), 100); + + assert(arr3d_copy[6][7][8] == 1U); + + auto&& arr3d_assign = *m.construct>("arr3d_assign")(m.get_allocator()); + arr3d_assign = arr3d; + + assert(arr3d_assign == arr3d); + + assert(arr3d_assign[6][7][8] == arr3d[6][7][8]); + // m.flush(); + } + { + metall::manager m{metall::open_only, dir.c_str()}; + + auto const& arr1d = *m.find>("arr1d").first; + auto const& arr2d = *m.find>("arr2d").first; + auto const& arr3d = *m.find>("arr3d").first; + + auto const& arr3d_copy = *m.find>("arr3d_copy").first; + assert(std::addressof(arr3d)); + + auto const& arr3d_assign = *m.find>("arr3d_assign").first; + assert(std::addressof(arr3d)); + + assert(arr1d[5] == 5); + assert(arr1d[3] == 33); + + assert(arr2d[7][8] == 1.0); + assert(arr2d[4][5] == 45.001); + + assert(arr3d[6][7][3] == 103); + + assert(arr3d_assign == arr3d); + + m.destroy>("arr1d"); + m.destroy>("arr2d"); + m.destroy>("arr3d"); + m.destroy>("arr3d_copy"); + m.destroy>("arr3d_assign"); + } + + remove_all(dir); +} \ No newline at end of file diff --git a/external_codes/boost_multi/multi/examples/lu_fact.cpp b/external_codes/boost_multi/multi/examples/lu_fact.cpp index 558a2b4b64..8577cb53bf 100644 --- a/external_codes/boost_multi/multi/examples/lu_fact.cpp +++ b/external_codes/boost_multi/multi/examples/lu_fact.cpp @@ -1,13 +1,16 @@ #ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- $CXX $0 -o $0x -lboost_timer `pkg-config --libs tbb` &&$0x&&rm $0x;exit 
#endif -// © Alfredo A. Correa 2018-2020 +// Copyright 2018-2024 Alfredo A. Correa #include "../../multi/array.hpp" -#include // iota -#include // transform +#include // transform #include +#include +#include // iota + +#include namespace multi = boost::multi; @@ -22,8 +25,8 @@ Matrix&& lu_fact(Matrix&& A){ std::for_each(std::execution::par, begin(A) + k + 1, end(A), [&](auto&& Ai){ std::transform( - begin(Ai)+k+1, end(Ai), begin(Ak)+k+1, begin(Ai)+k+1, - [z=(Ai[k]/=Akk)](auto&& a, auto&& b){return a-z*b;} + begin(Ai)+k+1, end(Ai), begin(Ak)+k+1, begin(Ai)+k+1, + [z=(Ai[k]/=Akk)](auto a, auto b){return a - z*b;} ); } ); @@ -34,13 +37,13 @@ Matrix&& lu_fact(Matrix&& A){ template Matrix&& lu_fact2(Matrix&& A){ using multi::size; - auto m = A.size(), n = std::get<1>(sizes(A)); - - for(auto k = 0*m; k != m - 1; ++k){ + auto const [m, n] = A.sizes(); + + for(decltype(m) k = 0; k != m - 1; ++k){ for(auto i = k + 1; i != m; ++i){ auto const z = A[i][k]/A[k][k]; A[i][k] = z; - std::transform(begin(A[i]) + k + 1, begin(A[i]) + std::max(n, k + 1), A[k].begin() + k + 1, begin(A[i]) + k + 1, [&](auto&& a, auto&& b){return a - z*b;}); + std::transform(begin(A[i]) + k + 1, begin(A[i]) + std::max(n, k + 1), A[k].begin() + k + 1, begin(A[i]) + k + 1, [&](auto a, auto b){return a - z*b;}); } } return std::forward(A); @@ -49,10 +52,10 @@ Matrix&& lu_fact2(Matrix&& A){ template Matrix&& lu_fact3(Matrix&& A){ using multi::size; - auto m = A.size(), n = std::get<1>(sizes(A)); + auto const [m, n] = A.sizes(); for(auto k = 0*m; k != m - 1; ++k){ auto&& Ak = A[k]; - std::for_each(std::execution::par, begin(A) + k + 1, end(A), [&](auto&& Ai){ + std::for_each(std::execution::par, begin(A) + k + 1, end(A), [&](auto& Ai){ auto const z = Ai[k]/Ak[k]; Ai[k] = z; assert( k + 1 <= n ); @@ -62,19 +65,16 @@ Matrix&& lu_fact3(Matrix&& A){ return std::forward(A); } -#include -#include - using std::cout; int main(){ { multi::array A = { - {-3., 2., -4.}, - { 0., 1., 2.}, - { 2., 4., 5.} + {-3.0, 2.0, 
-4.0}, + { 0.0, 1.0, 2.0}, + { 2.0, 4.0, 5.0}, }; - multi::array y = {12.,5.,2.}; - double AA[3][3]; + multi::array y = {12.0, 5.0, 2.0}; + double AA[3][3]; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy types using std::copy; copy( begin(A), end(A), begin(*multi::array_ptr(&AA)) ); @@ -82,17 +82,13 @@ int main(){ lu_fact(AA); assert( std::equal(begin(A), end(A), begin(*multi::array_ptr(&AA)), end(*multi::array_ptr(&AA))) ); } -// return 0; { multi::array A({6000, 7000}); std::iota(A.data(), A.data() + A.num_elements(), 0.1); - std::transform(A.data(), A.data() + A.num_elements(), A.data(), [](auto x){return x/=2.e6;}); - // std::vector y(3000); std::iota(y.begin(), y.end(), 0.2); + std::transform(A.data(), A.data() + A.num_elements(), A.data(), [](auto x){return x/=2.0e6;}); { boost::timer::auto_cpu_timer t; lu_fact(A({3000, 6000}, {0, 4000})); cout << A[456][123] << std::endl; } - // cout << y[45] << std::endl; } } - diff --git a/external_codes/boost_multi/multi/examples/mpi3.cpp b/external_codes/boost_multi/multi/examples/mpi3.cpp index a7133151da..0251e85971 100644 --- a/external_codes/boost_multi/multi/examples/mpi3.cpp +++ b/external_codes/boost_multi/multi/examples/mpi3.cpp @@ -14,41 +14,40 @@ namespace multi = boost::multi; void test_1D(mpi3::communicator& comm){ - switch(comm.rank()){ - case 0:{ + auto const rank = comm.rank(); + switch(rank){ + case 0: multi::array v(100); - std::iota(v.begin(), v.end(), 0.); + std::iota(v.begin(), v.end(), 0.0); assert( v.strided(2).size() == 50 and v.strided(2)[9] == 18 ); comm.send(v.strided(2).begin(), v.strided(2).end(), 1); - return; - } - case 1:{ - multi::array v(50); - comm.receive(v.begin(), v.end(), 0); - assert( v[9] == 18 ); - return; - } + break; + case 1: + multi::array w(50); + comm.receive(w.begin(), w.end(), 0); + assert( w[9] == 18 ); + break; + default: assert(0); } - assert(0); - + return; } void test_2D(mpi3::communicator& comm){ - auto const v = 
[]{ + auto const v = std::invoke([] { multi::array v({4, 5}); - std::iota(begin(v.elements()), end(v.elements()), 0.); + std::iota(begin(v.elements()), end(v.elements()), 0.0); return v; - }(); + }); auto&& vpart = v({1, 4}, {1, 3}); - switch(comm.rank()){ + switch(comm.rank()) { case 0: comm.send(begin(vpart), end(vpart), 1); return; case 1: - multi::array w(extensions(vpart), 99.); - comm.receive(begin(w), end(w), 0); + multi::array w(extensions(vpart), 99.0); + comm.receive(begin(w), end(w), 0); assert( w == vpart ); return; } @@ -57,69 +56,70 @@ void test_2D(mpi3::communicator& comm){ } void test_2D_complex(mpi3::communicator& comm){ - using complex = std::complex; multi::array v({4, 5}); using std::get; - if(auto x = v.extensions()) - for(auto i: get<0>(x)) - for(auto j: get<1>(x)) + if(auto const x = v.extensions()) { + auto const [is, js] = x; + for(auto i: is) { + for(auto j: js) { v[i][j] = complex(i, j); + } + } + } - switch(comm.rank()){ + switch(comm.rank()) { case 0: - comm.send(begin(v), end(v), 1); break; + comm.send(begin(v), end(v), 1); + break; case 1: - multi::array w(extensions(v), 99.); + multi::array w(extensions(v), 99.0); comm.receive(begin(w), end(w), 0); - assert( w[2][3] == std::complex(2., 3.) 
); + assert( w[2][3] == std::complex(2.0, 3.0) ); break; + default: assert(0); } - } void test_3D(mpi3::communicator& comm){ - auto const v = []{ + auto const v = std::invoke([]{ multi::array v({4, 5, 7}); - std::iota(begin(v.elements()), end(v.elements()), 0.); + std::iota(begin(v.elements()), end(v.elements()), 0.0); return v; - }(); + }); auto&& vpart = v({1, 4}, {1, 3}, {3, 6}); + switch(comm.rank()){ case 0: comm.send(begin(vpart), end(vpart), 1); return; case 1: - multi::array w(extensions(vpart), 99.); + multi::array w(extensions(vpart), 99.0); comm.receive(begin(w), end(w), 0); assert( w == vpart ); return; } assert(0); - } void test_2D_strides(mpi3::communicator& comm){ multi::array v({4, 5}); - std::iota(v.elements().begin(), v.elements().end(), 0.); + std::iota(v.elements().begin(), v.elements().end(), 0.0); std::cout << std::endl; - switch(comm.rank()){ - case 0:{ + switch(comm.rank()) { + case 0: comm.send_n(v({1, 3}, {2, 4}).begin(), v({1, 3}, {2, 4}).size(), 1); return; - } - case 1:{ - multi::array w({4, 5}, 99.); + case 1: + multi::array w({4, 5}, 99.0); comm.receive(w({1, 3}, {2, 4}).begin(), w({1, 3}, {2, 4}).end(), 0); assert( w({1, 3}, {2, 4}) == v({1, 3}, {2, 4}) ); return; - } } assert(0); - } /* @@ -146,49 +146,21 @@ void test_3D(mpi3::communicator& comm){ } */ -void test_vector_nonpod(mpi3::communicator& comm){ - - switch(comm.rank()){ - case 0:{ - std::vector v(10); - v[2] = "hola"; - comm.send_n(v.begin(), v.size(), 1); - return; - } - case 1:{ - std::vector v(10); - comm.receive_n(v.begin(), v.size(), 0); - assert( v[2] == "hola" ); - return; - } - } - assert(0); - -} - -#if 0 -void test_1D_nonpod(mpi3::communicator& comm){ +void test_vector_nonpod(mpi3::communicator& comm) { + std::vector v(10); - switch(comm.rank()){ - case 0:{ - multi::array v(10); - v[2] = "hola"; - comm.send_n(v.begin(), v.size(), 1); - return; - } - case 1:{ - multi::array v(10); - comm.receive_n(v.begin(), v.size(), 0); - assert( v[2] == "hola" ); - return; - } - 
} - assert(0); + if(comm.rank() == 0) { + v[2] = "hola"; + comm.send_n(v.begin(), v.size(), 1); + } else if(comm.rank() == 1) { + comm.receive_n(v.begin(), v.size(), 0); + assert( v[2] == "hola" ); + } else { assert(0); } + return; } -#endif -int mpi3::main(int, char*[], mpi3::environment& env){ +int mpi3::main(int, char*[], mpi3::environment& env) { auto world = env.world(); @@ -200,12 +172,12 @@ int mpi3::main(int, char*[], mpi3::environment& env){ { auto self = env.get_self_instance(); - auto const v = []{ + auto const v = std::invoke({ multi::array v({4, 5}); - std::iota(v.elements().begin(), v.elements().end(), 0.); + std::iota(v.elements().begin(), v.elements().end(), 0.0); return v; - }(); - multi::array w({4, 2}, 0.); + }); + multi::array w({4, 2}, 0.0); self.gather_n(v({0, 4}, {2, 4}).begin(), v({0, 4}, {2, 4}).size(), w.begin()); @@ -225,10 +197,10 @@ int mpi3::main(int, char*[], mpi3::environment& env){ auto self = env.get_self_instance(); auto const v = []{ multi::array v({4, 5}); - std::iota(v.elements().begin(), v.elements().end(), 0.); + std::iota(v.elements().begin(), v.elements().end(), 0.0); return v; }(); - multi::array w({5, 4}, 0.); + multi::array w({5, 4}, 0.0); self.gather_n(v.rotated().begin(), v.rotated().size(), w.begin()); assert( v.rotated() == w ); } @@ -245,22 +217,21 @@ int mpi3::main(int, char*[], mpi3::environment& env){ assert( t.size() == sizeof(double) ); assert( t.extent() == sizeof(double)*2 ); } - { auto self = env.get_self_instance(); multi::array v(10); - std::iota(v.elements().begin(), v.elements().end(), 0.); + std::iota(v.elements().begin(), v.elements().end(), 0.0); multi::array w(10); self.gather_n(v.begin(), v.size(), w.begin()); assert( w == v ); } { auto self = env.get_self_instance(); - auto const v = []{ + auto const v = std::invoke([] { multi::array v(60); - std::iota(v.elements().begin(), v.elements().end(), 0.); + std::iota(v.elements().begin(), v.elements().end(), 0.0); return v; - }(); + }); multi::array 
w(30); assert( v.strided(2).size() == 30 ); @@ -269,45 +240,44 @@ int mpi3::main(int, char*[], mpi3::environment& env){ } { auto self = env.get_self_instance(); - auto const v = []{ + auto const v = std::invoke([] { multi::array v(30); - std::iota(v.elements().begin(), v.elements().end(), 0.); + std::iota(v.elements().begin(), v.elements().end(), 0.0); return v; - }(); + }); multi::array w(10); self.gather_n(v({10, 30}).strided(2).begin(), v({10, 30}).strided(2).size(), w.data_elements()); assert( v({10, 30}).strided(2) == w ); } { auto self = env.get_self_instance(); - auto const v = []{ + auto const v = std::invoke([] { multi::array v({4, 5}); - std::iota(v.elements().begin(), v.elements().end(), 0.); + std::iota(v.elements().begin(), v.elements().end(), 0.0); return v; - }(); - multi::array w({2, 5}, 0.); + }); + multi::array w({2, 5}, 0.0); self.gather_n(v.strided(2).begin(), v.strided(2).size(), w.begin()); assert( v.strided(2) == w ); } - { auto self = env.get_self_instance(); - auto const v = []{ + auto const v = std::invoke([] { multi::array v({6, 4, 5}); - std::iota(v.elements().begin(), v.elements().end(), 0.); + std::iota(v.elements().begin(), v.elements().end(), 0.0); return v; - }(); - multi::array w({3, 4, 5}, 0.); + }); + multi::array w({3, 4, 5}, 0.0); for(auto const& e : v.elements()) std::cout<< e <<','; std::cout << std::endl; self.gather_n(v.strided(2).begin(), v.strided(2).size(), w.begin()); - assert( v.strided(2) == w ); + assert( v.strided(2) == w ); } + return 0; } - diff --git a/external_codes/boost_multi/multi/examples/polymorphic_memory_resource.cpp b/external_codes/boost_multi/multi/examples/polymorphic_memory_resource.cpp index c9b3224e19..a719c4bfaa 100644 --- a/external_codes/boost_multi/multi/examples/polymorphic_memory_resource.cpp +++ b/external_codes/boost_multi/multi/examples/polymorphic_memory_resource.cpp @@ -1,7 +1,7 @@ #ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- ${CXX:-c++} -std=c++17 
$CXXFLAGS $0 -o $0x&&$0x&&rm $0x;exit #endif -// © Alfredo A. Correa 2020 +// © Alfredo A. Correa 2020-2024 #include "../../multi/array.hpp" @@ -13,6 +13,7 @@ namespace multi = boost::multi; int main() { static_assert( sizeof(multi::array) < sizeof(multi::pmr::array) , "!"); + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) make a small buffer TODO(correaa) replace with a std::array char buffer[13] = "____________"; // flawfinder: ignore , a small buffer on the stack or an allocation std::pmr::monotonic_buffer_resource pool{ std::data(buffer), std::size(buffer), @@ -23,12 +24,12 @@ int main() { multi::pmr::array B({3, 2}, 'b', &pool); assert( A.get_allocator() == B.get_allocator() ); - assert( buffer == std::string{"aaaabbbbbb__"} ); + assert( std::data(buffer) == std::string{"aaaabbbbbb__"} ); try { multi::pmr::array C({9, 9}, 'c', &pool); // there is no upstream resource so it throws } catch(std::bad_alloc&) { - assert( buffer == std::string{"aaaabbbbbb__"} ); + assert( std::data(buffer) == std::string{"aaaabbbbbb__"} ); } std::array buffer2; @@ -38,6 +39,7 @@ int main() { }; { multi::pmr::array D = A; + D[0][0] = 'c'; assert(D.get_allocator() != A.get_allocator() ); assert(D.get_allocator().resource() == std::pmr::get_default_resource() ); } diff --git a/external_codes/boost_multi/multi/examples/save.cpp b/external_codes/boost_multi/multi/examples/save.cpp index eabb664e3f..567d535a2c 100644 --- a/external_codes/boost_multi/multi/examples/save.cpp +++ b/external_codes/boost_multi/multi/examples/save.cpp @@ -1,68 +1,78 @@ -#ifdef COMPILATION_INSTRUCTIONS -$CXX $0 -o $0x -lboost_serialization -lstdc++fs &&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2020 -#include +// Copyright 2020-2024 Alfredo A. 
Correa + +#include #include #include -#include -#include -#include -#include #include #include +#include +#include +#include +#include #include "../../multi/array.hpp" -#include +#include -enum format {xml, txt, bin}; +enum format { xml, + txt, + bin }; namespace barch = boost::archive; namespace bs11n = boost::serialization; -#define UNSWITCH __builtin_unreachable(); +#define BOOST_MULTI_UNSWITCH __builtin_unreachable template -void save(Array const& a, std::string name, format f){ +void save(Array const& a, std::string const& name, format f) { std::ofstream ofs(name); - *[&]()->std::unique_ptr{switch(f){ - case xml: return std::make_unique(ofs); - case txt: return std::make_unique(ofs); + *[&]() -> std::unique_ptr { + switch(f) { + case xml: return std::make_unique(ofs); + case txt: return std::make_unique(ofs); case bin: return std::make_unique(ofs); - }UNSWITCH;}() << bs11n::make_nvp("root", a); + } + BOOST_MULTI_UNSWITCH(); + }() << bs11n::make_nvp("root", a); assert(ofs); } template -void save(Array const& a, std::experimental::filesystem::path p){ - if(p.extension()==".xml") return save(a, p.string(), xml); - else if(p.extension()==".txt") return save(a, p.string(), txt); - else return save(a, p.string(), bin); +void save(Array const& a, std::experimental::filesystem::path p) { + if(p.extension() == ".xml") + return save(a, p.string(), xml); + else if(p.extension() == ".txt") + return save(a, p.string(), txt); + else + return save(a, p.string(), bin); } template -void load(Array& a, std::string name, format f){ +void load(Array& a, std::string const& name, format f) { std::ifstream ifs(name); - *[&]()->std::unique_ptr{switch(f){ + *[&]() -> std::unique_ptr {switch(f){ case xml: return std::make_unique(ifs); case txt: return std::make_unique(ifs); case bin: return std::make_unique(ifs); - }UNSWITCH;}() >> bs11n::make_nvp("root", a); + }BOOST_MULTI_UNSWITCH(); }() >> bs11n::make_nvp("root", a); assert(ifs); } template -void save_xml(Array const& a, std::string 
name){ +void save_xml(Array const& a, std::string const& name) { std::ofstream ofs(name); barch::xml_oarchive(ofs) << bs11n::make_nvp("root", a); } namespace multi = boost::multi; -int main(){ - multi::array const arrD2d = {{1., 2., 3.}, {4.,5.,6.}, {7.,8.,9.}}; +int main() { + multi::array const arrD2d = { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + }; save(arrD2d, "arrD2d.xml"); multi::array arrD2d_copy; @@ -70,3 +80,4 @@ int main(){ assert(arrD2d_copy == arrD2d); } +#undef BOOST_MULTO_SWITCH diff --git a/external_codes/boost_multi/multi/examples/serialization.cpp b/external_codes/boost_multi/multi/examples/serialization.cpp index 66a4c6d2f6..a69c7ddbdd 100644 --- a/external_codes/boost_multi/multi/examples/serialization.cpp +++ b/external_codes/boost_multi/multi/examples/serialization.cpp @@ -1,219 +1,275 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*- -$CXX $0 -o $0x -lboost_unit_test_framework -lstdc++fs -lboost_serialization -lboost_iostreams&&$0x&&rm $0x;exit +#if 0 && defined(COMPILATION) +${CXX:-c++} $0 -o $0x -I../include -lboost_unit_test_framework -lstdc++fs -lboost_serialization -lboost_iostreams&& $0x&& rm $0x; +exit #endif -// © Alfredo Correa 2018-2020 +// Copyright 2018-2024 Alfredo A. 
Correa #define BOOST_TEST_MODULE "C++ Unit Tests for Multi serialization" #define BOOST_TEST_DYN_LINK -#include -#include "../adaptors/serialization/xml_archive.hpp" -//#include -//#include +#include -#include -#include -#include -#include -#include -#include -#include +// #include "../adaptors/serialization/xml_archive.hpp" +#include +#include -#include "../array.hpp" +#include +#include +#include +#include +#include +#include +#include -//#include "../adaptors/cuda.hpp" +#include -#include -#include +#include +#include -#include -#include +#include +#include -#include -#include -#include +#include +#include +#include -#include +#include -#include +#include namespace multi = boost::multi; + namespace fs = std::experimental::filesystem; -struct watch : private std::chrono::high_resolution_clock{ - std::string name_; - time_point start_; +struct watch : private std::chrono::high_resolution_clock { + std::string name_; + time_point start_; mutable bool engaged = true; - watch(std::string name = "") : name_{name}, start_{now()}{} - auto operator*() const{engaged = false; return std::chrono::duration(now() - start_).count();} - ~watch(){ - if(engaged){ + watch(std::string name = "") : name_{std::move(name)}, start_{now()} {} + auto operator*() const { + engaged = false; + return std::chrono::duration(now() - start_).count(); + } + ~watch() { + if(engaged) { auto count = operator*(); - std::cerr<< name_ <<": "<< count <<" sec"< A{ + {"w", "x"}, + {"y", "z"}, + }; + boost::archive::xml_oarchive(std::cout, boost::archive::no_header) + << boost::make_nvp("A", A()); +} + +BOOST_AUTO_TEST_CASE(multi_serialization_static_small_xml) { multi::static_array d2D({10, 10}); - std::mt19937 eng{std::random_device{}()}; - auto gen = [&](){return std::uniform_real_distribution<>{}(eng);}; - std::for_each(begin(d2D), end(d2D), [&](auto&& r){std::generate(begin(r), end(r), gen);}); - auto const name = "serialization-static-small.xml"; + + std::mt19937_64 eng(std::random_device{}()); + + 
auto gen = [&]() { return std::uniform_real_distribution<>{}(eng); }; + + std::for_each(begin(d2D), end(d2D), [&](auto&& r) { std::generate(begin(r), end(r), gen); }); + std::string const filename = "serialization-static-small.xml"; { - std::ofstream ofs{name}; assert(ofs); + std::ofstream ofs{filename}; + assert(ofs); boost::archive::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(d2D); } { - std::ifstream ifs{name}; assert(ifs); decltype(d2D) d2D_copy(extensions(d2D), 99.); + std::ifstream ifs{filename}; + assert(ifs); + decltype(d2D) d2D_copy(extensions(d2D), 99.); boost::archive::xml_iarchive{ifs} >> BOOST_SERIALIZATION_NVP(d2D_copy); BOOST_REQUIRE( d2D_copy == d2D ); } - std::cout<< fs::file_size(name) <<'\n'; - fs::remove(name); + std::cout << fs::file_size(filename) << '\n'; + fs::remove(filename); + + { + std::ostringstream oss; + { + boost::archive::text_oarchive xoa{oss}; + + std::vector v = {1, 2, 3}; + std::for_each(v.begin(), v.end(), [&xoa](auto const& e) { xoa << e; }); + // std::accumulate(v.begin(), v.end(), &xoa, [](boost::archive::text_oarchive* x, int e) {return &(*x << BOOST_SERIALIZATION_NVP(e));}); + } + std::cout << oss.str() << std::endl; + } } -BOOST_AUTO_TEST_CASE(multi_serialization_small_xml){ +BOOST_AUTO_TEST_CASE(multi_serialization_small_xml) { multi::array d2D({10, 10}); - std::mt19937 e{std::random_device{}()}; -// auto g = std::bind(std::uniform_real_distribution<>{}, e);// - auto g = [&](){return std::uniform_real_distribution<>{}(e);}; - std::for_each(begin(d2D), end(d2D), [&](auto&& r){std::generate(begin(r), end(r), g);}); - auto const name = "serialization-small.xml"; + std::mt19937_64 e(std::random_device{}()); + + // auto g = std::bind(std::uniform_real_distribution<>{}, e);// + auto g = [&]() { return std::uniform_real_distribution<>{}(e); }; + + std::for_each(begin(d2D), end(d2D), [&](auto&& r) { std::generate(begin(r), end(r), g); }); + std::string const filename = "serialization-small.xml"; { - std::ofstream ofs{name}; 
assert(ofs); + std::ofstream ofs{filename}; + assert(ofs); boost::archive::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(d2D); } { - std::ifstream ifs{name}; assert(ifs); decltype(d2D) d2D_copy(extensions(d2D)); + std::ifstream ifs{filename}; + assert(ifs); + decltype(d2D) d2D_copy(extensions(d2D)); boost::archive::xml_iarchive{ifs} >> BOOST_SERIALIZATION_NVP(d2D_copy); BOOST_REQUIRE( d2D_copy == d2D ); } { - std::ofstream ofs{"serialization-small-part.xml"}; assert(ofs); + std::ofstream ofs{"serialization-small-part.xml"}; + assert(ofs); auto&& a = d2D({0, 5}, {0, 5}); - boost::archive::xml_oarchive{ofs} << boost::serialization::make_nvp("d2D_part", a);//BOOST_SERIALIZATION_NVP(d2D); + boost::archive::xml_oarchive{ofs} << boost::serialization::make_nvp("d2D_part", a); // BOOST_SERIALIZATION_NVP(d2D); fs::remove("serialization-small-part.xml"); } - std::cout<< fs::file_size(name) <<'\n'; - fs::remove(name); + std::cout << fs::file_size(filename) << '\n'; + fs::remove(filename); } +BOOST_AUTO_TEST_CASE(multi_serialization_static_large_xml) { -BOOST_AUTO_TEST_CASE(multi_serialization_static_large_xml){ - watch w("static_large_xml"); multi::static_array d2D({1000, 1000}); - auto gen = [e=std::mt19937{std::random_device{}()}]() mutable{return std::uniform_real_distribution<>{}(e);}; - std::for_each(begin(d2D), end(d2D), [&](auto&& r){std::generate(begin(r), end(r), gen);}); - auto const name = "serialization-static-large.xml"; + + auto gen = [e = std::mt19937_64(std::random_device{}())]() mutable { return std::uniform_real_distribution<>{}(e); }; + std::for_each(begin(d2D), end(d2D), [&](auto&& r) { std::generate(begin(r), end(r), gen); }); + + watch w("static_large_xml"); + + std::string const filename = "serialization-static-large.xml"; { - std::ofstream ofs{name}; assert(ofs); + std::ofstream ofs{filename}; + assert(ofs); boost::archive::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(d2D); } { - std::ifstream ifs{name}; assert(ifs); decltype(d2D) 
d2D_copy(extensions(d2D)); + std::ifstream ifs{filename}; + assert(ifs); + decltype(d2D) d2D_copy(extensions(d2D)); boost::archive::xml_iarchive{ifs} >> BOOST_SERIALIZATION_NVP(d2D_copy); BOOST_REQUIRE( d2D_copy == d2D ); } - std::cout<< fs::file_size(name) <<'\n'; - fs::remove(name); + std::cout << fs::file_size(filename) << '\n'; + fs::remove(filename); } -BOOST_AUTO_TEST_CASE(multi_serialization_static_small){ +BOOST_AUTO_TEST_CASE(multi_serialization_static_small) { { - multi::static_array d0D = 12.; - std::ofstream ofs{"serialization-static_0D.xml"}; assert(ofs); + multi::static_array d0D{12.0}; + + std::ofstream ofs{"serialization-static_0D.xml"}; + assert(ofs); + boost::archive::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(d0D); fs::remove("serialization-static_0D.xml"); } { multi::array d2D = { - {150., 16., 17., 18., 19.}, - { 5., 5., 5., 5., 5.}, - {100., 11., 12., 13., 14.}, - { 50., 6., 7., 8., 9.} + {150.0, 16.0, 17.0, 18.0, 19.0}, + { 5.0, 5.0, 5.0, 5.0, 5.0}, + {100.0, 11.0, 12.0, 13.0, 14.0}, + { 50.0, 6.0, 7.0, 8.0, 9.0}, }; - auto gen = [d = std::uniform_real_distribution{-1, 1}, e = std::mt19937{std::random_device{}()}]() mutable{return d(e);}; + auto gen = [d = std::uniform_real_distribution{-1, 1}, e = std::mt19937{std::random_device{}()}]() mutable { return d(e); }; std::for_each( - begin(d2D), end(d2D), - [&](auto&& r){std::generate(begin(r), end(r), gen);} + begin(d2D), end(d2D), + [&](auto&& r) { std::generate(begin(r), end(r), gen); } ); - auto name = "serialization-small-double2D.xml"; - [&, _ = watch("xml write double")]{ - std::ofstream ofs{"serialization-small-double2D.xml"}; assert(ofs); + std::string const filename = "serialization-small-double2D.xml"; + [&, _ = watch("xml write double")] { + std::ofstream ofs{filename}; + assert(ofs); boost::archive::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(d2D); }(); - std::cerr<<"size "<< double(fs::file_size(name))/1e6 <<"MB\n"; - fs::remove("serialization-small-double2D.xml"); + std::cerr << 
"size " << double(fs::file_size(filename)) / 1e6 << "MB\n"; + fs::remove(filename); } { multi::array d2D = { - {150., 16., 17., 18., 19.}, - { 5., 5., 5., 5., 5.}, - {100., 11., 12., 13., 14.}, - { 50., 6., 7., 8., 9.} + {150.0, 16.0, 17.0, 18.0, 19.0}, + { 5.0, 5.0, 5.0, 5.0, 5.0}, + {100.0, 11.0, 12.0, 13.0, 14.0}, + { 50.0, 6.0, 7.0, 8.0, 9.0}, }; d2D.reextent({2000, 2000}); - auto gen = [d = std::uniform_real_distribution{-1, 1}, e = std::mt19937{std::random_device{}()}]() mutable{return d(e);}; + auto gen = [d = std::uniform_real_distribution{-1, 1}, e = std::mt19937{std::random_device{}()}]() mutable { return d(e); }; std::for_each( - begin(d2D), end(d2D), - [&](auto&& r){std::generate(begin(r), end(r), gen);} + begin(d2D), end(d2D), + [&](auto&& r) { std::generate(begin(r), end(r), gen); } ); - [&, _ = watch("xml write double")]{ - std::ofstream ofs{"serialization-double.xml"}; assert(ofs); + [&, _ = watch("xml write double")] { + std::ofstream ofs{"serialization-double.xml"}; + assert(ofs); boost::archive::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(d2D); }(); - std::cerr<<"size "<< double(fs::file_size("serialization-double.xml"))/1e6 <<"MB\n"; + std::cerr << "size " << double(fs::file_size("serialization-double.xml")) / 1e6 << "MB\n"; fs::remove("serialization-double.xml"); } using complex = std::complex; - auto const d2D = []{multi::array _({10000, 1000}); - auto gen = [d = std::uniform_real_distribution{-1, 1}, e = std::mt19937{std::random_device{}()}]() mutable{return std::complex{d(e), d(e)};}; - std::for_each(begin(_), end(_), [&](auto&& r){std::generate(begin(r), end(r), gen);}); return _; + auto const d2D = [] { + multi::array _({10000, 1000}); + auto gen = [d = std::uniform_real_distribution{-1, 1}, e = std::mt19937{std::random_device{}()}]() mutable { return std::complex{d(e), d(e)}; }; + std::for_each(begin(_), end(_), [&](auto&& r) { std::generate(begin(r), end(r), gen); }); + return _; }(); - auto size = sizeof(double)*d2D.num_elements(); + 
auto size = sizeof(double) * d2D.num_elements(); using std::cerr; - std::cout<<"data size (in memory) "<< size < d2D_cpy; - auto count_load = [&, w=watch("binary load")]{ - std::ifstream ifs{file}; assert(ifs); - boost::archive::binary_iarchive{ifs} >> d2D_cpy; - return *w; + auto count_load = [&, w = watch("binary load")] { + std::ifstream ifs{file}; + assert(ifs); + boost::archive::binary_iarchive{ifs} >> d2D_cpy; + return *w; }(); - std::cerr<<"load speed "<< double(file_size(file))/1e6/count_load <<"MB/s\n"; + std::cerr << "load speed " << double(file_size(file)) / 1e6 / count_load << "MB/s\n"; BOOST_REQUIRE( d2D == d2D_cpy ); fs::remove(file); } { using std::cout; fs::path file{"serialization.xml"}; - cout<< file << std::endl; - auto count = [&, w = watch("xml write base64")]{ - std::ofstream ofs{file}; assert(ofs); + cout << file << std::endl; + auto count = [&, w = watch("xml write base64")] { + std::ofstream ofs{file}; + assert(ofs); boost::archive::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(d2D); return *w; }(); - cout<<"data size "<< double(size)/1e6 << "MB\n"; - cout<<"file size "<< double(file_size(file))/1e6 <<"MB\n"; - cout<<"save speed "<< double(size)/1e6/count <<"MB/s"<< std::endl; + cout << "data size " << double(size) / 1e6 << "MB\n"; + cout << "file size " << double(file_size(file)) / 1e6 << "MB\n"; + cout << "save speed " << double(size) / 1e6 / count << "MB/s" << std::endl; multi::array d2D_cpy; - auto count2 = [&, w = watch("xml load base64")]{ - std::ifstream ifs{file}; assert(ifs); + + auto count2 = [&, w = watch("xml load base64")] { + std::ifstream ifs{file}; + assert(ifs); boost::archive::xml_iarchive{ifs} >> BOOST_SERIALIZATION_NVP(d2D_cpy); return *w; }(); - cout<<"load speed "<< double(size)/1e6/count2 <<"MB/s"<< std::endl; + + cout << "load speed " << double(size) / 1e6 / count2 << "MB/s" << std::endl; BOOST_REQUIRE( d2D_cpy == d2D ); fs::remove(file); } @@ -229,33 +285,36 @@ BOOST_AUTO_TEST_CASE(multi_serialization_static_small){ 
} #endif { - [&, _ = watch("text write")]{ - std::ofstream ofs{"serialization.txt"}; assert(ofs); + [&, _ = watch("text write")] { + std::ofstream ofs{"serialization.txt"}; + assert(ofs); boost::archive::text_oarchive{ofs} << d2D; }(); - std::cerr<<"size "<< double(fs::file_size("serialization.txt"))/1e6 <<"MB\n"; + std::cerr << "size " << double(fs::file_size("serialization.txt")) / 1e6 << "MB\n"; fs::remove("serialization.txt"); } { - multi::array d2D_copy;//(extensions(d2D), 9999.); - [&, _ = watch("text read")]{ - std::ifstream ifs{"serialization.txt"}; assert(ifs); + multi::array d2D_copy; //(extensions(d2D), 9999.0); + [&, _ = watch("text read")] { + std::ifstream ifs{"serialization.txt"}; + assert(ifs); boost::archive::text_iarchive{ifs} >> d2D_copy; }(); BOOST_REQUIRE( d2D_copy == d2D ); fs::remove("serialization.txt"); } { - multi::array d2D_copy;//(extensions(d2D), 9999.); - [&, _=watch("binary read")]{ - std::ifstream ifs{"serialization.bin"}; assert(ifs); + multi::array d2D_copy; //(extensions(d2D), 9999.0); + [&, _ = watch("binary read")] { + std::ifstream ifs{"serialization.bin"}; + assert(ifs); boost::archive::binary_iarchive{ifs} >> d2D_copy; }(); BOOST_REQUIRE( d2D_copy == d2D ); fs::remove("serialization.bin"); } { - [&, _=watch("binary compressed write")]{ + [&, _ = watch("binary compressed write")] { std::ofstream ofs{"serialization_compressed.bin.gz"}; { boost::iostreams::filtering_stream f; @@ -264,12 +323,13 @@ BOOST_AUTO_TEST_CASE(multi_serialization_static_small){ boost::archive::binary_oarchive{f} << d2D; } }(); - std::cerr<<"size "<< double(fs::file_size("serialization.bin.gz"))/1e6 <<"MB\n"; + std::cerr << "size " << double(fs::file_size("serialization.bin.gz")) / 1e6 << "MB\n"; fs::remove("serialization.bin.gz"); } { - [&, _ = watch("compressed xml write")]{ - std::ofstream ofs{"serialization.xml.gz"}; assert(ofs); + [&, _ = watch("compressed xml write")] { + std::ofstream ofs{"serialization.xml.gz"}; + assert(ofs); { 
boost::iostreams::filtering_stream f; f.push(boost::iostreams::gzip_compressor()); @@ -277,13 +337,14 @@ BOOST_AUTO_TEST_CASE(multi_serialization_static_small){ boost::archive::xml_oarchive{f} << BOOST_SERIALIZATION_NVP(d2D); } }(); - std::cerr<<"size "<< double(fs::file_size("serialization.xml.gz"))/1e6 <<"MB\n"; + std::cerr << "size " << double(fs::file_size("serialization.xml.gz")) / 1e6 << "MB\n"; fs::remove("serialization.xml.gz"); } { - multi::array d2D_copy;//(extensions(d2D), 9999.); - [&, _ = watch("xml read")]{ - std::ifstream ifs{"serialization.xml"}; assert(ifs); + multi::array d2D_copy; //(extensions(d2D), 9999.); + [&, _ = watch("xml read")] { + std::ifstream ifs{"serialization.xml"}; + assert(ifs); boost::archive::xml_iarchive{ifs} >> BOOST_SERIALIZATION_NVP(d2D_copy); }(); BOOST_REQUIRE( d2D_copy == d2D ); @@ -291,36 +352,39 @@ BOOST_AUTO_TEST_CASE(multi_serialization_static_small){ } } -BOOST_AUTO_TEST_CASE(test_utility_serialization_2d){ +BOOST_AUTO_TEST_CASE(test_utility_serialization_2d) { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy types double carr[3][10] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, - {20, 21, 22, 23, 24, 25, 26, 27, 28, 29}, + { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}, + {10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0}, + {20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0}, }; - multi::array_ref marr(&carr[0][0], {3, 10}); + multi::array_ref marr(&carr[0][0], {3, 10}); boost::multi_array_ref Marr(&carr[0][0], boost::extents[3][10]); namespace arxiv = boost::archive; { - std::ofstream ofs{"utility_serialization_marr.xml"}; assert(ofs); + std::ofstream ofs{"utility_serialization_marr.xml"}; + assert(ofs); arxiv::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(marr); fs::remove("utility_serialization_marr.xml"); } { - std::ofstream ofs{"utility_serialization_marr_as_value.xml"}; assert(ofs); - 
multi::array const& marr_value = decay(marr);//static_cast const&>(marr); + std::ofstream ofs{"utility_serialization_marr_as_value.xml"}; + assert(ofs); + multi::array const& marr_value = decay(marr); // static_cast const&>(marr); BOOST_REQUIRE( marr_value.data_elements() == marr.data_elements() ); arxiv::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(marr_value); fs::remove("utility_serialization_marr_as_value.xml"); } { - std::ofstream ofs{"utility_serialization_carr.xml"}; assert(ofs); + std::ofstream ofs{"utility_serialization_carr.xml"}; + assert(ofs); arxiv::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(carr); fs::remove("utility_serialization_carr.xml"); } { -// std::ofstream ofs{"utility_serialization_Marr.xml"}; assert(ofs); -// arxiv::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(Marr); + // std::ofstream ofs{"utility_serialization_Marr.xml"}; assert(ofs); + // arxiv::xml_oarchive{ofs} << BOOST_SERIALIZATION_NVP(Marr); } } - diff --git a/external_codes/boost_multi/multi/examples/serialization2.cpp b/external_codes/boost_multi/multi/examples/serialization2.cpp index e006d2b8ea..48f8d2a009 100644 --- a/external_codes/boost_multi/multi/examples/serialization2.cpp +++ b/external_codes/boost_multi/multi/examples/serialization2.cpp @@ -1,7 +1,8 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2021 Alfredo A. Correa +// c++ serialization2.cpp -I../include -lboost_serialization -lboost_unit_test_framework +// Copyright 2019-2024 Alfredo A. 
Correa #define BOOST_TEST_MODULE "C++ Unit Tests for Multi allocators" +#define BOOST_TEST_DYN_LINK #include #include "multi/array.hpp" @@ -103,7 +104,7 @@ BOOST_AUTO_TEST_CASE(extensions_serialization) { } BOOST_AUTO_TEST_CASE(carray_serialization) { - double const A[3][3] = {{0., 1., 2.}, {3., 4., 5.}, {6., 7., 8.}}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy types + double const A[3][3] = {{0.0, 1.0, 2.0}, {3.0, 4.0, 5.0}, {6.0, 7.0, 8.0}}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy types std::stringstream ss; { { @@ -124,17 +125,17 @@ BOOST_AUTO_TEST_CASE(carray_serialization) { // xia>> cereal::make_nvp("A", B); // xia>> B ; // xia>> multi::archive_traits::make_nvp("A", B); - BOOST_REQUIRE( B[1][2] == 5. ); // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult) is it? + BOOST_REQUIRE( B[1][2] == 5.0 ); // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult) is it? BOOST_REQUIRE( A[1][2] == B[1][2] ); // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult) is it? 
} } BOOST_AUTO_TEST_CASE(array_serialization) { - multi::array arr({10, 10}, 0.); + multi::array arr({10, 10}, 0.0); BOOST_REQUIRE(( arr.extension() == boost::multi::index_range{0, 10} )); - std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.); + std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.0); std::stringstream ss{}; { @@ -162,9 +163,9 @@ BOOST_AUTO_TEST_CASE(array_serialization) { BOOST_AUTO_TEST_CASE(array_serialization_string) { multi::array arr({10, 10}); - auto const x = extensions(arr); - for(auto i : std::get<0>(x) ) { - for(auto j : std::get<1>(x) ) { + auto const [is, js] = extensions(arr); + for(auto i : is ) { + for(auto j : js ) { arr[i][j] = std::to_string(i) + std::to_string(j); } } @@ -195,10 +196,10 @@ BOOST_AUTO_TEST_CASE(array_serialization_string) { //#if not defined(__NVCC__) // some code contained here doesn't compile with nvcc 11.0,11.1 and 11.2 BOOST_AUTO_TEST_CASE(array_serialization_binary) { - multi::array arr({10, 10}, 0.); + multi::array arr({10, 10}, 0.0); BOOST_REQUIRE(( arr.extension() == boost::multi::index_range{0, 10} )); - std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.); + std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.0); std::stringstream ss{}; { @@ -245,7 +246,7 @@ BOOST_AUTO_TEST_CASE(array_serialization_string_binary) { //#if not defined(__NVCC__) // some code contained here doesn't compile with nvcc 11.0,11.1 and 11.2 BOOST_AUTO_TEST_CASE(vector) { - std::vector v(100); std::iota(begin(v), end(v), 10.); + std::vector v(100); std::iota(begin(v), end(v), 10.0); std::stringstream ss; { @@ -265,7 +266,7 @@ BOOST_AUTO_TEST_CASE(vector) { } BOOST_AUTO_TEST_CASE(vector_binary) { - std::vector v(100); std::iota(begin(v), end(v), 10.); + std::vector v(100); std::iota(begin(v), end(v), 10.0); std::stringstream ss{}; { @@ -285,11 +286,11 @@ BOOST_AUTO_TEST_CASE(vector_binary) { } 
BOOST_AUTO_TEST_CASE(array_serialization_3D) { - multi::array arr({10, 10, 10}, 0.); + multi::array arr({10, 10, 10}, 0.0); BOOST_REQUIRE(( arr.extension() == boost::multi::index_range{0, 10} )); - std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.); + std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.0); std::stringstream ss{}; { @@ -315,11 +316,11 @@ BOOST_AUTO_TEST_CASE(array_serialization_3D) { } BOOST_AUTO_TEST_CASE(array_serialization_3D_inplace) { - multi::array arr({10, 10, 10}, 0.); + multi::array arr({10, 10, 10}, 0.0); BOOST_REQUIRE(( arr.extension() == boost::multi::index_range{0, 10} )); - std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.); + std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.0); std::stringstream ss{}; XOArchive{ss}<< make_nvp("arr", arr); @@ -332,7 +333,7 @@ BOOST_AUTO_TEST_CASE(array_serialization_3D_inplace) { } BOOST_AUTO_TEST_CASE(array_serialization_2D_inplace_file) { - multi::array arr({2, 2}, 99.); + multi::array arr({2, 2}, 99.0); { std::ofstream ofs{"file.xml"}; @@ -347,13 +348,13 @@ BOOST_AUTO_TEST_CASE(array_serialization_2D_inplace_file) { BOOST_REQUIRE( arr2 == arr ); } -#if not defined(__NVCC__) // some code contained here doesn't compile with nvcc 11.0,11.1 and 11.2 +// #if not defined(__NVCC__) // some code contained here doesn't compile with nvcc 11.0,11.1 and 11.2 BOOST_AUTO_TEST_CASE(array_serialization_3D_part_binary_lvalue) { - multi::array arr({10, 10, 10}, 0.); + multi::array arr({10, 10, 10}, 0.0); BOOST_REQUIRE(( arr.extension() == boost::multi::index_range{0, 10} )); - std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.); + std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.0); std::stringstream ss{}; { @@ -373,11 +374,11 @@ BOOST_AUTO_TEST_CASE(array_serialization_3D_part_binary_lvalue) { } 
BOOST_AUTO_TEST_CASE(array_serialization_3D_part_xml_lvalue) { - multi::array arr({10, 10, 10}, 0.); + multi::array arr({10, 10, 10}, 0.0); BOOST_REQUIRE(( arr.extension() == boost::multi::index_range{0, 10} )); - std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.); + std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.0); std::stringstream ss{}; { @@ -397,11 +398,11 @@ BOOST_AUTO_TEST_CASE(array_serialization_3D_part_xml_lvalue) { } BOOST_AUTO_TEST_CASE(array_serialization_3D_part_binary) { - multi::array arr({10, 10, 10}, 0.); + multi::array arr({10, 10, 10}, 0.0); BOOST_REQUIRE(( arr.extension() == boost::multi::index_range{0, 10} )); - std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.); + std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.0); std::stringstream ss{}; { @@ -419,17 +420,22 @@ BOOST_AUTO_TEST_CASE(array_serialization_3D_part_binary) { } BOOST_AUTO_TEST_CASE(array_serialization_3D_part_xml) { - multi::array arr({10, 10, 10}, 0.); + multi::array arr({10, 10, 10}, 0.0); BOOST_REQUIRE(( arr.extension() == boost::multi::index_range{0, 10} )); - std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.); + std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 1000.0); std::stringstream ss{}; { XOArchive boa{ss}; boa<< multi::archive_traits::make_nvp("arr2", arr[2]); } + { + std::ofstream ofs("serialization_3D.xml"); + XOArchive boa(ofs); + boa<< multi::archive_traits::make_nvp("arr", arr()); + } { BOOST_REQUIRE( arr[3] != arr[2] ); { @@ -439,4 +445,4 @@ BOOST_AUTO_TEST_CASE(array_serialization_3D_part_xml) { BOOST_REQUIRE( arr[3] == arr[2] ); } } -#endif +// #endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas.hpp new file mode 100644 index 0000000000..8f6482912d --- /dev/null +++ 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas.hpp @@ -0,0 +1,24 @@ +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_HPP +#pragma once + +#include "../adaptors/blas/asum.hpp" +#include "../adaptors/blas/axpy.hpp" +#include "../adaptors/blas/copy.hpp" +#include "../adaptors/blas/dot.hpp" +#include "../adaptors/blas/gemm.hpp" +#include "../adaptors/blas/gemv.hpp" +//#include "../adaptors/blas/ger.hpp" +#include "../adaptors/blas/herk.hpp" +#include "../adaptors/blas/iamax.hpp" +#include "../adaptors/blas/nrm2.hpp" +#include "../adaptors/blas/scal.hpp" +#include "../adaptors/blas/swap.hpp" +#include "../adaptors/blas/syrk.hpp" +#include "../adaptors/blas/trsm.hpp" + +#endif // BOOST_MULTI_ADAPTORS_BLAS_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/CMakeLists.txt new file mode 100644 index 0000000000..af45955095 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/CMakeLists.txt @@ -0,0 +1,58 @@ +cmake_minimum_required(VERSION 3.16) + +#project( +# boost-multi-adaptors-blas +# VERSION 0.1 +# LANGUAGES CXX +#) + +find_package(BLAS) + +if(BLAS_FOUND) + message("Some BLAS found: linker flags: ${BLAS_LINKER_FLAGS}, libs: ${BLAS_LIBRARIES}, libs95: ${BLAS95_LIBRARIES}") + foreach(lib ${BLAS_LIBRARIES}) + # https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors + if(${lib} MATCHES "mkl") + message("Some BLAS found matches MKL") + add_definitions(-DBLAS_DOT_RETURNS_VOID) + target_compile_definitions(multi INTERFACE BLAS_DOT_RETURNS_VOID) + # add_definitions(-D_MULTI_USING_BLAS_MKL) + # in some systems with MKL, regular BLAS headers need to be found for it to work + SET(CMAKE_SKIP_BUILD_RPATH FALSE) + 
SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) + SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib64") + SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib64") + endif() + if(${lib} MATCHES "hpc_sdk") + message("Some BLAS found matches hpc_sdk (nvhpc)") # nvhpc may still use a different version of BLAS + add_definitions(-DBLAS_DOT_RETURNS_VOID) + target_compile_definitions(multi INTERFACE BLAS_DOT_RETURNS_VOID) + endif() + if(${lib} MATCHES "Accelerate") + message("Some BLAS found matches Accelerate (Apple)") + message(WARNING "Apple Accelerate BLAS is know to have bugs in single precission function `sdot` and `smrm2`, be careful: https://stackoverflow.com/a/77017238/225186, https://fortran-lang.discourse.group/t/how-many-blas-libraries-have-this-error/4454/23, https://forums.developer.apple.com/forums/thread/717757") + add_definitions(-DBLAS_DOT_RETURNS_VOID) + target_compile_definitions(multi INTERFACE BLAS_DOT_RETURNS_VOID) + endif() + endforeach() +endif() + +# this makes CM FetchContent friendly https://www.foonathan.net/2022/06/cmake-fetchcontent/ +if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + return() +endif() + +if(BLAS_FOUND) + find_package(Boost COMPONENTS unit_test_framework) + if(Boost_unit_test_framework_FOUND) + add_subdirectory(test) + else() + message(WARNING "Boost Unit Test Framework not found, BLAS-adaptor tests will not be compiled and run. If you want this feature, install Boost") + endif() +else() + message(WARNING "BLAS not found, BLAS-adaptor tests will not be compiled and run. 
If you want this feature install BLAS, for example please run:" + "\n sudo apt install libblas-dev" + "\n sudo dnf install blas-devel # in Fedora") + message(WARNING "BLA_VENDOR was set to ${BLA_VENDOR}\n") +endif() diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/README.md b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/README.md new file mode 100644 index 0000000000..5a69accb15 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/README.md @@ -0,0 +1,141 @@ + +# Multi BLAS Adaptor + +_© Alfredo A. Correa, 2018-2024_ + +(documentation in progress) + +The BLAS Adaptor provides an interface for BLAS and BLAS-like libraries (namely cuBLAS). + +## Contents +[[_TOC_]] + +## Numeric Arrays, Conjugation Real and Imaginary parts + +These functions produce views (not copies) related to conjugation, real and imaginary parts. + +```cpp + using complex = std::complex; + complex const I{0.0, 1.0}; + multi::array B = { + {1.0 - 3.0*I, 6.0 + 2.0*I}, + {8.0 + 2.0*I, 2.0 + 4.0*I}, + {2.0 - 1.0*I, 1.0 + 1.0*I} + }; + + namespace blas = multi::blas; + multi::array conjB = blas::conj(B); + + assert( blas::conj(B)[2][1] == std::conj(B[2][1]) ); + + assert( blas::transposed(B)[1][2] == B[2][1] ); + assert( blas::transposed(B) == ~B ); + + assert( blas::hermitized(B)[2][1] == blas::conj(B)[1][2] ); + assert( blas::hermitized(B) == blas::conj(blas::transposed(B)) ); + + assert( blas::real(B)[2][1] == std::real(B[2][1]) ); + assert( blas::imag(B)[2][1] == std::imag(B[2][1]) ); + + multi::array B_real_doubled = { + { 1.0, -3.0, 6.0, 2.0}, + { 8.0, 2.0, 2.0, 4.0}, + { 2.0, -1.0, 1.0, 1.0} + }; + assert( blas::real_doubled(B) == B_real_doubled ); +``` + +```cpp + multi::array const a_real = { + { 1.0, 3.0, 1.0}, + { 9.0, 7.0, 1.0}, + }; + + multi::array const b = { + { 11.0 + 1.0*I, 12.0 + 1.0*I, 4.0 + 1.0*I, 8.0 - 2.0*I}, + { 7.0 + 8.0*I, 19.0 - 2.0*I, 2.0 + 1.0*I, 7.0 + 1.0*I}, + { 5.0 + 1.0*I, 3.0 - 
1.0*I, 3.0 + 8.0*I, 1.0 + 1.0*I} + }; + + multi::array c({2, 4}); + + blas::real_doubled(c) = blas::gemm(1., a_real, blas::real_doubled(b)); // c = a_real*b +``` + +## GEMM + +``` +#include +#include + +namespace multi = boost::multi; + +int main() { + multi::array const A({2, 2}); + multi::array const B({2, 2}); + + multi::array const C1 = multi::blas::gemm(1.0, A, B); + auto const C2 = + multi::blas::gemm(1.0, A, B); +} +``` +https://godbolt.org/z/d1E7donWM + +(needs linking against BLAS to work, e.g. `-lblas` or `-lopenblas` or `-lmkl`) + +## Table of features + +All these operations are now supported for CPU and GPU memory, real and complex. + +scalars: `aa` ($`\alpha`$), `bb` ($`\beta`$) \ +vectors: `x`, `y` \ +matrices: `A`, `B`, `C` + +vector operations: `C` (`*`) conjugation (element-wise) \ +matrix operations: `J` (`*`) conjugation (element-wise) (use `C` for vectors), `T` transpose, `H` transpose conjugate (also `C`, discouraged), `U`/`L` upper or lower triangular part (logical zeroing other side) + + +| BLAS | mutable form | effect | operator form [³] | functional form | thrust/STL [¹] | +|--- |--- | --- | --- | --- | --- | +| SWAP |`blas::swap(x, y)` | $`x_i \leftrightarrow y_i`$ | `(x^y)` | | `swap_ranges(begin(x), end(x), begin(y))` | +| COPY |`blas::copy(x, y)` | $`y_i \leftarrow x_i`$ | `y << x` | `y = blas::copy(x)` | `copy(begin(x), end(x), begin(y))` | +| ASUM |`blas::asum(x, res)` | $`r \leftarrow \sum_i \|\Re x_i\| + \|\Im x_i\|`$ | `x==0`/`x!=0` `isinf(x)` `isnan(x)`[²] | `res = blas::asum(x)` | `transform_reduce(begin(x), end(x), 0.0, plus<>{}, [](auto const& e){return abs(e.real()) + abs(e.imag());})` | +| NRM2 |`blas::nrm2(x, res)` | $`r \leftarrow \sqrt{\sum_i \|x_i\|^2}`$ | `abs(x)` | `res = blas::nrm2(x);` | `sqrt(transform_reduce(begin(x), end(x), 0.0, plus<>{}, [](auto const& e){return norm(e);}));` | +| SCAL |`blas::scal(aa, x);` | $`x_i \leftarrow \alpha x_i`$ | `x*=aa;` | | `for_each(begin(x), end(x), [aa](auto& e){return e*=aa;})` | 
+| AXPY |`blas::axpy(aa, x, y)` | $`y_i \leftarrow \alpha x_i + y_i`$ | `y+=x` `y-=x` `y+=aa*x` `y-=aa*x` | | `transform(x.begin(), x.end(), y.begin(), y.begin(), [aa](auto ex, auto ey) {return aa*ex + ey;})` | +| DOT | `blas::dot(x, y, res)` | $`r = \sum_i x_i y_i`$ | `res = (x, y);` | `res = blas::dot(x, y)` | `inner_product(begin(x), end(x), begin(y), T{});` | +| | `blas::dot(blas::C(x), y, res)` | $`r = \sum_i \bar x_i y_i`$ | `res = (*x, y);` | `res = blas::dot(blas::C(x), y)` | `inner_product(begin(x), end(x), begin(y), T{}, plus<>{}, [](T const& t1, T const& t2) {return conj(t1)*t2;});` | +| | `blas::dot(x, blas::C(y), res)` | $`r = \sum_i x_i \bar y_i`$ | `res = (x, *y);` | `res = blas::dot(x, blas::C(y));` | `inner_product(x.begin(), x.end(), y.begin(), T{}, plus<>{}, [](T const& t1, T const& t2) {return t1*conj(t2);});` | +| | ~~`blas::dot(blas::C(x), blas::C(y), res)`~~ | $`r = \sum_i \bar x_i \bar y_i`$ not implemented in BLAS, conjugate result | | | `auto res = conj(inner_product(x.begin(), x.end(), y.begin(), T{}));` | +| GEMV | `blas::gemv(aa, A, x, bb, y)` | $`y_i \leftarrow \alpha\sum_j A_{ij}x_j + \beta y_i`$ | `y=A%x` `y=aa*A%x` `y+=A%x` `y+=aa*A%x`[¤] | `y=blas::gemv(aa, A, x)` `y+=blas::gemv(aa, A, x)` | `transform(begin(A), end(A), begin(y), [&x, aa] (auto const& Ac) {return aa*blas::dot(Ac, x);})` | +| | `blas::gemv(aa, blas::T(A), x, bb, y)` | $`y_i \leftarrow \alpha\sum_j A_{ji}x_j + \beta y_i`$ | `y= ~A % x` `y=aa*(~A)%x` `y+=(~A)%x` `y+=aa*(~A)%x` | `y=blas::gemv(aa, blas::T(A), x)` `y+=blas::gemv(aa, blas::T(A), x)` | `transform(begin(transposed(A)), end(transposed(A)), begin(y), [&x, aa] (auto const& Ac) {return aa*blas::dot(Ac, x);})` | +| | `blas::gemv(aa, blas::J(A), x, bb, y)` | $`y_i \leftarrow \alpha\sum_j A_{ij}^*x_j + \beta y_i`$ | `y= *A % x` `y=aa*(*A)%x` `y+=(*A)%x` `y+=aa*(*A)%x` | `y=blas::gemv(aa, blas::J(A), x)` `y+=blas::gemv(aa, blas::J(A), x)` | `transform(begin(A), end(A), begin(y), [&x, aa] (auto const& Ac) {return 
aa*blas::dot(*Ac, x);})` | +| | ~~`blas::gemv(aa, blas::H(A), x, bb, y)`~~ | $`y_i \leftarrow \alpha\sum_j A_{ji}^*x_j + \beta y_i`$ (not BLAS-implemented)| | | `transform(begin(transposed(A)), end(transposed(A)), begin(y), [&x, aa] (auto const& Ac) {return aa*blas::dot(*Ac, x);})` | +| GEMM | `blas::gemm(aa, A, B, bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k A_{ik} B_{kj} + \beta C_{ij}`$ | `C = aa*(A*B)` | `C = blas::gemm(aa, A, B)` `C += blas::gemm(aa, A, B)` | `transform(begin(A), end(A), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, blas::T(B), Ar, bb, move(Cr));})` | +| | `blas::gemm(aa, A, blas::T(B), bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k A_{ik} B_{jk} + \beta C_{ij}`$ | `C = aa*(A* ~B)` | `C = blas::gemm(aa, A, blas::T(B))` `C += blas::gemm(aa, A, blas::T(B))` | `transform(begin(A), end(A), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, B, Ar, bb, move(Cr));})` | +| | `blas::gemm(aa, blas::T(A), B, bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k A_{ki} B_{kj} + \beta C_{ij}`$ | `C =~A * B` `C = aa*(~A * B)` `C+=~A * B` `C+=aa*(~A * B)` | `C = blas::gemm(aa, blas::T(A), B, bb, C)` (or `+=`) | `transform(begin(transposed(A)), end(transposed(A)), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr));})` | +| | `blas::gemm(aa, blas::T(A), blas::T(B), bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k A_{ki} B_{jk} + \beta C_{ij}`$ | `C =~A * ~B` `C = aa*(~A * ~B)` `C+=~A * ~B` `C+=aa*(~A * ~B)` | `C = blas::gemm(aa, blas::T(A), blas::T(B), bb, C)` (or `+=`) | `transform(begin(transposed(A)), end(transposed(A)), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, B, Ar, bb, std::move(Cr));})` | +| | `blas::gemm(aa, A, blas::J(B), bb, C)` (use `blas::gemm(..., blas::T(B), blas::H(A), ..., HC)` and conjtranspose result) | $`C_{ij} \leftarrow \alpha \sum_k A_{ik} B_{kj}^* + \beta C_{ij}`$ (not 
BLAS-implemented) | | | `transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B)](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar](auto const& Bc, auto&& c) {return aa*blas::dot(Ar, blas::C(Bc)) + bb*c;}); return std::move(Cr);});` | +| | ~~`blas::gemm(aa, blas::J(A), B, bb, C)`~~ | $`C_{ij} \leftarrow \alpha \sum_k A_{ik}^* B_{kj} + \beta C_{ij}`$ (not BLAS-implemented) | | | `transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B)](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar](auto const& Bc, auto&& c) {return aa*blas::dot(blas::C(Ar), Bc) + bb*c;}); return std::move(Cr);});` | +| | `blas::gemm(aa, blas::J(A), blas::J(B), bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k \bar{A_{ik}} \bar{B_{kj}} + \beta C_{ij}`$ (not BLAS-implemented) | | | `transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B)](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar](auto const& Bc, auto&& c) {return aa*blas::dot(blas::C(Ar), blas::C(Bc)) + bb*c;}); return std::move(Cr);});` | +| | `blas::gemm(aa, A, blas::H(B), bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k A_{ik} \bar B_{jk} + \beta C_{ij}`$ | `C = aa*(A* ~*B)` (or `+=`) | `C = blas::gemm(aa, A, blas::H(B))` `C += blas::gemm(aa, A, blas::H(B))` | `transform(begin(A), end(A), begin(CC), begin(CC), [&](auto const& Ar, auto&& Cr){return blas::gemv(aa, blas::J(B), Ar, bb, move(Cr));})` | +| | `blas::gemm(aa, blas::H(A), B, bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k \bar A_{ki} B_{kj} + \beta C_{ij}`$ | `CC=~*A *B` | `C=blas::gemm(aa, blas::H(A), B)` | `transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BT=transposed(B)](auto const& Ac, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ac](auto const& Bc, auto&& c){return aa*blas::dot(blas::C(Ac), Bc) + bb*c;}); return move(Cr);})` | +| | `blas::gemm(aa, blas::H(A), blas::H(B), bb, C)` | $`C_{ij} \leftarrow \alpha 
\sum_k \bar A_{ki} \bar B_{jk} + \beta C_{ij}`$ | `CC=~*A * ~*B` | `C=blas::gemm(aa, blas::H(A), blas::H(B))` | `transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [&B](auto const& Ac, auto&& Cr) {transform(begin(B), end(B), begin(Cr), begin(Cr), [&Ac](auto const& Bc, auto&& c) {return conj(std::transform_reduce(begin(Ac), end(Ac), begin(Bc), 0.0*c, std::plus<>{}, [](auto const& a, auto const& b) {return a*b;}));}); return move(Cr);})` | +| | `blas::gemm(aa, blas::T(A), blas::H(B), bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k A_{ki} \bar B_{jk} + \beta C_{ij}`$ | `CC=~A * ~*B` | `C=blas::gemm(aa, blas::T(A), blas::H(B))` | `transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [&B](auto const& Ac, auto&& Cr) {transform(begin(B), end(B), begin(Cr), begin(Cr), [&Ac](auto const& Bc, auto&& c) {return std::transform_reduce(begin(Ac), end(Ac), begin(Bc), 0.0*c, std::plus<>{}, [](auto const& a, auto const& b) {return a*conj(b);});}); return move(Cr);})` | +| | ~~`blas::gemm(aa, blas::H(A), blas::T(B), bb, C)`~~ | $`C_{ij} \leftarrow \alpha \sum_k \bar A_{ki} B_{jk} + \beta C_{ij}`$ (not BLAS-implemented) | | | `transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [&B](auto const& Ac, auto&& Cr) {transform(begin(B), end(B), begin(Cr), begin(Cr), [&Ac](auto const& Bc, auto&& c) {return std::transform_reduce(begin(Ac), end(Ac), begin(Bc), 0.0*c, std::plus<>{}, [](auto const& a, auto const& b) {return conj(a)*b;});}); return move(Cr);})` | +| | ~~`blas::gemm(aa, blas::J(A), blas::H(B), bb, C)`~~ | $`C_{ij} \leftarrow \alpha \sum_k \bar A_{ik} \bar B_{jk} + \beta C_{ij}`$ (not BLAS-implemented) | | | | +| | ~~`blas::gemm(aa, blas::H(A), blas::J(B), bb, C)`~~ | $`C_{ij} \leftarrow \alpha \sum_k \bar A_{ki} \bar B_{kj} + \beta C_{ij}`$ (not BLAS-implemented) | | | | +| TRSM | `blas::trsm(blas::side::right, aa, blas::U(A), B)` | $`B\leftarrow B.U^{-1}`$ | `B /= U(A)` | | TODO | +| | `blas::trsm(blas::side::right, aa, 
blas::L(A), B)` | $`B\leftarrow B.L^{-1}`$ | `B /= L(A)` | | TODO | +| | `blas::trsm(blas::side::left, aa, blas::U(A), B)` | $`B\leftarrow U^{-1}.B`$ | `B \|= U(A)` | | TODO | +| | `blas::trsm(blas::side::left, aa, blas::L(A), B)` | $`B\leftarrow L^{-1}.B`$ | `B \|= L(A)` | | TODO | +| | ~~`blas::trsm(blas::side::right, aa, blas::U(A), blas::J(B))`~~ | $`B^*\leftarrow B^*.U^{-1}`$ $`B\leftarrow B.(U^*)^{-1}`$ | | | TODO | +| | ~~`blas::trsm(blas::side::right, aa, blas::L(A), blas::J(B))`~~ | $`B^*\leftarrow B^*.L^{-1}`$ $`B\leftarrow B.(L^*)^{-1}`$ | | | TODO | +| | `blas::trsm(blas::side::right, aa, blas::U(A), blas::H(B))` | $`B^\dagger\leftarrow B^\dagger.U^{-1}`$ $`B\leftarrow (U^\dagger)^{-1}.B`$ | | | TODO | +| | `blas::trsm(blas::side::right, aa, blas::L(A), blas::H(B))` | $`B^\dagger\leftarrow B^\dagger.L^{-1}`$ $`B\leftarrow (L^\dagger)^{-1}.B`$ | | | TODO | + +[¹]: for reference, not optimal. \ +[²]: `asum` is interpreted as a mechanism to detect null vectors or vectors containing NaN or infinities. \ +[³]: requires an explicit `using namespace multi::operators`, or using-declarations for specific symbols, e.g. `using multi::operator*`/`operator/=`/etc. \ +[¤]: `y *=bb +=aa*A%x` (`gemv(aa, A, x, bb, y)`) would also be possible. diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/asum.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/asum.hpp new file mode 100644 index 0000000000..864883c433 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/asum.hpp @@ -0,0 +1,97 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_ASUM_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_ASUM_HPP +#pragma once + +#include "../blas/core.hpp" + +namespace boost::multi::blas { + +template +auto asum_n(It first, Size n, A0D res) +->decltype(blas::default_context_of(base(first))->asum(n, base(first), stride(first), res), std::next(res)) { + return blas::default_context_of(base(first))->asum(n, base(first), stride(first), res), std::next(res); } + +using std::begin; using std::end; + +template +auto asum(X1D const& x, A0D&& res) // NOLINT(readability-identifier-length) x conventional blas name +//->decltype(asum_n(x.begin(), x.size(), &res)) { +{ return asum_n(std::begin(x), x.size(), &std::forward(res)); } + +template +struct asum_ptr { + A1D const* xp_; // NOLINT(misc-non-private-member-variables-in-classes) + + explicit operator bool() const {return xp_;} + + template + friend auto copy_n(asum_ptr first, [[maybe_unused]] Size2 count, ItOut d_first) + ->decltype(blas::asum_n(typename A1D::iterator{}, typename A1D::size_type{}, d_first)) {assert(count == 1); + return blas::asum_n(first.xp_->begin() , first.xp_->size() , d_first); } + + template + friend auto uninitialized_copy_n(asum_ptr first, As... as) {return copy_n(first, as...);} + + template + static auto uninitialized_copy_n(asum_ptr first, As... 
as) {return copy_n(first, as...);} +}; + +template +[[nodiscard]] +auto asum(A1D const& x) { // NOLINT(readability-identifier-length) BLAS naming + struct ref { + A1D const& x_; // NOLINT(misc-non-private-member-variables-in-classes,cppcoreguidelines-avoid-const-or-ref-data-members) + auto operator&() const& {return asum_ptr{&x_};} // NOLINT(google-runtime-operator) reference type //NOSONAR + using decay_type = decltype(abs(std::declval())); + operator decay_type() const {decay_type ret; blas::asum(x_, ret); return ret;} //NOSONAR // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) allow terse syntax double a = asum(v); + auto operator+() const -> decay_type {return operator decay_type();} + }; + + return ref{x}; +} + +namespace operators { + static constexpr double threshold = 1.0e-12; + + using zero_type = void*****; + + template + auto operator==(A1D const& self, [[maybe_unused]] zero_type zero) -> bool { + assert( zero == nullptr ); + return blas::asum(self) < threshold; + } + + template + auto operator!=(A1D const& self, [[maybe_unused]] zero_type zero) -> bool { + assert( zero == nullptr ); + return blas::asum(self) > threshold; + } + + template + auto contains_nan(A1D const& self) -> bool { + return std::isnan(blas::asum(self)); + } + + template + auto isnan(A1D const& self) -> bool { + return contains_nan(self); + } + + template + auto isfinite(A1D const& self) -> bool { + return std::isfinite(blas::asum(self)); + } + + template + auto isinf(A1D const& self) -> bool { + return std::isinf(blas::asum(self)); + } +} // end namespace operators + +} // end namespace boost::multi::blas + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/axpy.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/axpy.hpp new file mode 100644 index 0000000000..d9834e4b4e --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/axpy.hpp @@ -0,0 +1,178 @@ +// Copyright 2019-2024 
Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_AXPY_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_AXPY_HPP +#pragma once + +#include + +#include +#include + +#define BOOST_MULTI_DECLRETURN(ExpR) -> decltype(ExpR) {return ExpR;} // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing +#define JUSTRETURN(ExpR) {return ExpR;} // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing +// TODO(correaa) ^^^ fix macro name + +namespace boost::multi::blas { + +using core::axpy; + +template +auto axpy_n(typename It1::value_type alpha, It1 first, Size n, OutIt d_first) +->decltype(axpy(n, &alpha, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n) { + return axpy(n, &alpha, base(first) , stride(first) , base(d_first) , stride(d_first) ), d_first + n; } + +template//, class=std::enable_if_t{}>> +auto axpy_n(Context ctxt, typename It1::value_type alpha, It1 first, Size n, OutIt d_first) +->decltype(ctxt->axpy(n, &alpha, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n) { + return ctxt->axpy(n, &alpha, base(first) , stride(first) , base(d_first) , stride(d_first) ), d_first + n; } + +template()[0] = 0.0, *X1DIt{} )> +auto axpy(Context ctxt, typename X1DIt::element alpha, X1DIt x, Y1D&& y) // NOLINT(readability-identifier-length) conventional BLAS names +->decltype( std::forward(y)) { + return axpy_n(ctxt, alpha, x, size(y), begin(y)), std::forward(y); } + +template()[0] = 0.0, size(std::declval()) )> +auto axpy(Context ctxt, typename X1D::element alpha, X1D const& x, Y1D&& y) // NOLINT(readability-identifier-length) conventional BLAS names +->decltype( std::forward(y)) { assert(x.size() == y.size() ); + return axpy_n(ctxt, alpha, begin(x), size(y), begin(y)), std::forward(y); } + +template()[0] = 0.0 )> +auto axpy(typename X1D::element alpha, X1D const& x, Y1D&& y) // NOLINT(readability-identifier-length) 
conventional BLAS names +-> decltype(auto) +{ + auto ctxtp = blas::default_context_of(x.base()); + return boost::multi::blas::axpy(ctxtp, alpha, x, std::forward(y)); +} + +template +auto axpy(X1D const& x, Y1D&& y) -> Y1D&& { // NOLINT(readability-identifier-length) conventional BLAS names + return axpy(+1.0, x, std::forward(y)); +} + +template{}> > +auto axpy(Context&& ctxt, X1D const& x, Y1D&& y) -> Y1D&& { // NOLINT(readability-identifier-length) conventional BLAS names + return axpy(std::forward(ctxt), +1.0, x, std::forward(y)); +} + +template +class axpy_iterator { + Context ctxt_; + Scale alpha_; + ItX x_begin_; + + public: + axpy_iterator(Context ctxt, Scale alpha, ItX x_begin) + : ctxt_{ctxt}, alpha_{alpha}, x_begin_{x_begin} {} + + using difference_type = typename std::iterator_traits::difference_type; + using value_type = typename std::iterator_traits::value_type; + using pointer = void; + using reference = void; + using iterator_category = std::random_access_iterator_tag; + + friend auto operator-(axpy_iterator const& self, axpy_iterator const& other) -> difference_type { + assert(self.alpha_ == other.alpha_); + return self.x_begin_ - other.x_begin_; + } + + template + friend auto copy_n(axpy_iterator first, difference_type count, It1DOut result) { + blas::axpy_n(first.ctxt_, first.alpha_, first.x_begin_, count, result); // NOLINT(fuchsia-default-arguments-calls) + return result + count; + } + template + friend auto copy(axpy_iterator first, axpy_iterator last, It1DOut result){return copy_n(first, last - first, result);} +}; + +template +class axpy_range { + Context ctxt_; + Scale alpha_; + ItX x_begin_; + size_type count_; + + public: + axpy_range(axpy_range const&) = delete; + axpy_range(axpy_range&&) noexcept = delete; + ~axpy_range() = default; + auto operator=(axpy_range const&) -> axpy_range& = delete; + auto operator=(axpy_range&&) noexcept -> axpy_range& = delete; + + axpy_range(Context ctxt, Scale alpha, ItX x_first, ItX x_last) + : ctxt_{ctxt}, 
alpha_{alpha}, x_begin_{x_first}, count_{x_last - x_first} {} + + using iterator = axpy_iterator; + + auto begin() const -> iterator{ return {ctxt_, alpha_, x_begin_ }; } + auto end() const -> iterator{ return {ctxt_, alpha_, x_begin_ + count_}; } + + auto size() const -> size_type{return end() - begin();} + + template + friend auto operator+=(Other&& other, axpy_range const& self) -> Other&& { + assert(other.size() == self.count_); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : bug in clang-tidy https://reviews.llvm.org/D31130 + blas::axpy_n(self.ctxt_, +static_cast(self.alpha_), self.x_begin_, self.count_, other.begin()); // NOLINT(fuchsia-default-arguments-calls) + return std::forward(other); + } + template + friend auto operator-=(Other&& other, axpy_range const& self) -> Other&& { + assert(other.size() == self.count_); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : bug in clang-tidy https://reviews.llvm.org/D31130 + blas::axpy_n(self.ctxt_, -static_cast(self.alpha_), self.x_begin_, self.count_, other.begin()); // NOLINT(fuchsia-default-arguments-calls) + return std::forward(other); + } + auto operator*=(Scale s) & -> axpy_range& {alpha_ *= s; return *this;} // NOLINT(readability-identifier-length) conventional BLAS naming +}; + +template{}>> +auto axpy(Context&& ctxt, Scalar a, X1D const& x) // NOLINT(readability-identifier-length) conventional BLAS naming +-> axpy_range { // NOLINT(readability-identifier-length) conventional BLAS naming + return {std::forward(ctxt), a, begin(x), end(x)}; +} + +template +auto axpy(Scalar a, X1D const& x) // NOLINT(readability-identifier-length) conventional BLAS naming +{ + auto ctxtp = blas::default_context_of(x.base()); + return axpy_range{ctxtp, a, begin(x), end(x)}; // TODO(correaa) fix temporary +} + +template +class scaled { + AA a_; + X const& x_; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) + + public: + scaled(AA a, X const& 
x) : a_{a}, x_{x} {} // NOLINT(readability-identifier-length) conventional BLAS naming + template + friend auto operator+=(Y1D&& y, scaled const& ax) {return axpy(+ax.a_, ax.x_, std::forward(y));} // NOLINT(readability-identifier-length) conventional BLAS naming + template + friend auto operator-=(Y1D&& y, scaled const& ax) {return axpy(-ax.a_, ax.x_, std::forward(y));} // NOLINT(readability-identifier-length) conventional BLAS naming +}; + +namespace operators { + +template struct algebraic_traits {static auto one() {return T{1.0};}}; + +template struct algebraic_traits> {static auto one() {return std ::complex{T{1}, T{0}};}}; +template struct algebraic_traits> {static auto one() {return multi::complex{T{1}, T{0}};}}; + +template auto operator+=(X1D&& x, Y1D const& other) BOOST_MULTI_DECLRETURN(axpy(+algebraic_traits::one(), other, std::forward(x))) // NOLINT(fuchsia-default-arguments-calls,readability-identifier-length) conventional name in BLAS +template auto operator-=(X1D&& x, Y1D const& other) BOOST_MULTI_DECLRETURN(axpy(-algebraic_traits::one(), other, std::forward(x))) // NOLINT(fuchsia-default-arguments-calls,readability-identifier-length) conventional name in BLAS + +template =0> +auto operator*(typename X::element_type a, X const& x) {return scaled{a, x};} // NOLINT(readability-identifier-length) conventional BLAS naming + +template auto operator+(X1D const& x, Y1D const& y) -> std::decay_t {auto X = x.decay(); X += y; return X;} // NOLINT(readability-identifier-length) conventional name in BLAS +template auto operator-(X1D const& x, Y1D const& y) -> std::decay_t {auto X = x.decay(); X -= y; return X;} // NOLINT(readability-identifier-length) conventional name in BLAS + +} // end namespace operators + +} // end namespace boost::multi::blas + +#undef BOOST_MULTI_DECLRETURN +#undef JUSTRETURN + +#endif // BOOST_MULTI_ADAPTORS_BLAS_AXPY_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/complex_traits.hpp 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/complex_traits.hpp new file mode 100644 index 0000000000..6a7df6247a --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/complex_traits.hpp @@ -0,0 +1,40 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_COMPLEX_TRAITS_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_COMPLEX_TRAITS_HPP +#pragma once + +#if defined(__NVCC__) || defined(__HIPCC__) // defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_NVIDIA__) +#include +#endif + +#include // for std::complex + +namespace boost::multi::blas { + +template +struct complex_traits { + using real_type = typename Complex::real_type; + constexpr static auto imaginary_unit() { return Complex{real_type{0}, real_type{1}}; } +}; + +template +struct complex_traits> { + using real_type = typename ::std::complex::value_type; + constexpr static auto imaginary_unit() { return ::std::complex{0, 1}; } +}; + +#if defined(__NVCC__) || defined(__HIPCC__) // defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_NVIDIA__) +template +struct complex_traits<::thrust::complex> { + using real_type = typename ::thrust::complex::value_type; + constexpr static auto imaginary_unit() { return std::complex{0, 1}; } +}; +#endif + + +} // end namespace boost::multi::blas + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/copy.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/copy.hpp new file mode 100644 index 0000000000..ebba4cfeb8 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/copy.hpp @@ -0,0 +1,82 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_COPY_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_COPY_HPP +#pragma once + +#include +#include + +#include + +namespace boost::multi::blas { + +using core::copy; + +template +auto copy_n(It first, Size n, OutIt d_first) +->decltype(blas::default_context_of(first.base())->copy(n, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n) { + return blas::default_context_of(first.base())->copy(n, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n; } + +template +auto copy(X1D const& x, Y1D&& y) // NOLINT(readability-identifier-length) BLAS naming +->decltype(blas::copy_n(x.begin(), size(x), y.begin()), std::forward(y)) { + assert( (x.size() == y.size()) ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : assert + return blas::copy_n(x.begin(), x.size(), y.begin()), std::forward(y); } + +template +struct copy_it { + It it_; // NOLINT(misc-non-private-member-variables-in-classes) + + using difference_type = typename std::iterator_traits::difference_type; + using value_type = typename std::iterator_traits::value_type; + using pointer = void; + using reference = void; + using iterator_category = std::output_iterator_tag; + using iterator_type = copy_it; + + friend auto operator-(copy_it const& c1, copy_it const& c2) {return c1.it_ - c2.it_;} + + template + friend constexpr auto copy_n(copy_it first, difference_type count, It1DOut result) -> It1DOut{ + return blas::copy_n(first.it_, count, result); + } + template + friend constexpr auto copy(copy_it first, copy_it last, It1DOut d_first) -> It1DOut{ + return copy_n(first, distance(first, last), d_first); + } + template + friend constexpr auto uninitialized_copy(copy_it first, copy_it last, It1DOut d_first) -> It1DOut{ + return copy_n(first, distance(first, last), d_first); + } + friend constexpr auto distance(copy_it const& self, copy_it const& other) -> difference_type 
{ + return other.it_ - self.it_; + } + constexpr auto operator*() const -> value_type {return *it_;} +}; + +template [[nodiscard]] +auto copy(A1D const& x) { // NOLINT(readability-identifier-length) BLAS naming + struct ref { + A1D const& x_; // NOLINT(misc-non-private-member-variables-in-classes,cppcoreguidelines-avoid-const-or-ref-data-members) + using iterator = copy_it; + auto begin() const {return iterator{x_.begin()};} + auto end() const {return iterator{x_.end() };} + auto size() const {return x_.size();} + auto extensions() const {return x_.extensions();} + }; + return ref{x}; +} + +namespace operators { + template + auto operator<<(A1D&& lhs, B1D const& rhs) -> A1D&& { + return boost::multi::blas::copy(rhs, std::forward(lhs)); + } +} // end namespace operators + +} // end namespace boost::multi::blas + +#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/core.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/core.hpp similarity index 50% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/core.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/core.hpp index a5e270ccfb..4b250b0854 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/core.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/core.hpp @@ -1,13 +1,13 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt -#ifndef MULTI_ADAPTORS_BLAS_CORE_HPP // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -#define MULTI_ADAPTORS_BLAS_CORE_HPP +#ifndef BOOST_MULTI_ADAPTORS_BLAS_CORE_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_CORE_HPP +#pragma once // https://software.intel.com/en-us/articles/intel-mkl-link-line-advisor -//#include // consider being replaceable by cblas.h - #include #include #include // int64_t @@ -16,30 +16,24 @@ #include // numeric_limits #include // is_convertible -#include "../../config/MARK.hpp" +// #include "../../config/MARK.hpp" #include "../blas/traits.hpp" -#if 0 - #define MULTI_ASSERT1(ExpR) assert (ExpR) - #define MULTI_ASSERT2(ExpR, DescriptioN) MULTI_ASSERT1(ExpR && ##DescriptioN) +#if ! defined(NDEBUG) + #include + #include + #define BOOST_MULTI_ASSERT1(ExpR) (void)((ExpR)?0:throw std::logic_error("\n" __FILE__ ":"+std::to_string(__LINE__)+"::\n"+std::string(__PRETTY_FUNCTION__)+"\nLogic assertion `" #ExpR "' failed.")) /*NOLINT(fuchsia-default-arguments-calls,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ + #define BOOST_MULTI_ASSERT2(ExpR, DescriptioN) (void)((ExpR)?0:throw std::DescriptioN("\n" __FILE__ ":"+std::to_string(__LINE__)+"::\n"+std::string(__PRETTY_FUNCTION__)+"\nLogic assertion `" #ExpR "' failed.")) #else - #if not defined(NDEBUG) - #include - #include - #define MULTI_ASSERT1(ExpR) (void)((ExpR)?0:throw std::logic_error("\n" __FILE__ ":"+std::to_string(__LINE__)+"::\n"+std::string(__PRETTY_FUNCTION__)+"\nLogic assertion `" #ExpR "' failed.")) /*NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ - #define MULTI_ASSERT2(ExpR, DescriptioN) (void)((ExpR)?0:throw std::DescriptioN("\n" __FILE__ ":"+std::to_string(__LINE__)+"::\n"+std::string(__PRETTY_FUNCTION__)+"\nLogic assertion `" #ExpR "' failed.")) - #else - #define MULTI_ASSERT1(ExpR) assert(ExpR) - #define MULTI_ASSERT2(ExpR, DescriptioN) assert(EXpR) - #endif + 
#define BOOST_MULTI_ASSERT1(ExpR) assert(ExpR) + #define BOOST_MULTI_ASSERT2(ExpR, DescriptioN) assert(ExpR) #endif #ifdef CBLAS_H #define BLAS(NamE) cblas_##NamE #else #define BLAS(NamE) NamE##_ -extern "C" { #ifndef MULTI_BLAS_INT #if defined(__INTPTR_WIDTH__) @@ -54,36 +48,38 @@ extern "C" { #define v void // cppcheck-suppress unusedStructMember -using Complex_float = struct { float real, imag; }; +using Complex_float = struct { float real; float imag; }; // cppcheck-suppress unusedStructMember -using Complex_double = struct { double real, imag; }; +using Complex_double = struct { double real; double imag; }; #define C Complex_float // _Complex s #define Z Complex_double // _Complex d #if defined(MULTI_BLAS_INT) #if MULTI_BLAS_INT==32 - #define INT int32_t + using INT = std::int32_t; // #define INT int32_t #elif MULTI_BLAS_INT==64 - #define INT int64_t + using INT = std::int64_t; // #define INT int64_t #else - #define INT int32_t // 32bit safe? pesimistic? + using INT = std::int32_t; // #define INT int32_t // 32bit safe? pessimistic? #endif #else - #define INT int32_t // 32bit safe? pesimistic? + using INT = std::int32_t; // #define INT int32_t // 32bit safe? pessimistic? 
#endif -namespace core{ +namespace core { using size_t = INT; using ssize_t = std::make_signed_t; } // end namespace core +extern "C" { + #define INTEGER INT const& #define N INTEGER n #define INCX INTEGER incx #define INCY INTEGER incy -static_assert(sizeof(INT)==32/8 or sizeof(INT)==64/8, "please set MULTI_BLAS_INT to int32_t or int64_t"); +static_assert(sizeof(INT)==32/8 || sizeof(INT)==64/8, "please set MULTI_BLAS_INT to int32_t or int64_t"); // indented declarations like in https://www.netlib.org/lapack/lug/node145.html @@ -100,10 +96,11 @@ static_assert(sizeof(INT)==32/8 or sizeof(INT)==64/8, "please set MULTI_BLAS_INT #define xDOT(R, TT, T) auto TT##dot ##_ ( N, T const *x, INCX, T const *y, INCY) -> R // NOLINT(readability-identifier-length) conventional BLAS naming // PGI/NVC++ compiler uses a blas version that needs -DRETURN_BY_STACK -#if defined(RETURN_BY_STACK) || (defined(FORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID) && FORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID) +#if defined(BLAS_DOT_RETURNS_VOID) +//#if defined(RETURN_BY_STACK) || (defined(FORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID) && FORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID) //#define xDOT(R, TT, T) v TT##dot ##_ (R*, N, T const *x, INCX, T const *y, INCY) -#define xDOTU(R, T) v T ##dotu ##_ (R*, N, T const *x, INCX, T const *y, INCY) // NOLINT(bugprone-macro-parentheses) : macro arg expands to type -#define xDOTC(R, T) v T ##dotc ##_ (R*, N, T const *x, INCX, T const *y, INCY) // NOLINT(bugprone-macro-parentheses) : macro arg expands to type +#define xDOTU(R, T) v T ##dotu ##_ (R*, N, T const * /*x*/, INCX, T const * /*y*/, INCY) // NOLINT(bugprone-macro-parentheses) : macro arg expands to type +#define xDOTC(R, T) v T ##dotc ##_ (R*, N, T const * /*x*/, INCX, T const * /*y*/, INCY) // NOLINT(bugprone-macro-parentheses) : macro arg expands to type #else #define xDOTU(R, T) auto T ##dotu ##_ ( N, T const *x, INCX, T const *y, INCY) -> R // NOLINT(readability-identifier-length) conventional BLAS naming #define xDOTC(R, 
T) auto T ##dotc ##_ ( N, T const *x, INCX, T const *y, INCY) -> R // NOLINT(readability-identifier-length) conventional BLAS naming @@ -114,7 +111,7 @@ static_assert(sizeof(INT)==32/8 or sizeof(INT)==64/8, "please set MULTI_BLAS_INT #define xASUM(R, TT, T) auto TT##asum##_ ( N, T const *x, INCX) -> R // NOLINT(readability-identifier-length) conventional BLAS naming #define IxAMAX(T) auto i##T ##amax##_ ( N, T const* x, INCX) -> INT // NOLINT(readability-identifier-length) conventional BLAS naming -xROTG(s, s) ; xROTG(d,d) ;// MKL extension xROTG(c, s); xROTG(z, d); +xROTG(s, s) ; xROTG(d,d) ; // TODO(correaa) MKL extension for "(c, s)" and "(z, d)"? xROTMG(s) ; xROTMG(d) ; xROT(s, s, s) ; xROT(d, d, d) ; xROT(cs, c, s); xROT(zd, z, d); xROTM(s) ; xROTM(d) ; @@ -122,12 +119,11 @@ xSWAP(s) ; xSWAP(d) ; xSWAP(c) ; xSWAP(z); xSCAL(s, s, s); xSCAL(d, d, d); xSCAL(c, c, c); xSCAL(z, z, z); xSCAL(zd, d, z); xSCAL(cs, s, c); xCOPY(s) ; xCOPY(d) ; xCOPY(c) ; xCOPY(z) ; xAXPY(s) ; xAXPY(d) ; xAXPY(c) ; xAXPY(z) ; + xDOT(s, s, s) ; xDOT(d, d, d) ; xDOT(d, ds, s); +xDOTU(C, c); xDOTU(Z, z); // TODO(correaa) MKL extension for "(c, c)" and "(z, z)"? 
+xDOTC(C, c); xDOTC(Z, z); // TODO(correaa) MKL extension for "(sds, s)" -xDOTU(C, c); xDOTU(Z, z); -//xDOTU(c, c); xDOTU(z, z); -xDOTC(C, c); xDOTC(Z, z); -//xxDOT(sds, s); xNRM2(s, s, s); xNRM2(d, d, d); xNRM2(s, sc, c); xNRM2(d, dz, z); xASUM(s, s, s); xASUM(d, d, d); xASUM(s, sc, c); xASUM(d, dz, z); IxAMAX(s); IxAMAX(d); IxAMAX(c); IxAMAX(z); @@ -181,10 +177,11 @@ xTRSM(s); xTRSM(d); xTRSM(c) ; xTRSM(z) ; #undef xSWAP #undef xCOPY #undef xAXPY + #undef xDOT #undef xDOTU #undef xDOTC -#undef xxDOT + #undef xNRM2 #undef xASUM #undef IxAMAX @@ -217,43 +214,37 @@ xTRSM(s); xTRSM(d); xTRSM(c) ; xTRSM(z) ; namespace boost::multi::blas { -//namespace t { - using s = float; - using d = double; - using c = std::complex; //using C = Complex_float ; - using z = std::complex; //using Z = Complex_double; - using v = void; -//} // end namespace types +using s = float; +using d = double; +using c = std::complex; +using z = std::complex; +using v = void; // Boundary Checked value -#define BC(value) [](auto checked) {assert(checked >= std::numeric_limits::min() and checked < std::numeric_limits::max()); return checked;}(value) /*NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ - -//xrotg(s, s) xrotg(d, d) //MKL extension xrotg(c, s); xrotg(z, d); -//xrotmg(s) xrotmg(d) -//xrot(s, s, s) xrot(d, d, d) xrot(c, cs, s) xrot(z, zd, d) -//xrotm(s) xrotm(d) -//xswap(s) xswap(d) xswap(c) xswap(z) +#define BC(value) [](auto checked) {assert(checked >= std::numeric_limits::min() && checked < std::numeric_limits::max()); return checked;}(value) /*NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ namespace core { using std::enable_if_t; using std::is_assignable; -template::element_type, class SY = typename std::pointer_traits::element_type, enable_if_t{} and is_s{} and is_assignable{},int> =0> void swap(ssize_t n, SX* x, ptrdiff_t incx, SY* y, ptrdiff_t incy) {BLAS(sswap)(n, ( float *)(x), incx, ( float *)(y), incy);} // 
NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template::element_type, class DY = typename std::pointer_traits::element_type, enable_if_t{} and is_d{} and is_assignable{},int> =0> void swap(ssize_t n, DX* x, ptrdiff_t incx, DY* y, ptrdiff_t incy) {BLAS(dswap)(n, ( double *)(x), incx, ( double *)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template::element_type, class CY = typename std::pointer_traits::element_type, enable_if_t{} and is_c{} and is_assignable{},int> =0> void swap(ssize_t n, CX* x, ptrdiff_t incx, CY* y, ptrdiff_t incy) {BLAS(cswap)(n, (std::complex*)(x), incx, (std::complex*)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template::element_type, class ZY = typename std::pointer_traits::element_type, enable_if_t{} and is_z{} and is_assignable{},int> =0> void swap(ssize_t n, ZX* x, ptrdiff_t incx, ZY* y, ptrdiff_t incy) {BLAS(zswap)(n, (std::complex*)(x), incx, (std::complex*)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +// TODO(correaa) implement xrotg, xrotmg, xrot, xrotm + +template{} && is_s{} && is_assignable{},int> =0> void swap(ssize_t n, SX* x, ptrdiff_t incx, SY* y, ptrdiff_t incy) noexcept {BLAS(sswap)(n, reinterpret_cast< float *>(x), incx, ( float *)(y), incy);} // 
NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) // NOSONAR +template{} && is_d{} && is_assignable{},int> =0> void swap(ssize_t n, DX* x, ptrdiff_t incx, DY* y, ptrdiff_t incy) noexcept {BLAS(dswap)(n, reinterpret_cast< double *>(x), incx, ( double *)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) // NOSONAR +template{} && is_c{} && is_assignable{},int> =0> void swap(ssize_t n, CX* x, ptrdiff_t incx, CY* y, ptrdiff_t incy) noexcept {BLAS(cswap)(n, reinterpret_cast*>(x), incx, (std::complex*)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) // NOSONAR +template{} && is_z{} && is_assignable{},int> =0> void swap(ssize_t n, ZX* x, ptrdiff_t incx, ZY* y, ptrdiff_t incy) noexcept {BLAS(zswap)(n, reinterpret_cast*>(x), incx, (std::complex*)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) // NOSONAR -template{} and is_s{} and is_assignable{},int> =0> void copy(ssize_t n, SX* x, ptrdiff_t incx, SY* y, ptrdiff_t incy) {BLAS(scopy)(n, ( float const*)(x), incx, ( float *)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template{} and is_d{} and is_assignable{},int> =0> void copy(ssize_t n, DX* x, ptrdiff_t incx, DY* y, ptrdiff_t incy) {BLAS(dcopy)(n, ( double const*)(x), incx, ( double *)(y), incy);} // 
NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template{} and is_c{} and is_assignable{},int> =0> void copy(ssize_t n, CX* x, ptrdiff_t incx, CY* y, ptrdiff_t incy) {BLAS(ccopy)(n, (std::complex const*)(x), incx, (std::complex*)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template{} and is_z{} and is_assignable{},int> =0> void copy(ssize_t n, ZX* x, ptrdiff_t incx, ZY* y, ptrdiff_t incy) {BLAS(zcopy)(n, (std::complex const*)(x), incx, (std::complex*)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +template{} && is_s{} && is_assignable{},int> =0> void copy(ssize_t n, SX* x, ptrdiff_t incx, SY* y, ptrdiff_t incy) {BLAS(scopy)(n, ( float const*)(x), incx, ( float *)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +template{} && is_d{} && is_assignable{},int> =0> void copy(ssize_t n, DX* x, ptrdiff_t incx, DY* y, ptrdiff_t incy) {BLAS(dcopy)(n, ( double const*)(x), incx, ( double *)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +template{} && is_c{} && is_assignable{},int> =0> void copy(ssize_t n, CX* x, ptrdiff_t incx, CY* y, ptrdiff_t incy) {BLAS(ccopy)(n, (std::complex const*)(x), incx, (std::complex*)(y), incy);} // 
NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +template{} && is_z{} && is_assignable{},int> =0> void copy(ssize_t n, ZX* x, ptrdiff_t incx, ZY* y, ptrdiff_t incy) {BLAS(zcopy)(n, (std::complex const*)(x), incx, (std::complex*)(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) // TODO(correaa) : add mixed-type scal (zdscal, csscal) -template::element_type, class ALPHA = typename std::pointer_traits::element_type, enable_if_t{} and is_s{} and is_assignable{}>* = nullptr> void scal(ssize_t n, ALPHAP a, SXP xp, ptrdiff_t incx) {BLAS(sscal)(n, *( float const*)a, ( float *)xp, incx);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template::element_type, class ALPHA = typename std::pointer_traits::element_type, enable_if_t{} and is_d{} and is_assignable{}>* = nullptr> void scal(ssize_t n, ALPHAP a, DXP xp, ptrdiff_t incx) {BLAS(dscal)(n, *( double const*)a, ( double *)xp, incx);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template::element_type, class ALPHA = typename std::pointer_traits::element_type, enable_if_t{} and is_c{} and is_assignable{}>* = nullptr> void scal(ssize_t n, ALPHAP a, CXP xp, ptrdiff_t incx) {BLAS(cscal)(n, *(std::complex const*)a, (std::complex*)xp, incx);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template::element_type, 
class ALPHA = typename std::pointer_traits::element_type, enable_if_t{} and is_z{} and is_assignable{}>* = nullptr> void scal(ssize_t n, ALPHAP a, ZXP xp, ptrdiff_t incx) {BLAS(zscal)(n, *(std::complex const*)a, (std::complex*)xp, incx);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +template::element_type, class ALPHA = typename std::pointer_traits::element_type, enable_if_t{} && is_s{} && is_assignable{}>* = nullptr> void scal(ssize_t n, ALPHAP a, SXP xp, ptrdiff_t incx) {BLAS(sscal)(n, *( float const*)a, ( float *)xp, incx);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +template::element_type, class ALPHA = typename std::pointer_traits::element_type, enable_if_t{} && is_d{} && is_assignable{}>* = nullptr> void scal(ssize_t n, ALPHAP a, DXP xp, ptrdiff_t incx) {BLAS(dscal)(n, *( double const*)a, ( double *)xp, incx);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +template::element_type, class ALPHA = typename std::pointer_traits::element_type, enable_if_t{} && is_c{} && is_assignable{}>* = nullptr> void scal(ssize_t n, ALPHAP a, CXP xp, ptrdiff_t incx) {BLAS(cscal)(n, *(std::complex const*)a, (std::complex*)xp, incx);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +template::element_type, class ALPHA = typename std::pointer_traits::element_type, enable_if_t{} && is_z{} && is_assignable{}>* = nullptr> void scal(ssize_t n, ALPHAP a, ZXP xp, ptrdiff_t incx) {BLAS(zscal)(n, *(std::complex 
const*)a, (std::complex*)xp, incx);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) using std::pointer_traits; using std::enable_if_t; @@ -261,8 +252,8 @@ using std::is_convertible_v; #define xaxpy(T) \ template::element_type, class SYP, class SY = typename pointer_traits::element_type, enable_if_t< \ - is_##T{} and is_##T{} and is_##T{} and is_assignable{} \ - and is_convertible_v and is_convertible_v \ + is_##T{} && is_##T{} && is_##T{} && is_assignable{} \ + && is_convertible_v && is_convertible_v \ , int> =0> \ void axpy(size_t n, ALPHA const* a, SXP x, size_t incx, SYP y, size_t incy) {BLAS(T##axpy)(n, (T const *)a, (T const*)static_cast(x), incx, (T*)static_cast(y), incy);} /*NOLINT(readability-identifier-length) NOLINT(readability-identifier-length) conventional BLAS name*/ @@ -270,13 +261,13 @@ xaxpy(s) xaxpy(d) xaxpy(c) xaxpy(z) #undef xaxpy } // end namespace core -#undef xrotg -#undef xrot -#undef xswap -#undef xscal -#undef xcopy -#undef xaxpy -#undef xdot +// #undef xrotg +// #undef xrot +// #undef xswap +// #undef xscal +// #undef xcopy +// #undef xaxpy +// #undef xdot #ifndef CBLAS_H @@ -285,26 +276,25 @@ namespace core { using std::enable_if_t; using std::is_assignable; -template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_s{} and is_assignable{}, int> =0> void dot (ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) {auto const rr = BLAS(sdot )(n, (s const*)static_cast(x), incx, (s const*)static_cast(y), incy); std::memcpy(reinterpret_cast(static_cast(r)), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));} // 
NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_d{} and is_assignable{}, int> =0> void dot (ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) {auto const rr = BLAS(ddot )(n, (d const*)static_cast(x), incx, (d const*)static_cast(y), incy); std::memcpy(reinterpret_cast(static_cast(r)), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_s{} && is_assignable{}, int> =0> void dot (ssize_t n, XP* x, ptrdiff_t incx, YP* y, ptrdiff_t incy, RP* r) {auto const rr = BLAS(sdot )(n, (s const*)static_cast(x), incx, (s const*)static_cast(y), incy); std::memcpy(reinterpret_cast(static_cast(r)), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) // NOSONAR +template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_d{} && is_assignable{}, int> =0> void dot (ssize_t n, XP* x, ptrdiff_t incx, YP* y, ptrdiff_t incy, RP* r) {auto const rr = BLAS(ddot )(n, (d const*)static_cast(x), incx, (d const*)static_cast(y), incy); std::memcpy(reinterpret_cast(static_cast(r)), &rr, sizeof(rr)); 
static_assert(sizeof(rr)==sizeof(*r));} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) // NOSONAR // PGI/NVC++ compiler uses a blas version that needs -DRETURN_BY_STACK -//#if defined(RETURN_BY_STACK) || (defined(FORTRAN_COMLEX_FUNCTIONS_RETURN_VOID) && FORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID) -#if defined(FORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID) && FORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID -//template{} and is_s{} and is_assignable{}, int> =0> void dot (size_t n, X* x, size_t incx, Y* y, size_t incy, R* r) {BLAS(sdot )((float *)r, n, (s const*)x, incx, (s const*)y, incy);} -//template{} and is_d{} and is_assignable{}, int> =0> void dot (size_t n, X* x, size_t incx, Y* y, size_t incy, R* r) {BLAS(ddot )((double*)r, n, (d const*)x, incx, (d const*)y, incy);} +#if defined(BLAS_DOT_RETURNS_VOID) +template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_c{} && is_assignable{}, int> =0> void dotu(ssize_t n, XP xp, ptrdiff_t incx, YP yp, ptrdiff_t incy, RP rp) { BLAS(cdotu)(reinterpret_cast(rp), n, (c const*)static_cast(xp), incx, (c const*)static_cast(yp), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,google-readability-casting) : adapt types +template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_z{} && is_assignable{}, int> =0> void dotu(ssize_t n, XP xp, ptrdiff_t incx, YP yp, ptrdiff_t incy, RP rp) { BLAS(zdotu)(reinterpret_cast(rp), n, (z const*)static_cast(xp), incx, (z const*)static_cast(yp), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,google-readability-casting) : adapt types -template::element_type, class YP, class Y = typename std::pointer_traits::element_type, 
class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_c{} and is_assignable{}, int> =0> void dotu(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) { BLAS(cdotu)(reinterpret_cast(r), n, (c const*)static_cast(x), incx, (c const*)static_cast(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,google-readability-casting) : adapt types -template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_z{} and is_assignable{}, int> =0> void dotu(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) { BLAS(zdotu)(reinterpret_cast(r), n, (z const*)static_cast(x), incx, (z const*)static_cast(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,google-readability-casting) : adapt types - -template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_c{} and is_assignable{}, int> =0> void dotc(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) { BLAS(cdotc)(reinterpret_cast(r), n, (c const*)static_cast(x), incx, (c const*)static_cast(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,google-readability-casting) : adapt types -template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_z{} and is_assignable{}, int> =0> void dotc(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) { BLAS(zdotc)(reinterpret_cast(r), n, (z const*)static_cast(x), incx, (z const*)static_cast(y), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,google-readability-casting) : adapt types +template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && 
is_c{} && is_assignable{}, int> =0> void dotc(ssize_t n, XP xp, ptrdiff_t incx, YP yp, ptrdiff_t incy, RP rp) { BLAS(cdotc)(reinterpret_cast(rp), n, (c const*)static_cast(xp), incx, (c const*)static_cast(yp), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,google-readability-casting) : adapt types +template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_z{} && is_assignable{}, int> =0> void dotc(ssize_t n, XP xp, ptrdiff_t incx, YP yp, ptrdiff_t incy, RP rp) { BLAS(zdotc)(reinterpret_cast(rp), n, (z const*)static_cast(xp), incx, (z const*)static_cast(yp), incy);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,google-readability-casting) : adapt types #else -template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_c{} and is_assignable{}, int> =0> void dotu(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) {auto const rr = BLAS(cdotu)( n, (c const*)static_cast(x), incx, (c const*)static_cast(y), incy); std::memcpy(reinterpret_cast(static_cast(r)), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_z{} and is_assignable{}, int> =0> void dotu(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) {auto const rr = BLAS(zdotu)( n, (z const*)static_cast(x), incx, (z const*)static_cast(y), incy); std::memcpy(reinterpret_cast(static_cast(r)), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));} // 
NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) - -template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_c{} and is_assignable{}, int> =0> void dotc(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) {auto const rr = BLAS(cdotc)( n, (c const*)static_cast(x), incx, (c const*)static_cast(y), incy); std::memcpy(reinterpret_cast(static_cast(r)), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) -template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_z{} and is_assignable{}, int> =0> void dotc(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) {auto const rr = BLAS(zdotc)( n, (z const*)static_cast(x), incx, (z const*)static_cast(y), incy); std::memcpy(reinterpret_cast(static_cast(r)), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +// NOLINTBEGIN(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) +// TODO(correaa) implement workaround for bug in Apple Accelerate BLAS ? 
https://stackoverflow.com/a/77017238/225186 +template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_c{} && is_assignable{}, int> =0> void dotu(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) {auto const rr = BLAS(cdotu)( n, (c const*)static_cast(x), incx, (c const*)static_cast(y), incy); std::memcpy(reinterpret_cast*>(static_cast(r))->data(), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));} // NOSONAR +template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_z{} && is_assignable{}, int> =0> void dotu(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) {auto const rr = BLAS(zdotu)( n, (z const*)static_cast(x), incx, (z const*)static_cast(y), incy); std::memcpy(reinterpret_cast*>(static_cast(r))->data(), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));} // NOSONAR + +template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_c{} && is_assignable{}, int> =0> void dotc(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) {auto const rr = BLAS(cdotc)( n, (c const*)static_cast(x), incx, (c const*)static_cast(y), incy); std::memcpy(reinterpret_cast*>(static_cast(r))->data(), &rr, sizeof(rr)); static_assert(sizeof(rr)==sizeof(*r));} // NOSONAR +template::element_type, class YP, class Y = typename std::pointer_traits::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_z{} && is_assignable{}, int> =0> void dotc(ssize_t n, XP x, ptrdiff_t incx, YP y, ptrdiff_t incy, RP r) {auto const rr = BLAS(zdotc)( n, (z const*)static_cast(x), incx, (z const*)static_cast(y), incy); std::memcpy(reinterpret_cast*>(static_cast(r))->data(), &rr, sizeof(rr)); 
static_assert(sizeof(rr)==sizeof(*r));} // NOSONAR +// NOLINTEND(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-readability-casting,readability-identifier-length) #endif } // end namespace core @@ -324,31 +314,29 @@ namespace core { namespace core { template auto dot(S n, s const& b, s const* x, S incx, s const* y, S incy) -> s {return BLAS(sdsdot)(BC(n), b, x, BC(incx), y, BC(incy));} // NOLINT(readability-identifier-length) conventional BLAS name - -//template void dot(S n, s const& b, s const* x, S incx, s const* y, S incy, s* result){*result = BLAS(sdsdot)(BC(n), b, x, BC(incx), y, BC(incy));} } // end namespace core -//#define xnrm2(R, T, TT) template v nrm2 (S n, add_const_ptr_t x, S incx, R* r){*r = BLAS(TT##nrm2 )(BC(n), x, BC(incx));} - #define xasum(T, TT) template auto asum (S n, T const* x, S incx){return BLAS(TT##asum )(BC(n), x, BC(incx)) ;} // NOLINT(readability-identifier-length) conventional BLAS name #define ixamax(T) template auto iamax(S n, T const* x, S incx){return BLAS(i##T##amax)(BC(n), x, BC(incx)) - 1;} // NOLINT(readability-identifier-length) conventional BLAS name -xasum(s, s) xasum(d, d) xasum (c, sc) xasum(z, dz) - namespace core { -// xnrm2(s, s, s) xnrm2(d, d, d) xnrm2(s, c, sc) xnrm2(d, z, dz) -template::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_s{} and std::is_assignable{} , int> =0> void nrm2(ssize_t n, XP x, ptrdiff_t incx, RP r){auto rr = BLAS(snrm2) (n, (s const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, sizeof(s));} // NOLINT(google-readability-casting,readability-identifier-length) -template::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_d{} and std::is_assignable{} , int> =0> void nrm2(ssize_t n, XP x, ptrdiff_t incx, RP r){auto rr = BLAS(dnrm2) (n, (d const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, 
sizeof(d));} // NOLINT(google-readability-casting,readability-identifier-length) +template::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_s{} && std::is_assignable{} , int> =0> void asum(ssize_t n, XP x, ptrdiff_t incx, RP r) {auto rr = BLAS(sasum) (n, (s const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, sizeof(s));} // NOLINT(google-readability-casting,readability-identifier-length) +template::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_d{} && std::is_assignable{} , int> =0> void asum(ssize_t n, XP x, ptrdiff_t incx, RP r) {auto rr = BLAS(dasum) (n, (d const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, sizeof(d));} // NOLINT(google-readability-casting,readability-identifier-length) + +template::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_s{} && std::is_assignable{}, int> =0> void asum(ssize_t n, XP x, ptrdiff_t incx, RP r) {auto rr = BLAS(scasum)(n, (c const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, sizeof(s));} // NOLINT(google-readability-casting,readability-identifier-length) +template::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_d{} && std::is_assignable{}, int> =0> void asum(ssize_t n, XP x, ptrdiff_t incx, RP r) {auto rr = BLAS(dzasum)(n, (z const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, sizeof(d));} // NOLINT(google-readability-casting,readability-identifier-length) -template::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} and is_s{} and std::is_assignable{}, int> =0> void nrm2(ssize_t n, XP x, ptrdiff_t incx, RP r){auto rr = BLAS(scnrm2)(n, (c const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, sizeof(s));} // NOLINT(google-readability-casting,readability-identifier-length) -template::element_type, class RP, class R 
= typename std::pointer_traits::element_type, enable_if_t{} and is_d{} and std::is_assignable{}, int> =0> void nrm2(ssize_t n, XP x, ptrdiff_t incx, RP r){auto rr = BLAS(dznrm2)(n, (z const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, sizeof(d));} // NOLINT(google-readability-casting,readability-identifier-length) +// TODO(correaa) implement workaround for bug in Apple Accelerate BLAS ? https://stackoverflow.com/a/77017238/225186 +template::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_s{} && std::is_assignable{} , int> =0> void nrm2(ssize_t n, XP x, ptrdiff_t incx, RP r) {auto rr = BLAS(snrm2) (n, (s const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, sizeof(s));} // NOLINT(google-readability-casting,readability-identifier-length) +template::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_d{} && std::is_assignable{} , int> =0> void nrm2(ssize_t n, XP x, ptrdiff_t incx, RP r) {auto rr = BLAS(dnrm2) (n, (d const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, sizeof(d));} // NOLINT(google-readability-casting,readability-identifier-length) + +template::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_s{} && std::is_assignable{}, int> =0> void nrm2(ssize_t n, XP x, ptrdiff_t incx, RP r) {auto rr = BLAS(scnrm2)(n, (c const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, sizeof(s));} // NOLINT(google-readability-casting,readability-identifier-length) +template::element_type, class RP, class R = typename std::pointer_traits::element_type, enable_if_t{} && is_d{} && std::is_assignable{}, int> =0> void nrm2(ssize_t n, XP x, ptrdiff_t incx, RP r) {auto rr = BLAS(dznrm2)(n, (z const*)static_cast(x), incx); std::memcpy((s*)static_cast(r), &rr, sizeof(d));} // NOLINT(google-readability-casting,readability-identifier-length) -// template v nrm2 (S n, typename 
add_const_ptr>::type x, S incx, d* r){*r = BLAS(dznrm2 )(BC(n), x, BC(incx));} ixamax(s) ixamax(d) ixamax(c) ixamax(z) } // end namespace core -#undef xnrm2 #undef xasum #undef ixamax @@ -363,53 +351,31 @@ template::element_type, cla namespace core { -//xgemv(s) xgemv(d) xgemv(c) xgemv(z) -//xger(s) xger(d) -// xgeru(c) xgeru(z) -// xgerc(c) xgerc(z) - using std::enable_if_t; using std::is_assignable; -template{} and is_s{} and is_s{} and is_assignable{}, int> =0> void gemv(char trans, size_t m, size_t n, A const& a, M* ma, size_t lda, X* x, size_t incx, B b, Y* y, size_t incy) {BLAS(sgemv)(trans, m, n, a, (s const*)ma, lda, (s const*)x, incx, b, (s*)y, incy);} // NOLINT(google-readability-casting,readability-identifier-length) -template{} and is_d{} and is_d{} and is_assignable{}, int> =0> void gemv(char trans, size_t m, size_t n, A const& a, M* ma, size_t lda, X* x, size_t incx, B b, Y* y, size_t incy) {BLAS(dgemv)(trans, m, n, a, (d const*)ma, lda, (d const*)x, incx, b, (d*)y, incy);} // NOLINT(google-readability-casting,readability-identifier-length) -template{} and is_c{} and is_c{} and is_assignable{}, int> =0> void gemv(char trans, size_t m, size_t n, A const& a, M* ma, size_t lda, X* x, size_t incx, B b, Y* y, size_t incy) {BLAS(cgemv)(trans, m, n, a, (c const*)ma, lda, (c const*)x, incx, b, (c*)y, incy);} // NOLINT(google-readability-casting,readability-identifier-length) -template{} and is_z{} and is_z{} and is_assignable{}, int> =0> void gemv(char trans, size_t m, size_t n, A const& a, M* ma, size_t lda, X* x, size_t incx, B b, Y* y, size_t incy) {BLAS(zgemv)(trans, m, n, a, (z const*)ma, lda, (z const*)x, incx, b, (z*)y, incy);} // NOLINT(google-readability-casting,readability-identifier-length) +template{} && is_s{} && is_s{} && is_assignable{}, int> =0> void gemv(char trans, size_t m, size_t n, A const* a, M* ma, size_t lda, X* x, size_t incx, B const* b, Y* y, size_t incy) {BLAS(sgemv)(trans, m, n, *a, reinterpret_cast(ma), lda, reinterpret_cast(x), 
incx, *b, reinterpret_cast(y), incy);} // NOLINT(google-readability-casting,readability-identifier-length,cppcoreguidelines-pro-type-reinterpret-cast) // NOSONAR wrapped func has 11 params +template{} && is_d{} && is_d{} && is_assignable{}, int> =0> void gemv(char trans, size_t m, size_t n, A const* a, M* ma, size_t lda, X* x, size_t incx, B const* b, Y* y, size_t incy) {BLAS(dgemv)(trans, m, n, *a, reinterpret_cast(ma), lda, reinterpret_cast(x), incx, *b, reinterpret_cast(y), incy);} // NOLINT(google-readability-casting,readability-identifier-length,cppcoreguidelines-pro-type-reinterpret-cast) // NOSONAR wrapped func has 11 params +template{} && is_c{} && is_c{} && is_assignable{}, int> =0> void gemv(char trans, size_t m, size_t n, A const* a, M* ma, size_t lda, X* x, size_t incx, B const* b, Y* y, size_t incy) {BLAS(cgemv)(trans, m, n, *a, reinterpret_cast(ma), lda, reinterpret_cast(x), incx, *b, reinterpret_cast(y), incy);} // NOLINT(google-readability-casting,readability-identifier-length,cppcoreguidelines-pro-type-reinterpret-cast) // NOSONAR wrapped func has 11 params +template{} && is_z{} && is_z{} && is_assignable()*std::declval()*std::declval()+std::declval()*std::declval())>{}, int> =0> void gemv(char trans, size_t m, size_t n, A const* a, M* ma, size_t lda, X* x, size_t incx, B const* b, Y* y, size_t incy) { // NOLINT(google-readability-casting,readability-identifier-length,cppcoreguidelines-pro-type-reinterpret-cast) //NOSONAR wrapped func has 11 params + BLAS(zgemv)(trans, m, n, *a, reinterpret_cast(ma), lda, reinterpret_cast(x), incx, *b, reinterpret_cast(y), incy); // NOLINT(fuchsia-default-arguments-calls,google-readability-casting,readability-identifier-length,cppcoreguidelines-pro-type-reinterpret-cast) // NOSONAR +} + +// TODO(correaa) implement get, geru, gerc } // end namespace core template -struct blas2 { -// template -// static v trsv(char ulA, char transA, char di, S m, T const* A, S lda, T* X, S incx) = delete; -}; +struct blas2 {}; 
template<> struct blas2 {template static v trsv(As... args) {BLAS(strsv)(args...);}}; template<> struct blas2 {template static v trsv(As... args) {BLAS(dtrsv)(args...);}}; template<> struct blas2 {template static v trsv(As... args) {BLAS(ctrsv)(args...);}}; template<> struct blas2 {template static auto trsv(As... args) -> decltype(BLAS(ztrsv)(args...)) {BLAS(ztrsv)(args...);}}; -namespace core { - template - v trsv(C ulA, C transA, C diA, S n, TconstP A, S lda, TP X, S incx) { // NOLINT(readability-identifier-length) conventional BLAS naming - blas2::element_type>>::trsv(ulA, transA, diA, n, A, lda, X, incx); - } -} // end namespace core - -//#undef xgemv -#undef xger -#undef xgeru -#undef xgerc - /////////////////////////////////////////////////////////////////////////////// // LEVEL 3 -#if 0 -#define xsyrk(T) \ -template v syrk( UL ul, C transA, S n, S k, T alpha, T const* A, S lda, T beta, T* CC, S ldc){ \ - MULTI_MARK_SCOPE("cpu_syrk"); BLAS(T##syrk)( ul, transA, BC(n), BC(k), alpha, A, BC(lda), beta, CC, BC(ldc));} -#endif - namespace core { using std::is_convertible_v; @@ -420,54 +386,52 @@ using std::max; #define xsyrk(T) \ template::element_type, class BETA, class CCP, class CC = typename pointer_traits::element_type, \ enable_if_t< \ - is_##T{} and is_##T{} and is_assignable{} and \ - is_convertible_v and is_convertible_v \ + is_##T{} && is_##T{} && is_assignable{} && \ + is_convertible_v && is_convertible_v \ , int> =0> \ v syrk( UL uplo, C transA, S n, S k, ALPHA const* alpha, AAP aa, S lda, BETA const* beta, CCP cc, S ldc) /*NOLINT(bugprone-easily-swappable-parameters,readability-identifier-length)*/ \ /*=delete;*/ \ { \ - if(transA == 'N' or transA == 'n') {MULTI_ASSERT1( lda >= max(1L, n) );} \ - if(transA != 'N' and transA != 'n') {MULTI_ASSERT1( lda >= max(1L, k) );} \ - MULTI_ASSERT1( ldc >= max(1L, n) ); \ - MULTI_MARK_SCOPE("cpu_herk"); \ - BLAS(T##syrk)( uplo, transA, BC(n), BC(k), *(T const*)alpha, aa, BC(lda), *(T const*)beta, cc, BC(ldc)); \ 
+ if(transA == 'N' || transA == 'n') {BOOST_MULTI_ASSERT1( lda >= max(1L, n) );} \ + if(transA != 'N' && transA != 'n') {BOOST_MULTI_ASSERT1( lda >= max(1L, k) );} \ + BOOST_MULTI_ASSERT1( ldc >= max(1L, n) ); \ + BLAS(T##syrk)( uplo, transA, BC(n), BC(k), *reinterpret_cast(alpha), aa, BC(lda), *reinterpret_cast(beta), cc, BC(ldc)); /*NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)*/ \ } \ #define xherk(T) \ template::element_type, class BETA, class CCP, class CC = typename pointer_traits::element_type, class Real = typename T::value_type, \ enable_if_t< \ - is_##T{} and is_##T{} and is_assignable{} and \ - is_convertible_v and is_convertible_v \ + is_##T{} && is_##T{} && is_assignable{} && \ + is_convertible_v && is_convertible_v \ , int> =0> \ -v herk( UL uplo, C transA, S n, S k, ALPHA const* alpha, AAP aa, S lda, BETA const* beta, CCP cc, S ldc) /*NOLINT(bugprone-easily-swappable-parameters,readability-identifier-length)*/ \ -/*=delete;*/ \ +v herk( UL uplo, C transA, S n, S k, ALPHA const* alpha, AAP aa, S lda, BETA const* beta, CCP cc, S ldc) /*NOLINT(bugprone-easily-swappable-parameters,readability-identifier-length)*/ \ +/*=delete;*/ \ { \ - if(transA == 'N' or transA == 'n') {MULTI_ASSERT1( lda >= max(1L, n) );} /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ - if(transA != 'N' and transA != 'n') {MULTI_ASSERT1( lda >= max(1L, k) );} \ - MULTI_ASSERT1( ldc >= max(1L, n) ); /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ - MULTI_MARK_SCOPE("cpu_herk"); \ - BLAS(T##herk)( uplo, transA, BC(n), BC(k), *(Real const*)alpha, aa, BC(lda), *(Real const*)beta, cc, BC(ldc)); \ + if(transA == 'N' || transA == 'n') {BOOST_MULTI_ASSERT1( lda >= max(1L, n) );} /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ + if(transA != 'N' && transA != 'n') {BOOST_MULTI_ASSERT1( lda >= max(1L, k) );} \ + BOOST_MULTI_ASSERT1( ldc >= max(1L, n) ); /* 
NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ + /*BOOST_MULTI_MARK_SCOPE("cpu_herk");*/ \ + BLAS(T##herk)( uplo, transA, BC(n), BC(k), *reinterpret_cast(alpha), aa, BC(lda), *reinterpret_cast(beta), cc, BC(ldc)); /*NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)*/ \ } \ -#define xgemm(T) \ +#define xgemm(T) \ template::element_type, class BBP, class BB = typename pointer_traits::element_type, class BETA, class CCP, class CC = typename pointer_traits::element_type, \ enable_if_t< \ - is_##T{} and is_##T{} and is_##T{} and is_assignable{} and \ - is_convertible_v and is_convertible_v and is_convertible_v \ -, int> =0> \ + is_##T{} && is_##T{} && is_##T{} && \ + is_convertible_v && is_convertible_v && is_convertible_v \ +, int> =0> \ v gemm(char transA, char transB, ssize_t m, ssize_t n, ssize_t k, ALPHA const* alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb, BETA const* beta, CCP cc, ssize_t ldc) { /*NOLINT(bugprone-easily-swappable-parameters)*/ \ - MULTI_MARK_SCOPE("cpu_gemm"); \ using std::max; \ - if(transA == 'N') {MULTI_ASSERT1(lda >= max(1L, m));} \ - if(transA != 'N') {MULTI_ASSERT1(lda >= max(1L, k));} \ - if(transB == 'N') {MULTI_ASSERT1(ldb >= max(1L, k));} \ - if(transB != 'N') {MULTI_ASSERT1(ldb >= max(1L, n));} \ - MULTI_ASSERT1( aa != cc ); \ - MULTI_ASSERT1( bb != cc ); \ - if(not( ldc >= max(1L, m) )) {throw std::logic_error("failed 'ldc >= max(1L, m)' with ldc = "+ std::to_string(ldc) +" and m = "+ std::to_string(m));} \ - if(*beta != 0.) 
{MULTI_ASSERT1((is_assignable{}));} \ - BLAS(T##gemm)(transA, transB, BC(m), BC(n), BC(k), *(T const*)alpha, (T const*)static_cast(aa), BC(lda), (T const*)static_cast(bb), BC(ldb), *(T const*)beta, (T*)static_cast(cc), BC(ldc)); \ -} \ + if(transA == 'N') {BOOST_MULTI_ASSERT1(lda >= max(1L, m));} \ + if(transA != 'N') {BOOST_MULTI_ASSERT1(lda >= max(1L, k));} \ + if(transB == 'N') {BOOST_MULTI_ASSERT1(ldb >= max(1L, k));} \ + if(transB != 'N') {BOOST_MULTI_ASSERT1(ldb >= max(1L, n));} \ + BOOST_MULTI_ASSERT1( aa != cc ); \ + BOOST_MULTI_ASSERT1( bb != cc ); \ + if(!( ldc >= max(1L, m) )) {throw std::logic_error("failed 'ldc >= max(1L, m)' with ldc = "+ std::to_string(ldc) +" and m = "+ std::to_string(m));} \ + if(*beta != 0.0) {BOOST_MULTI_ASSERT1((is_assignable()*std::declval()*std::declval() + std::declval()*std::declval())> {}));} \ + BLAS(T##gemm)(transA, transB, BC(m), BC(n), BC(k), *reinterpret_cast(alpha), reinterpret_cast(static_cast(aa)), BC(lda), reinterpret_cast(static_cast(bb)), BC(ldb), *reinterpret_cast(beta), (T*)(static_cast(cc)) /*NOLINT(cppcoreguidelines-pro-type-reinterpret-cast)*/ /*TODO(correaa) check constness*/, BC(ldc)); \ +} \ // NOLINTNEXTLINE(readability-identifier-length) conventional BLAS name xgemm(s) xgemm(d) xgemm(c) xgemm(z) // NOLINT(readability-function-cognitive-complexity) : 36 of 25 @@ -476,21 +440,20 @@ xgemm(s) xgemm(d) xgemm(c) xgemm(z) // NOLINT(readability-function-cognitive-co #define xtrsm(T) \ template::element_type, class BBP, class BB = typename pointer_traits::element_type, \ enable_if_t< \ - is_##T{} and is_##T{} and is_assignable{} and is_assignable{} and \ - is_convertible_v and is_convertible_v \ + is_##T{} && is_##T{} && is_assignable{} && is_assignable{} && \ + is_convertible_v && is_convertible_v \ ,int> =0> \ v trsm(char side, char uplo, char transA, char diag, ssize_t m, ssize_t n, ALPHA alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb) { 
/*NOLINT(bugprone-easily-swappable-parameters,readability-identifier-length)*/ \ - MULTI_MARK_SCOPE("cpu_trsm"); \ - assert( side == 'L' or side == 'R' ); /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ - assert( uplo == 'U' or uplo == 'L' ); /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ - assert( transA == 'N' or transA == 'T' or transA == 'C' ); /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ - assert( diag == 'U' or diag == 'N' ); /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ - MULTI_ASSERT1( m >= 0 and n >= 0 ); \ + assert( side == 'L' || side == 'R' ); /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ + assert( uplo == 'U' || uplo == 'L' ); /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ + assert( transA == 'N' || transA == 'T' || transA == 'C' ); /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ + assert( diag == 'U' || diag == 'N' ); /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ + BOOST_MULTI_ASSERT1( m >= 0 && n >= 0 ); \ using std::max; \ - if(side == 'L') {MULTI_ASSERT1( lda >= max(ssize_t{1}, m) );} /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ - if(side == 'R') {MULTI_ASSERT1( lda >= max(ssize_t{1}, n) );} \ - MULTI_ASSERT1( ldb >= max(ssize_t{1}, m) ); \ - BLAS(T##trsm)(side, uplo, transA, diag, BC(m), BC(n), alpha, (T const*)static_cast(aa), BC(lda), (T*)static_cast(bb), BC(ldb)); \ + if(side == 'L') {BOOST_MULTI_ASSERT1( lda >= max(ssize_t{1}, m) );} /* NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay)*/ \ + if(side == 'R') {BOOST_MULTI_ASSERT1( lda >= max(ssize_t{1}, n) );} \ + BOOST_MULTI_ASSERT1( ldb >= max(ssize_t{1}, m) ); \ + BLAS(T##trsm)(side, uplo, 
transA, diag, BC(m), BC(n), alpha, reinterpret_cast(static_cast(aa)), BC(lda), reinterpret_cast(static_cast(bb)), BC(ldb)); /*NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,bugprone-macro-parentheses)*/ \ } \ xtrsm(s) xtrsm(d) xtrsm(c) xtrsm(z) // NOLINT(readability-function-cognitive-complexity) : 29 of 25 @@ -502,22 +465,45 @@ xsyrk(s) xsyrk(d) xsyrk(c) xsyrk(z) } // end namespace core -#undef xsyrk #undef xherk -#undef xtrsm #undef BC struct context { // stateless (and thread safe) + template + static auto scal(As... args) + ->decltype(core::scal(args...)) { + return core::scal(args...); } + + template + static auto copy(As... args) noexcept + ->decltype(core::copy(args...)) { + return core::copy(args...); } + + template + static auto swap(As... args) noexcept + ->decltype(core::swap(args...)) { + return core::swap(args...); } + template static auto axpy(As... args) ->decltype(core::axpy(args...)) { return core::axpy(args...); } template - static auto gemv(As... args) - ->decltype(core::gemv(args...)) { - return core::gemv(args...); } + static auto dot(As... args) + ->decltype(core::dot(args...)) { + return core::dot(args...); } + + template + static auto dotc(As... args) + ->decltype(core::dotc(args...)) { + return core::dotc(args...); } + + template + static auto dotu(As... args) + ->decltype(core::dotu(args...)) { + return core::dotu(args...); } template static auto gemm(As&&... args) @@ -525,22 +511,22 @@ struct context { // stateless (and thread safe) return core::gemm(std::forward(args)...); } template - static auto dot(As&&... args) - ->decltype(core::dot(std::forward(args)...)) { - return core::dot(std::forward(args)...); } + static auto gemv(As&&... args) + ->decltype(core::gemv(std::forward(args)...)) { + return core::gemv(std::forward(args)...); } template - static auto dotc(As&&... args) - ->decltype(core::dotc(std::forward(args)...)) { - return core::dotc(std::forward(args)...); } + static auto asum(As... 
args) + ->decltype(core::asum(args...)) { + return core::asum(args...); } template - static auto dotu(As&&... args) - ->decltype(core::dotu(std::forward(args)...)) { - return core::dotu(std::forward(args)...); } + static auto nrm2(As... args) + ->decltype(core::nrm2(args...)) { + return core::nrm2(args...); } template - static auto trsm(As&&... args) + static auto trsm(As&&... args) // TODO(correaa) remove && ->decltype(core::trsm(std::forward(args)...)) { return core::trsm(std::forward(args)...); } @@ -559,13 +545,6 @@ template<> struct is_context : std::true_type {}; template<> struct is_context : std::true_type {}; -namespace core { -template -auto copy(Context&& /*unused*/, As... args) -->decltype(core::copy(args...)) { - return core::copy(args...); } -} // end namespace core - template::element_type*>{}, int> =0> auto default_context_of(TPtr const& /*unused*/) -> blas::context* { static blas::context dc; diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/cuda.hpp_ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/cuda.hpp_ similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/cuda.hpp_ rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/cuda.hpp_ diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/cuda/tests/gemm.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/cuda/tests/gemm.cpp similarity index 83% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/cuda/tests/gemm.cpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/cuda/tests/gemm.cpp index 5bdaf7b630..0d77b46c25 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/cuda/tests/gemm.cpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/cuda/tests/gemm.cpp @@ -1,18 +1,13 @@ #ifdef COMPILATION_INSTRUCTIONS /usr/local/cuda-11.1/bin/nvcc -x cu -std=c++17 -use_fast_math 
-lpthread -D_REENTRANT -DBOOST_PP_VARIADICS -Xcudafe "--diag_suppress=implicit_return_from_non_void_function" --extended-lambda --expt-relaxed-constexpr $0 -o $0x `pkg-config --cflags --libs cudart-11.0 cublas-11.0 blas` -lboost_unit_test_framework -DBOOST_LOG_DYN_LINK -lboost_log -lboost_thread -lboost_system -lboost_log_setup -lpthread -lboost_timer&&$0x&&rm $0x; exit #endif -// © Alfredo A. Correa 2020-2021 +// © Alfredo A. Correa 2020-2024 #define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS gemm" #define BOOST_TEST_DYN_LINK #include #include -//#include"boost/log/trivial.hpp" -//#define MULTI_MARK_SCOPE(MsG) BOOST_LOG_TRIVIAL(trace)<; complex const I{0, 1}; namespace blas = multi::blas; multi::array const a = { - {1. + 2.*I, 5. + 2.*I}, - {9. - 1.*I, 9. + 1.*I}, - {1. + 1.*I, 2. + 2.*I} + {1.0 + 2.0*I, 5.0 + 2.0*I}, + {9.0 - 1.0*I, 9.0 + 1.0*I}, + {1.0 + 1.0*I, 2.0 + 2.0*I} }; multi::array const b = { - { 11. - 2.*I, 5. + 2.*I}, - { 7. - 3.*I, 2. + 1.*I}, - { 8. - 1.*I, 1. + 1.*I} + { 11.0 - 2.0*I, 5.0 + 2.0*I}, + { 7.0 - 3.0*I, 2.0 + 1.0*I}, + { 8.0 - 1.0*I, 1.0 + 1.0*I} }; { { multi::array c({2, 2}); c = blas::gemm(1., blas::H(a), b); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == 125.-84.*I ); + BOOST_REQUIRE( c[1][0] == 125.0 - 84.0*I ); } } { @@ -49,11 +44,11 @@ BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_gemm_complex_3x2_3x2){ { multi::cuda::array c_gpu({2, 2}); c_gpu = blas::gemm(1., blas::H(a_gpu), b_gpu); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I ); + BOOST_REQUIRE( c_gpu[1][0] == 125.0 - 84.0*I ); } { auto c_gpu =+ blas::gemm(1.0, blas::H(a_gpu), b_gpu); - BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I ); + BOOST_REQUIRE( c_gpu[1][0] == 125.0 - 84.0*I ); } } { @@ -62,11 +57,11 @@ BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_gemm_complex_3x2_3x2){ { multi::cuda::managed::array c_gpu({2, 2}); blas::gemm(1., blas::H(a_gpu), b_gpu, 0., c_gpu); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I ); + BOOST_REQUIRE( c_gpu[1][0] 
== 125.0 - 84.0*I ); } { auto c_gpu =+ blas::gemm(1.0, blas::H(a_gpu), b_gpu); - BOOST_REQUIRE( c_gpu[1][0] == 125.-84.*I ); + BOOST_REQUIRE( c_gpu[1][0] == 125.0 - 84.0*I ); } } } @@ -123,7 +118,7 @@ BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_gemm_complex_3x2_3x2){ //} #if 0 -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_gemm_context_timing){ +BOOST_AUTO_TEST_CASE(const multi_adaptors_blas_cuda_gemm_context_timing){ using complex = std::complex;//complex const I{0, 1}; multi::array A({1000, 1000}); @@ -142,7 +137,7 @@ BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_gemm_context_timing){ boost::timer::auto_cpu_timer t; // 2.398206s for(auto i = 0; i != 10; ++i){ blas::context ctx; - blas::gemm(ctx, 1, A, B, 0, C); + blas::gemm(ctx, 1.0, A, B, 0.0, C); } } using device_array = multi::cuda::array; @@ -152,7 +147,7 @@ BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_gemm_context_timing){ boost::timer::auto_cpu_timer t; // 0.707426s for(auto i = 0; i != 10; ++i){ multi::cublas::context ctx; - blas::gemm(ctx, 1, A_gpu, B_gpu, 0, C_gpu); + blas::gemm(ctx, 1.0, A_gpu, B_gpu, 0.0, C_gpu); } } { @@ -160,8 +155,7 @@ BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_gemm_context_timing){ boost::timer::auto_cpu_timer t; // 0.613534s multi::cublas::context ctx; - for(auto i = 0; i != 10; ++i) blas::gemm(ctx, 1, A_gpu, B_gpu, 0, C_gpu); + for(auto i = 0; i != 10; ++i) blas::gemm(ctx, 1.0, A_gpu, B_gpu, 0.0, C_gpu); } } #endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/cuda/tests/gemm.su b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/cuda/tests/gemm.su similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/cuda/tests/gemm.su rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/cuda/tests/gemm.su diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/cuda/tests/iamax.cpp 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/cuda/tests/iamax.cpp new file mode 100644 index 0000000000..e407fb50d4 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/cuda/tests/iamax.cpp @@ -0,0 +1,31 @@ +// © Alfredo A. Correa 2019-2024 + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS iamax" +#define BOOST_TEST_DYN_LINK + +#include + +#include "../../../../adaptors/blas.hpp" +#include "../../../../adaptors/cuda.hpp" +#include "../../../../adaptors/blas/cuda.hpp" + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(const multi_adaptors_blas_cuda_iamax){ + using complex = std::complex; complex const I{0.0, 1.0}; + { + multi::array const A = {1.0 + 2.0*I, 2.0, 3.0 + 3.0*I, 4.0}; + using multi::blas::iamax; + BOOST_REQUIRE( iamax(A) == 2 ); + } + { + multi::cuda::array const A = {1.0 + 2.0*I, 2.0, 3.0 + 3.0*I, 4.0}; + using multi::blas::iamax; + BOOST_REQUIRE( iamax(A) == 2 ); + } + { + multi::cuda::managed::array const A = {1.0 + 2.0*I, 2.0, 3.0 + 3.0*I, 4.0}; + using multi::blas::iamax; + BOOST_REQUIRE( iamax(A) == 2 ); + } +} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/dot.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/dot.hpp similarity index 67% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/dot.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/dot.hpp index 86a6c95269..43a3aac718 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/dot.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/dot.hpp @@ -1,8 +1,9 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt -#ifndef MULTI_ADAPTORS_BLAS_DOT_HPP -#define MULTI_ADAPTORS_BLAS_DOT_HPP +#ifndef BOOST_MULTI_ADAPTORS_BLAS_DOT_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_DOT_HPP #include "../blas/core.hpp" #include "../blas/numeric.hpp" // for is_complex @@ -16,14 +17,15 @@ using core::dotc; template auto dot_n(Context&& ctxt, XIt x_first, Size count, YIt y_first, RPtr rp) { - if constexpr(is_complex{}) { - if constexpr(!is_conjugated{} and !is_conjugated{}) {std::forward(ctxt)->dotu(count, base(x_first) , stride(x_first), base(y_first), stride(y_first), rp);} - else if constexpr(!is_conjugated{} and is_conjugated{}) {std::forward(ctxt)->dotc(count, underlying(base(y_first)), stride(y_first), base(x_first), stride(x_first), rp);} - else if constexpr( is_conjugated{} and !is_conjugated{}) {std::forward(ctxt)->dotc(count, underlying(base(x_first)), stride(x_first), base(y_first), stride(y_first), rp);} - else if constexpr( is_conjugated{} and is_conjugated{}) {static_assert(!sizeof(XIt*), "not implemented in blas");} - } else { + if constexpr(! 
is_complex{}) { std::forward(ctxt)->dot (count, base(x_first) , stride(x_first), base(y_first), stride(y_first), rp); + } else { + if constexpr(!is_conjugated{} && !is_conjugated{}) {std::forward(ctxt)->dotu(count, base(x_first) , stride(x_first), base(y_first), stride(y_first), rp);} + else if constexpr(!is_conjugated{} && is_conjugated{}) {std::forward(ctxt)->dotc(count, underlying(base(y_first)), stride(y_first), base(x_first), stride(x_first), rp);} + else if constexpr( is_conjugated{} && !is_conjugated{}) {std::forward(ctxt)->dotc(count, underlying(base(x_first)), stride(x_first), base(y_first), stride(y_first), rp);} + else if constexpr( is_conjugated{} && is_conjugated{}) {static_assert(!sizeof(XIt*), "not implemented in blas");} } + struct{XIt x_last; YIt y_last;} ret{x_first + count, y_first + count}; return ret; } @@ -39,10 +41,16 @@ auto dot_n(XIt x_first, Size count, YIt y_first, RPtr rp) {//->decltype(dot_n(bl } } -template -auto dot(Context&& ctxt, X1D const& x, Y1D const& y, R&& res) -> R&& { // NOLINT(readability-identifier-length) res = \sum_i x_i y_i +template>::value, int> =0> +auto dot(Context ctxt, X1D const& x, Y1D const& y, R&& res) -> R&& { // NOLINT(readability-identifier-length) res = \sum_i x_i y_i + assert( size(x) == size(y) ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + return blas::dot_n(ctxt, begin(x), size(x), begin(y), &res), std::forward(res); +} + +template>::value, int> =0> +auto dot(Context ctxt, X1D const& x, Y1D const& y, R&& res) -> R&& { // NOLINT(readability-identifier-length) res = \sum_i x_i y_i assert( size(x) == size(y) ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - return blas::dot_n(std::forward(ctxt), begin(x), size(x), begin(y), &res), std::forward(res); + return blas::dot_n(ctxt, begin(x), size(x), begin(y), res.base()), std::forward(res); } template @@ -50,10 +58,10 @@ auto dot(X1D const& x, Y1D const& y, R&& res) -> R&& { // 
NOLINT(readability-id assert( size(x) == size(y) ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) if constexpr(is_conjugated{}) { auto ctxtp = blas::default_context_of(underlying(x.base())); - return blas::dot(ctxtp, x, y, res); + return blas::dot(ctxtp, x, y, std::forward(res)); } else { auto ctxtp = blas::default_context_of( x.base() ); - return blas::dot(ctxtp, x, y, res); + return blas::dot(ctxtp, x, y, std::forward(res)); } } @@ -68,7 +76,8 @@ class dot_ptr { dot_ptr(ContextPtr ctxt, ItX x_first, Size count, ItY y_first) : ctxt_{ctxt}, x_first_{x_first}, count_{count}, y_first_{y_first} {} public: -// dot_ptr(dot_ptr const&) = default; + constexpr explicit operator bool() const {return true;} + template friend constexpr auto copy_n(dot_ptr first, Size2 count, ItOut d_first) ->decltype(blas::dot_n(std::declval(), std::declval(), Size{} , std::declval(), d_first), d_first + count) { @@ -84,19 +93,23 @@ class dot_ptr { template> struct dot_ref : private Ptr { - using decay_type = decltype(typename X::value_type{}*typename Y::value_type{}); + using decay_type = decltype(std::declval()*std::declval()); dot_ref(ContextPtr ctxt, X const& x, Y const& y) : Ptr{ctxt, begin(x), size(x), begin(y)} { // NOLINT(readability-identifier-length) BLAS naming assert(( size(x) == size(y) )); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) } - constexpr auto operator&() const& -> Ptr const& {return *this;} // NOLINT(google-runtime-operator) reference type - auto decay() const& -> decay_type {decay_type ret; copy_n(operator&(), 1, &ret); return ret;} - operator decay_type() const& {return decay();} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions,hicpp-explicit-conversion) to allow terse syntax -#if not defined(__CUDACC__) or not defined(__INTEL_COMPILER) + + constexpr auto operator&() const& -> Ptr const& {return *this;} // NOLINT(google-runtime-operator) reference type // NOSONAR + + auto 
decay() const -> decay_type {decay_type ret; copy_n(operator&(), 1, &ret); return ret;} // NOLINT(fuchsia-default-arguments-calls) + operator decay_type() const {return decay();} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions,hicpp-explicit-conversion) //NOSONAR to allow terse syntax +#if ! defined(__CUDACC__) || ! defined(__INTEL_COMPILER) friend auto operator*(decay_type const& lhs, dot_ref const& self) {return lhs*self.decay();} #endif auto operator+() const -> decay_type {return decay();} + auto operator==(dot_ref const& other) const -> bool {return decay() == other.decay();} auto operator!=(dot_ref const& other) const -> bool {return decay() != other.decay();} + template auto operator==(Other const& other) const ->decltype(decay()==other) { @@ -108,7 +121,7 @@ struct dot_ref : private Ptr { }; template [[nodiscard]] -auto dot(Context const& ctxt, X const& x, Y const& y) { // NOLINT(readability-identifier-length) BLAS naming +auto dot(Context ctxt, X const& x, Y const& y) { // NOLINT(readability-identifier-length) BLAS naming return dot_ref{ctxt, x, y}; } diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/examples/gemv.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/examples/gemv.cpp new file mode 100644 index 0000000000..3cc7eb1fc9 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/examples/gemv.cpp @@ -0,0 +1,58 @@ +#include +#include + +#include + +int main() { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays, hicpp-avoid-c-arrays, modernize-avoid-c-arrays) test legacy types + float matA[3][3] = { + {1.1, 2.2, 3.3}, + {4.4, 5.5, 6.6}, + {7.7, 8.8, 9.9}, + }; + float vecB[3] = {1.0, 2.0, 3.0}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy types + float vecC[3] = {0.0, 0.0, 0.0}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy types + + 
namespace multi = boost::multi; + + { // make references to c-arrays + multi::array_ref A{matA}; + multi::array_ref B{vecB}; + multi::array_ref C{vecC}; + + multi::blas::gemv(1.0, A, B, 0.0, C); // C is output + } + { // make references to c-arrays + auto const& A = multi::ref(matA); // deduce element type and dimensionality + auto const& B = multi::ref(vecB); + auto&& vecCref = multi::ref(vecC); + + multi::blas::gemv(1.0, A, B, 0.0, vecCref); // vecC holds the result + } + { // one liner + multi::blas::gemv(1.0, multi::ref(matA), multi::ref(vecB), 0.0, multi::ref(vecC)); // vecC holds the result + } + { // one liner with output + multi::ref(vecC) = multi::blas::gemv(1.0, multi::ref(matA), multi::ref(vecB)); + } + { // using the library, not references to c-arrays + multi::array A = { + {1.1, 2.2, 3.3}, + {4.4, 5.5, 6.6}, + {7.7, 8.8, 9.9}, + }; + multi::array B = {1.0, 2.0, 3.0}; + + multi::array C = multi::blas::gemv(1.0, A, B); + } + { + multi::array A = { + {1.1, 2.2, 3.3}, + {4.4, 5.5, 6.6}, + {7.7, 8.8, 9.9}, + }; + multi::array B = {1.0, 2.0, 3.0}; + + auto C =+ multi::blas::gemv(1.0, A, B); // create (allocate) the result in C + } +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/filling.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/filling.hpp new file mode 100644 index 0000000000..4d686bc2aa --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/filling.hpp @@ -0,0 +1,32 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_FILLING_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_FILLING_HPP + +#include + +#include +#include + +namespace boost::multi::blas { + +enum class filling : char { + lower = 'U', + upper = 'L' +}; + +inline auto flip(filling side) -> filling { + switch(side) { + case filling::lower: return filling::upper; + case filling::upper: return filling::lower; + } __builtin_unreachable(); // LCOV_EXCL_LINE +} + +inline auto operator-(filling side) -> filling {return flip(side);} +inline auto operator+(filling side) -> filling {return side;} + +} // end namespace boost::multi::blas + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/gemm.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/gemm.hpp new file mode 100644 index 0000000000..65f669704d --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/gemm.hpp @@ -0,0 +1,349 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_GEMM_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_GEMM_HPP + +#include "../blas/core.hpp" +#include "../blas/gemv.hpp" +#include "../blas/numeric.hpp" +#include "../blas/operations.hpp" + +namespace boost::multi::blas { + +using core::gemm; + +template +auto xbase_aux(It const& it, std::true_type const& /*true */) +->decltype(underlying(base(it))) { + return underlying(base(it)); } + +template +auto xbase_aux(It const& it, std::false_type const& /*false*/) +->decltype(base(it)) { + return base(it); } + +template +auto xbase(It const& it) +->decltype(xbase_aux(it, std::integral_constant{}>{})) { + return xbase_aux(it, std::integral_constant{}>{}); } + +#define CTXT std::forward(ctxt) + +template{} && !is_conjugated{}), int> = 0 +> +auto gemm_n(Context&& ctxt, typename It2DA::element alpha, It2DA a_first, Size a_count, It2DB b_first, typename It2DA::element beta, It2DC c_first) // NOLINT(readability-function-cognitive-complexity) : 125 +{ + assert( b_first->size() == c_first->size() ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( a_first.stride()==1 || a_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( b_first.stride()==1 || b_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( c_first.stride()==1 || c_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + + if(a_count == 0) { return c_first; } + + if (a_first->stride()==1 && b_first->stride()==1 && c_first->stride()==1) { + if ( a_count==1 && b_first->size()==1 ) {CTXT->gemm('N', 'N', b_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->size() , base(a_first), a_first->size() , &beta, base(c_first), c_first->size() );} + else if( a_count==1 ) {CTXT->gemm('N', 'N', b_first->size(), a_count, 
a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first->size() );} + else {CTXT->gemm('N', 'N', b_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first. stride());} + }else if(a_first->stride()==1 && b_first->stride()==1 && c_first. stride()==1) { + if (a_count==1) {CTXT->gemm('T', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first. stride(), base(b_first), b_first->size() , &beta, base(c_first), a_first->size() );} + else {CTXT->gemm('T', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first. stride(), base(b_first), b_first. stride(), &beta, base(c_first), c_first->stride());} + }else if(a_first. stride()==1 && b_first->stride()==1 && c_first->stride()==1) { + if (a_count==1) {CTXT->gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first->stride(), &beta, base(c_first), a_count );} + else {CTXT->gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first.stride());} + }else if(a_first. stride()==1 && b_first->stride()==1 && c_first. stride()==1) { + if (a_count==1) {CTXT->gemm('N', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first->stride(), base(b_first), a_first->size() , &beta, base(c_first), b_first->size() );} + else {CTXT->gemm('N', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first->stride(), base(b_first), b_first. stride(), &beta, base(c_first), c_first->stride());} + }else if(a_first->stride()==1 && b_first.stride()==1 && c_first. 
stride()==1) { + if (a_count==1 && b_first->size()==1 ) {CTXT->gemm('N', 'N', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->size() , base(a_first), a_first->size() , &beta, base(c_first), c_first->stride());} + else if(a_count==1) {CTXT->gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first->stride());} + else if(a_first->size() == 1 && b_first->size() == 1) + {CTXT->gemm('N', 'N', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first->stride());} + else {CTXT->gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first->stride());} + }else if(a_first->stride()==1 && b_first. stride()==1 && c_first->stride()==1) { + if (a_count==1) {CTXT->gemm('T', 'N', a_count, c_first->size(), a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first. stride());} + else {CTXT->gemm('T', 'N', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first. stride());} + }else if(a_first. stride()==1 && b_first.stride( )==1 && c_first. stride()==1) { + if (b_first->size()==1) {CTXT->gemm('N', 'N', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first->stride(), base(b_first), b_first->stride(), &beta, base(c_first), a_count );} + else {CTXT->gemm('N', 'N', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first->stride(), base(b_first), b_first->stride(), &beta, base(c_first), c_first->stride());} + }else if(a_first. 
stride()==1 && b_first.stride( )==1 && c_first->stride()==1) { + {CTXT->gemm('T', 'T', b_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first. stride());} + } else {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + + return c_first + a_count; +} + +template{} && is_conjugated{}), int> =0 +> +auto gemm_n(Context&& ctxt, typename It2DA::element alpha, It2DA a_first, Size a_count, It2DB b_first, typename It2DA::element beta, It2DC c_first) // NOLINT(readability-function-cognitive-complexity) : 125 +{ + assert( b_first->size() == c_first->size() ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( a_first.stride()==1 || a_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( b_first.stride()==1 || b_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( c_first.stride()==1 || c_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + + if(a_count == 0) { return c_first; } + + if (a_first->stride()==1 && b_first->stride()==1 && c_first->stride()==1){ + {CTXT->gemm('C', 'N', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first.stride());} + }else if(a_first->stride()==1 && b_first. 
stride()==1 && c_first->stride()==1){ + if (a_count==1) {CTXT->gemm('C', 'N', a_count, c_first->size(), a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first.stride());} + else {CTXT->gemm('C', 'N', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first.stride(), &beta, base(c_first), c_first.stride());} + }else if(a_first->stride()==1 && b_first. stride()==1 && c_first. stride()==1){ + {CTXT->gemm('C', 'N', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first->stride());} + }else if(a_first. stride()==1 && b_first. stride()==1 && c_first. stride()==1){ + {CTXT->gemm('C', 'T', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first->stride());} + }else if(a_first. stride()==1 && b_first. stride()==1 && c_first->stride()==1){ + {CTXT->gemm('C', 'T', a_count, c_first->size(), a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first. 
stride());} + }else{assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + + return c_first + a_count; +} + +template{} && !is_conjugated{}), int> =0 +> +auto gemm_n(Context&& ctxt, typename It2DA::element alpha, It2DA a_first, Size a_count, It2DB b_first, typename It2DA::element beta, It2DC c_first) // NOLINT(readability-function-cognitive-complexity) : 125 +{ + assert( b_first->size() == c_first->size() ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( a_first.stride()==1 || a_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( b_first.stride()==1 || b_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( c_first.stride()==1 || c_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + + if(a_count == 0) { return c_first; } + + if (a_first. stride()==1 && b_first->stride()==1 && c_first->stride()==1){ + if (a_count==1) {CTXT->gemm('N', 'C', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), underlying(base(a_first)), a_first->stride(), &beta, base(c_first), a_first->size() );} + else {CTXT->gemm('N', 'C', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. 
stride(), underlying(base(a_first)), a_first->stride(), &beta, base(c_first), c_first.stride());} + } else {throw std::logic_error{"not BLAS-implemented"};} + + return c_first + a_count; +} + +template{} && is_conjugated{}), int> =0 +> +auto gemm_n(Context&& ctxt, typename It2DA::element alpha, It2DA a_first, Size a_count, It2DB b_first, typename It2DA::element beta, It2DC c_first) // NOLINT(readability-function-cognitive-complexity) : 125 +{ + assert( b_first->size() == c_first->size() ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( a_first.stride()==1 || a_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( b_first.stride()==1 || b_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( c_first.stride()==1 || c_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + + if(a_count == 0) { return c_first; } + if (a_first. stride()==1 && b_first. stride()==1 && c_first->stride()==1){ + {CTXT->gemm('C', 'C', a_count, c_first->size(), a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), underlying(base(a_first)), a_first->stride(), &beta, base(c_first), c_first. 
stride());} + } else {throw std::logic_error{"not BLAS-implemented"};} + return c_first + a_count; +} + +#undef CTXT + +template // TODO(correaa) automatic deduction of context +auto gemm_n(typename It2DA::element alpha, It2DA a_first, Size a_count, It2DB b_first, typename It2DA::element beta, It2DC c_first) +->decltype(gemm_n(Context{}, alpha, a_first, a_count, b_first, beta, c_first)) { + return gemm_n(Context{}, alpha, a_first, a_count, b_first, beta, c_first); } + +template +auto gemm(Context&& ctx, typename A::element alpha, A const& a, B const& b, typename A::element beta, C&& c) -> C&& { // NOLINT(readability-identifier-length) BLAS naming + assert( size( a) == size( c) ); + if(! a.is_empty()) {assert( size(~a) == size( b) );} + if constexpr(is_conjugated{}) {blas::gemm (std::forward(ctx), conj(alpha), conj(a), conj(b) , conj(beta), conj(c) );} + else {blas::gemm_n(std::forward(ctx), alpha , begin(a), size(a), begin(b), beta , begin(c));} + return std::forward(c); +} + +template +auto gemm(typename A::element alpha, A const& a, B const& b, typename A::element beta, C&& c) -> C&& { // NOLINT(readability-identifier-length) BLAS naming + if constexpr(is_conjugated{}) { + auto ctxt = blas::default_context_of(underlying(a.base())); + return gemm(ctxt, alpha, a, b, beta, std::forward(c)); + } else { + auto ctxt = blas::default_context_of(a.base()); + return gemm(ctxt, alpha, a, b, beta, std::forward(c)); + } +} + +template +class gemm_range; + +template +class gemm_reference { // TODO(correaa) implement this in terms of gemv_range? 
+ Ext exts_; + + public: + explicit gemm_reference(Ext exts) : exts_{std::move(exts)} {} + auto extensions() const {return exts_;} + friend auto extensions(gemm_reference const& self) {return self.extensions();} +}; + +template +class gemm_iterator { + ContextPtr ctxtp_; + Scalar s_; + ItA a_it_; + ItB b_begin_; + gemm_iterator(ContextPtr ctxtp, Scalar s, ItA a_it, ItB b_begin) : ctxtp_{ctxtp}, s_{s}, a_it_{std::move(a_it)}, b_begin_{std::move(b_begin)} {} // NOLINT(readability-identifier-length) BLAS naming + template + friend class gemm_range; + + public: + gemm_iterator(gemm_iterator const&) = default; + gemm_iterator(gemm_iterator&&) noexcept = default; + ~gemm_iterator() = default; + + auto operator=(gemm_iterator&&) -> gemm_iterator& = delete; + auto operator=(gemm_iterator const&) -> gemm_iterator& = delete; + + using difference_type = typename std::iterator_traits::difference_type; + using value_type = typename std::iterator_traits::value_type; + using pointer = std::nullptr_t; + using reference = gemm_referenceextensions())>; + using iterator_category = std::random_access_iterator_tag; + + static_assert( std::is_base_of::iterator_category>{} ); + + auto operator+=(difference_type n) -> gemm_iterator& {a_it_ += n; return *this;} + auto operator-=(difference_type n) -> gemm_iterator& {a_it_ -= n; return *this;} + + auto operator++() -> gemm_iterator& {return operator+=(1);} // required by random access concept requires even if not used explicitly + auto operator--() -> gemm_iterator& {return operator-=(1);} + + auto operator+(difference_type n) const {gemm_iterator ret{*this}; ret+=n; return ret;} + + friend auto operator-(gemm_iterator const& a, gemm_iterator const& b) -> difference_type { // NOLINT(readability-identifier-length) BLAS naming + assert(a.b_begin_ == b.b_begin_); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + return a.a_it_ - b.a_it_; + } + friend auto operator==(gemm_iterator const& self, gemm_iterator 
const& other) -> bool {return self.a_it_ == other.a_it_;} + friend auto operator!=(gemm_iterator const& self, gemm_iterator const& other) -> bool {return self.a_it_ != other.a_it_;} + + template + friend auto copy_n(gemm_iterator const& first, difference_type count, ItOut d_first) + ->decltype(blas::gemm_n(std::declval(), std::declval() , std::declval(), count, std::declval(), 0.0, d_first)) try { // std::complex NOLINT(fuchsia-default-arguments-calls) + return blas::gemm_n(first.ctxtp_ , static_cast(first.s_), first.a_it_ , count, first.b_begin_ , 0.0, d_first); // NOLINT(fuchsia-default-arguments-calls) + } catch(std::exception const& e) { + using namespace std::string_literals; + throw std::logic_error( + "in "s + __PRETTY_FUNCTION__ +"\nCouldn't decay product of arrays of size "+ std::to_string(count) +"x"+ std::to_string(first.a_it_->size()) + " and " + // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + std::to_string(first.a_it_->size())+ "x" +std::to_string(first.b_begin_->size()) + " into " + std::to_string(count) +"x" + std::to_string(first.b_begin_->size()) + + "\nbecause\n" + e.what() + ); + } + + template + friend auto copy(gemm_iterator const& first, gemm_iterator const& last, ItOut d_first) {assert(first.s_ == last.s_); + return copy_n(first, last - first, d_first); + } + + template + friend auto uninitialized_copy_n(gemm_iterator const& first, difference_type count, ItOut d_first) { + return copy_n(first, count, d_first); + } + + template + friend auto uninitialized_copy(gemm_iterator const& first, gemm_iterator const& last, ItOut d_first) { + assert( first.s_ == last.s_ ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + return uninitialized_copy_n(first, last - first, d_first); + } + + auto operator*() const {return reference{b_begin_->extensions()};} +}; + +template +class gemm_range { + ContextPtr ctxtp_; + Scalar s_; + ItA a_begin_; + ItA a_end_; + ItB b_begin_; + + public: 
+ gemm_range(gemm_range const&) = delete; + gemm_range(gemm_range&&) = delete; + auto operator=(gemm_range const&) -> gemm_range& = delete; + auto operator=(gemm_range&&) -> gemm_range& = delete; + ~gemm_range() = default; + + gemm_range(ContextPtr ctxtp, Scalar s, ItA a_first, ItA a_last, ItB b_first) // NOLINT(bugprone-easily-swappable-parameters,readability-identifier-length) BLAS naming + : ctxtp_{ctxtp} + , s_{s}, a_begin_{std::move(a_first)}, a_end_{std::move(a_last)} + , b_begin_{std::move(b_first)} + {} + + using iterator = gemm_iterator; + using decay_type = DecayType; + using size_type = typename decay_type::size_type; + + auto begin() const& -> iterator {return {ctxtp_, s_, a_begin_, b_begin_};} + auto end() const& -> iterator {return {ctxtp_, s_, a_end_ , b_begin_};} + friend auto begin(gemm_range const& self) {return self.begin();} + friend auto end (gemm_range const& self) {return self.end ();} + + auto size() const -> size_type {return a_end_ - a_begin_;} + + auto extensions() const -> typename decay_type::extensions_type {return size()*b_begin_->extensions();} + friend auto extensions(gemm_range const& self) {return self.extensions();} + + auto operator+() const -> decay_type {return *this;} // TODO(correaa) : investigate why return decay_type{*this} doesn't work + template + friend auto operator+=(Arr&& a, gemm_range const& self) -> Arr&& { // NOLINT(readability-identifier-length) BLAS naming + blas::gemm_n(self.ctxtp_, self.s_, self.a_begin_, self.a_end_ - self.a_begin_, self.b_begin_, 1., a.begin()); + return std::forward(a); + } + friend auto operator*(Scalar factor, gemm_range const& self) { + return gemm_range{self.ctxtp_, factor*self.s_, self.a_begin_, self.a_end_, self.b_begin_}; + } +}; + +template())>{}> > +auto gemm(ContextPtr ctxtp, Scalar s, A2D const& a, B2D const& b) // NOLINT(readability-identifier-length) BLAS naming +->gemm_range +{ + return {ctxtp, s, begin(a), end(a), begin(b)}; +} + +#if defined __NVCC__ + #ifdef 
__NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic push + #pragma nv_diag_suppress = implicit_return_from_non_void_function + #else + #pragma diagnostic push + #pragma diag_suppress = implicit_return_from_non_void_function + #endif +#elif defined __NVCOMPILER + #pragma diagnostic push + #pragma diag_suppress = implicit_return_from_non_void_function +#endif +template +auto gemm(Scalar s, A2D const& a, B2D const& b) { // NOLINT(readability-identifier-length) conventional BLAS naming + if constexpr(is_conjugated{}) { + auto ctxtp = blas::default_context_of(underlying(a.base())); + return blas::gemm(ctxtp, s, a, b); + } else { + auto ctxtp = blas::default_context_of(a.base()); + return blas::gemm(ctxtp, s, a, b); + } +} +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic pop + #else + #pragma diagnostic pop + #endif +#elif defined __NVCOMPILER + #pragma diagnostic pop +#endif + +namespace operators { + template =0> + auto operator*(A2D const& A, B2D const& B) // NOLINT(readability-identifier-length) conventional BLAS names + ->decltype(blas::gemm(1.0, A, B)) { + return blas::gemm(1.0, A, B); } +} // end namespace operators + +} // end namespace boost::multi::blas +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/gemv.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/gemv.hpp new file mode 100644 index 0000000000..09c815cf82 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/gemv.hpp @@ -0,0 +1,202 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_GEMV_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_GEMV_HPP + +#include "../blas/core.hpp" +#include "../blas/dot.hpp" + +#include "./../../detail/../utility.hpp" + +namespace boost::multi::blas { + +using core::gemv; + +struct gemv_stride_error : std::logic_error { + using std::logic_error::logic_error; +}; + +template +auto gemv_n(Context ctxt, typename MIt::element a, MIt m_first, Size count, XIt x_first, typename MIt::element b, YIt y_first) { // NOLINT(readability-identifier-length) BLAS naming + assert(m_first->stride()==1 || m_first.stride()==1); // blas doesn't implement this case + assert( x_first.base() != y_first.base() ); + + if constexpr(! is_conjugated::value) { + if (m_first .stride()==1) {ctxt->gemv('N', count, m_first->size(), &a, m_first.base() , m_first->stride(), x_first.base(), x_first.stride(), &b, y_first.base(), y_first.stride());} + else if(m_first->stride()==1) {ctxt->gemv('T', m_first->size(), count, &a, m_first.base() , m_first. stride(), x_first.base(), x_first.stride(), &b, y_first.base(), y_first.stride());} + else {throw gemv_stride_error{"not BLAS-implemented"};} // LCOV_EXCL_LINE + } else { + if (m_first->stride()==1) {ctxt->gemv('C', m_first->size(), count, &a, underlying(m_first.base()), m_first. 
stride(), x_first.base(), x_first.stride(), &b, y_first.base(), y_first.stride());} + else {throw gemv_stride_error{"not BLAS-implemented"};} // LCOV_EXCL_LINE + } + + struct { + MIt m_last; + YIt y_last; + } ret{m_first + count, y_first + count}; + + return ret; +} + +template +auto gemv_n(A a, MIt m_first, Size count, XIt x_first, B b, YIt y_first) { // NOLINT(readability-identifier-length) BLAS naming + blas::context ctxt; + return gemv_n(&ctxt, static_cast(a), m_first, count, x_first, static_cast(b), y_first); +} + +template +auto gemv(Ctxt ctxt, A const& a, M const& m, V const& v, B const& b, W&& w) -> W&& { // NOLINT(readability-identifier-length) BLAS naming + assert(size( m) == size(w) ); + assert(size(~m) == size(v) ); + + gemv_n(ctxt, static_cast(a), begin(m), size(m), begin(v), static_cast(b), begin(w)); // NOLINT(fuchsia-default-arguments-calls) + + return std::forward(w); +} + +template +auto gemv(A const& a, M const& m, V const& v, B const& b, W&& w) -> W&& { // NOLINT(readability-identifier-length) BLAS naming + assert(size( m) == size(w) ); + + if constexpr(is_conjugated{}) { + auto ctxtp = blas::default_context_of(underlying(m.base())); + return blas::gemv(ctxtp, a, m, v, b, std::forward(w)); + } else { + auto ctxtp = blas::default_context_of(m.base()); + return blas::gemv(ctxtp, a, m, v, b, std::forward(w)); + } +} + +template +class gemv_iterator { + Scalar alpha_ = 1.0; + It2D m_it_; + It1D v_first_; + Context ctxt_; + + public: + using difference_type = typename std::iterator_traits::difference_type; + using value_type = typename std::iterator_traits::value_type; + using pointer = void; + using reference = void; + using iterator_category = std::random_access_iterator_tag; + + friend auto operator-(gemv_iterator const& self, gemv_iterator const& other) -> difference_type { + assert(self.v_first_ == other.v_first_); + return self.m_it_ - other.m_it_; + } + template + friend auto copy_n(gemv_iterator first, difference_type count, It1DOut result){ 
+ if constexpr(std::is_same_v) {blas::gemv_n( static_cast(first.alpha_), first.m_it_, count, first.v_first_, Scalar{0.0}, result);} // NOLINT(fuchsia-default-arguments-calls) + else {blas::gemv_n(first.ctxt_, static_cast(first.alpha_), first.m_it_, count, first.v_first_, Scalar{0.0}, result);} // NOLINT(fuchsia-default-arguments-calls) + return result + count; + } + template + friend auto copy(gemv_iterator first, gemv_iterator last, It1DOut result){return copy_n(first, last - first, result);} + template + friend auto uninitialized_copy(gemv_iterator first, gemv_iterator last, It1DOut result) { + #if defined(__cpp_lib_start_lifetime_as) + auto count = last - first; + // or use start_lifetime_as_array(std::addressof(*result), count); since this is always called on contiguos iterators + for(; count > 0; ++result, --count) { + std::start_lifetime_as(std::addressof(*result)); + } + #endif + return copy(first, last, result); + } + gemv_iterator(Scalar alpha, It2D m_it, It1D v_first, Context ctxt) + : alpha_{alpha}, m_it_{std::move(m_it)}, v_first_{std::move(v_first)}, ctxt_{ctxt} {} + auto operator*() const { return value_type{0.0}; } // could be std::complex NOLINT(fuchsia-default-arguments-calls) +}; + +template +class gemv_range { + Scalar alpha_{1.0}; + It2D m_begin_; + It2D m_end_; + It1D v_first_; + Context ctxt_; + + public: + gemv_range(gemv_range&&) noexcept = default; + gemv_range(gemv_range const&) = delete; + ~gemv_range() = default; + auto operator=(gemv_range const&) = delete; + auto operator=(gemv_range&&) = delete; + + gemv_range(Scalar alpha, It2D m_first, It2D m_last, It1D v_first) // NOLINT(bugprone-easily-swappable-parameters) + : alpha_{alpha}, m_begin_{std::move(m_first)}, m_end_{std::move(m_last)}, v_first_{std::move(v_first)} { + assert(m_begin_.stride() == m_end_.stride()); + } + gemv_range(Context ctxt, Scalar alpha, It2D m_first, It2D m_last, It1D v_first) // NOLINT(bugprone-easily-swappable-parameters) + : alpha_{alpha} + , 
m_begin_{std::move(m_first)}, m_end_{std::move(m_last)} + , v_first_{std::move(v_first)} + , ctxt_{std::move(ctxt)} { + assert(m_begin_.stride() == m_end_.stride()); + } + using iterator = gemv_iterator; + using decay_type = DecayType; + + auto begin() const -> iterator{return {alpha_, m_begin_, v_first_, ctxt_};} + auto end() const -> iterator{return {alpha_, m_end_ , v_first_, ctxt_};} + + auto size() const -> size_type{return end() - begin();} + auto extensions() const -> typename decay_type::extensions_type{return typename decay_type::extensions_type{{0, size()}};} + auto decay() const{return decay_type{*this};} + + friend auto operator+(gemv_range const& self) {return self.decay();} + template + friend auto operator+=(V&& v, gemv_range const& s) -> V&& { // NOLINT(readability-identifier-length) BLAS naming + blas::gemv_n(s.ctxt_, static_cast(s.alpha_), s.m_begin_, s.m_end_ - s.m_begin_, s.v_first_, static_cast(1.0), v.begin()); + return std::forward(v); + } +}; + +template +auto gemv(Context ctxt, Scalar s, M const& m, V const& v) { // NOLINT(readability-identifier-length) BLAS naming + assert(size(~m) == size(v)); + return gemv_range(ctxt, s, m.begin(), m.end(), v.begin()); +} + +template +auto gemv(Scalar s, M const& m, V const& v) // NOLINT(readability-identifier-length) BLAS naming +{ + if constexpr(is_conjugated{}) { + auto ctxtp = blas::default_context_of(underlying(m.base())); + return blas::gemv(ctxtp, s, m, v); + } else { + auto ctxtp = blas::default_context_of(m.base()); + return blas::gemv(ctxtp, s, m, v); + } +} + +template +struct scaled_matrix { + T aa_; + Matrix const& A_; // NOLINT(readability-identifier-length,cppcoreguidelines-avoid-const-or-ref-data-members) BLAS naming + + template + friend auto operator%(scaled_matrix const& aaA, Vector const& x) { // NOLINT(readability-identifier-length) BLAS naming + return blas::gemv(aaA.aa_, aaA.A_, x); + } +}; + +namespace operators { + template + auto operator%(M const& m, V const& v) // 
NOLINT(readability-identifier-length) BLAS naming + ->decltype(+blas::gemv(1.0, m, v)) { + return +blas::gemv(1.0, m, v); } + + template =0> + auto operator*(typename Matrix::element_type aa, Matrix const& A) { // NOLINT(readability-identifier-length) BLAS naming + return scaled_matrix{aa, A}; + } + +} // end namespace operators + +} // end namespace boost::multi::blas + +#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/ger.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/ger.hpp similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/ger.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/ger.hpp diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/herk.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/herk.hpp similarity index 55% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/herk.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/herk.hpp index f58568cc2f..dee621b679 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/herk.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/herk.hpp @@ -1,8 +1,8 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -// Copyright 2019-2022 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. 
Correa -#ifndef MULTI_ADAPTORS_BLAS_HERK_HPP -#define MULTI_ADAPTORS_BLAS_HERK_HPP +#ifndef BOOST_MULTI_ADAPTORS_BLAS_HERK_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_HERK_HPP +#pragma once #include "../blas/copy.hpp" #include "../blas/core.hpp" @@ -13,15 +13,15 @@ namespace boost::multi::blas { -template{}, int> =0> +template{}, int> =0> auto base_aux(A&& array) -->decltype(base(array)) { - return base(array); } +->decltype(base(std::forward(array))) { + return base(std::forward(array)); } template{}, int> =0> auto base_aux(A&& array) -->decltype(underlying(base(array))) { - return underlying(base(array)); } +->decltype(underlying(base(std::forward(array)))) { + return underlying(base(std::forward(array))); } using core::herk; @@ -32,36 +32,38 @@ auto herk(filling c_side, AA alpha, A2D const& a, BB beta, C2D&& c) -> C2D&& { if(c.is_empty()) {return std::forward(c);} if constexpr(is_conjugated{}) { herk(flip(c_side), alpha, a, beta, hermitized(c)); + return std::forward(c); + } + + auto base_a = base_aux(a); // NOLINT(llvm-qualified-auto,readability-qualified-auto) TODO(correaa) + auto base_c = base_aux(c); // NOLINT(llvm-qualified-auto,readability-qualified-auto) TODO(correaa) + if constexpr(is_conjugated{}) { + // auto& ctxt = *blas::default_context_of(underlying(a.base())); + // if you get an error here might be due to lack of inclusion of a header file with the backend appropriate for your type of iterator + if (stride(a)==1 && stride(c)!=1) {herk(c_side==filling::upper?'L':'U', 'N', size(c), size(rotated(a)), &alpha, base_a, stride(rotated(a)), &beta, base_c, stride(c));} + else if(stride(a)==1 && stride(c)==1) { + if(size(a)==1) {herk(c_side==filling::upper?'L':'U', 'N', size(c), size(rotated(a)), &alpha, base_a, stride(rotated(a)), &beta, base_c, stride(c));} + else {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + } + else if(stride(a)!=1 && stride(c)==1) {herk(c_side==filling::upper?'U':'L', 'C', size(c), 
size(rotated(a)), &alpha, base_a, stride( a ), &beta, base_c, stride(rotated(c)));} + else if(stride(a)!=1 && stride(c)!=1) {herk(c_side==filling::upper?'L':'U', 'C', size(c), size(rotated(a)), &alpha, base_a, stride( a ), &beta, base_c, stride( c ));} + else {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) } else { - auto base_a = base_aux(a); - auto base_c = base_aux(c); // static_assert( not is_conjugated{}, "!" ); - if constexpr(is_conjugated{}) { - // auto& ctxt = *blas::default_context_of(underlying(a.base())); - // if you get an error here might be due to lack of inclusion of a header file with the backend appropriate for your type of iterator - if (stride(a)==1 and stride(c)!=1) {herk(c_side==filling::upper?'L':'U', 'N', size(c), size(rotated(a)), &alpha, base_a, stride(rotated(a)), &beta, base_c, stride(c));} - else if(stride(a)==1 and stride(c)==1) { - if(size(a)==1) {herk(c_side==filling::upper?'L':'U', 'N', size(c), size(rotated(a)), &alpha, base_a, stride(rotated(a)), &beta, base_c, stride(c));} - else {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - } - else if(stride(a)!=1 and stride(c)==1) {herk(c_side==filling::upper?'U':'L', 'C', size(c), size(rotated(a)), &alpha, base_a, stride( a ), &beta, base_c, stride(rotated(c)));} - else if(stride(a)!=1 and stride(c)!=1) {herk(c_side==filling::upper?'L':'U', 'C', size(c), size(rotated(a)), &alpha, base_a, stride( a ), &beta, base_c, stride( c ));} - else {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - } else { - // auto& ctxt = *blas::default_context_of( a.base() ); - if (stride(a)!=1 and stride(c)!=1) {herk(c_side==filling::upper?'L':'U', 'C', size(c), size(rotated(a)), &alpha, base_a, stride( a ), &beta, base_c, stride(c));} - else if(stride(a)!=1 and stride(c)==1) { - if(size(a)==1) {herk(c_side==filling::upper?'L':'U', 'N', size(c), size(rotated(a)), 
&alpha, base_a, stride(rotated(a)), &beta, base_c, stride(rotated(c)));} - else {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - } - else if(stride(a)==1 and stride(c)!=1) {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - else if(stride(a)==1 and stride(c)==1) {herk(c_side==filling::upper?'U':'L', 'N', size(c), size(rotated(a)), &alpha, base_a, stride(rotated(a)), &beta, base_c, stride(rotated(c)));} - // else {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + // auto& ctxt = *blas::default_context_of( a.base() ); + if (stride(a)!=1 && stride(c)!=1) {herk(c_side==filling::upper?'L':'U', 'C', size(c), size(rotated(a)), &alpha, base_a, stride( a ), &beta, base_c, stride(c));} + else if(stride(a)!=1 && stride(c)==1) { + if(size(a)==1) {herk(c_side==filling::upper?'L':'U', 'N', size(c), size(rotated(a)), &alpha, base_a, stride(rotated(a)), &beta, base_c, stride(rotated(c)));} + else {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) } + else if(stride(a)==1 && stride(c)!=1) {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + else if(stride(a)==1 && stride(c)==1) {herk(c_side==filling::upper?'U':'L', 'N', size(c), size(rotated(a)), &alpha, base_a, stride(rotated(a)), &beta, base_c, stride(rotated(c)));} + // else {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) } + return std::forward(c); } -template{}, int> =0> +template{}, int> =0> auto herk(filling c_side, AA alpha, A2D const& a, BB beta, C2D&& c) // NOLINT(readability-identifier-length) BLAS naming ->decltype(syrk(c_side, alpha, a, beta, std::forward(c))) { return syrk(c_side, alpha, a, beta, std::forward(c)); } diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/herk.su 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/herk.su similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/herk.su rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/herk.su diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/iamax.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/iamax.hpp new file mode 100644 index 0000000000..fef8158725 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/iamax.hpp @@ -0,0 +1,38 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_IAMAX_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_IAMAX_HPP + +#include "../blas/core.hpp" + +namespace boost::multi::blas { + +template +auto iamax_n(It first, Size n) { + using core::iamax; + return iamax(n, base(first), stride(first)); // if you get an error here make sure that you are including (and linking) the appropriate BLAS backend for your memory type +} + +template +auto iamax(It first, It last) + -> decltype(iamax_n(first, std::distance(first, last))) { + return iamax_n(first, std::distance(first, last)); +} + +template +auto iamax(X1D const& x) // NOLINT(readability-identifier-length) x conventional blas name + -> decltype(iamax(begin(x), end(x))) { + assert(! 
offset(x)); + return iamax(begin(x), end(x)); +} + +template +auto amax(X1D const& x) { // NOLINT(readability-identifier-length) x conventional blas name + return begin(x) + iamax(x); +} + +} // end namespace boost::multi::blas + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/nrm2.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/nrm2.hpp new file mode 100644 index 0000000000..5b0dfe583e --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/nrm2.hpp @@ -0,0 +1,106 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_NRM2_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_NRM2_HPP +#pragma once + +#include + +#include + +#include // std::norm + +namespace boost::multi::blas { + +using core::nrm2; + +using multi::base; +using std::norm; // nvcc11 needs using std::FUNCTION and the FUNCTION (and it works in clang, gcc, culang, icc) + +template +auto nrm2_n(It const& x, Size n, A0D res) // NOLINT(readability-identifier-length) conventional BLAS naming +//->decltype(blas::default_context_of(x.base())->nrm2(n, x.base(), x.stride(), res), std::next(res)) { // NOLINT(fuchsia-default-arguments-calls) +{ return blas::default_context_of(x.base())->nrm2(n, x.base(), x.stride(), res), std::next(res); } // NOLINT(fuchsia-default-arguments-calls) + +template +auto nrm2(A1D const& x, A0D&& res) // NOLINT(readability-identifier-length) conventional BLAS naming +//->decltype(nrm2_n(x.begin(), x.size(), &res)) { +{ return nrm2_n(std::begin(x), x.size(), &std::forward(res)); } + +template +class nrm2_ptr { + ItX x_first_; + Size count_; + + protected: + nrm2_ptr(ItX x_first, Size count) : x_first_{x_first}, count_{count} {} + + public: + explicit operator bool() const {return true;} + + template + friend constexpr auto copy_n(nrm2_ptr first, Size2 count, ItOut d_first) { 
+// ->decltype(blas::nrm2_n(std::declval(), Size2{} , d_first), d_first + count) { + assert(count == 1); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + return blas::nrm2_n(first.x_first_ , first.count_, d_first), d_first + count; } + + template + friend constexpr auto uninitialized_copy_n(nrm2_ptr first, Size2 count, ItOut d_first) + ->decltype(blas::nrm2_n(std::declval(), Size2{} , d_first), d_first + count) {assert(count == 1); + return blas::nrm2_n(first.x_first_ , first.count_, d_first), d_first + count; } + + template + static constexpr auto uninitialized_copy_n(nrm2_ptr first, Size2 count, ItOut d_first) + ->decltype(blas::nrm2_n(std::declval(), Size2{} , d_first), d_first + count) {assert(count == 1); + return blas::nrm2_n(first.x_first_ , first.count_, d_first), d_first + count; } +}; + +template> +struct nrm2_ref : private Ptr { + using decay_type = decltype(norm(std::declval())); + explicit nrm2_ref(X const& x) : Ptr{begin(x), size(x)} {} // NOLINT(readability-identifier-length) BLAS naming + + constexpr auto operator&() const& -> Ptr const& {return *this;} // NOLINT(google-runtime-operator) reference type //NOSONAR + + auto decay() const -> decay_type {decay_type ret; copy_n(operator&(), 1, &ret); return ret;} // NOLINT(fuchsia-default-arguments-calls) complex + operator decay_type() const {return decay();} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions,hicpp-explicit-conversion) //NOSONAR to allow terse syntax +// #if ! defined(__CUDACC__) || ! 
defined(__INTEL_COMPILER) +// friend auto operator*(decay_type const& lhs, dot_ref const& self) {return lhs*self.decay();} +// #endif + auto operator+() const -> decay_type {return decay();} + + auto operator==(nrm2_ref const& other) const -> bool {return decay() == other.decay();} + auto operator!=(nrm2_ref const& other) const -> bool {return decay() != other.decay();} +}; + +template +[[nodiscard]] +auto nrm2(X const& x) { // NOLINT(readability-identifier-length) BLAS naming + return nrm2_ref{x}; +} + +namespace operators { + using std::norm; + template()))>//decltype(norm(std::declval()))> + [[nodiscard]] auto operator^(A1D const& array, int n) + ->decltype(std::pow(Real{blas::nrm2(array)}, n)) { + return std::pow(Real{blas::nrm2(array)}, n); } + + template + [[nodiscard]] auto abs(A1D const& array) { + return blas::nrm2(array); + } + + template + [[nodiscard]] auto norm(A1D const& array) { + auto const sqrt = +blas::nrm2(array); + return sqrt*sqrt; + } + +} // end namespace operators + +} // end namespace boost::multi::blas + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/numeric.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/numeric.hpp new file mode 100644 index 0000000000..0ff3b392f9 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/numeric.hpp @@ -0,0 +1,304 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_NUMERIC_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_NUMERIC_HPP +#pragma once + +#include "../../adaptors/complex.hpp" + +#include + +#include + +#include "../blas/complex_traits.hpp" + +#include "../../detail/pointer_traits.hpp" + +#include "numeric/is_complex.hpp" + +#include + +#if defined(__NVCC__) +#define BOOST_MULTI_HD __host__ __device__ +#else +#define BOOST_MULTI_HD +#endif + +namespace boost { +namespace multi::blas { + +template struct complex_dummy { + T real; + T imag; +}; + +template< + class A, typename Complex = typename std::decay_t::element, typename T = typename multi::blas::complex_traits::real_type, + class = std::enable_if_t::value>> +auto real(A&& array) + -> decltype(std::forward(array).template reinterpret_array_cast>().template member_cast(&complex_dummy::real)) { + return std::forward(array).template reinterpret_array_cast>().template member_cast(&complex_dummy::real); +} + +template< + class A, class Complex = typename std::decay_t::element_type, typename T = typename complex_traits::real_type, + class = std::enable_if_t::value>> +auto imag(A&& array) + -> decltype(std::forward(array).template reinterpret_array_cast>().template member_cast(&complex_dummy::imag)) { + return std::forward(array).template reinterpret_array_cast>().template member_cast(&complex_dummy::imag); +} + +template::element, typename RealElem = typename ComplexElem::value_type, + class = std::enable_if_t::value>> +auto real_doubled(ComplexArr&& array) { // produces a real view of complex array with the last dimension duplicated and with interleaved real imaginary parts + return std::forward(array).template reinterpret_array_cast(2).rotated().flatted().unrotated(); +} + +template class involuted; + +template::reference, F>> class involuter; + +template +class involuted { + Ref r_; // [[no_unique_address]] // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) + Involution f_; + + public: + 
using decay_type = std::decay_t()(std::declval()))>; + + constexpr explicit involuted(Ref& ref, Involution fun) : r_{ref}, f_{fun} {} // r_{std::forward(ref)}, f_{fun} {} + constexpr explicit involuted(Ref& ref) : r_{ref}, f_{} {} + + ~involuted() = default; + + involuted(involuted const&) = delete; + involuted(involuted&&) noexcept = default; + + auto operator=(involuted const& other) -> involuted& = delete; + auto operator=(involuted&& other) noexcept -> involuted& = default; + + constexpr auto decay() const& -> decay_type { return f_(r_); } + + constexpr explicit operator decay_type() & { return f_(r_); } + constexpr explicit operator decay_type() const& { return f_(r_); } + constexpr /*plct*/ operator decay_type() && { return f_(r_); } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) //NOSONAR to allow terse syntax + + constexpr auto operator*(decay_type const& other) const { return f_(r_) * other; } + + template() = (std::declval())(std::declval()))> + constexpr auto operator=(DecayType&& other) & -> involuted& { + r_ = f_(std::forward(other)); + return *this; + } + + template() = (std::declval())(std::declval()))> + constexpr auto operator=(DecayType&& other) && -> involuted& { + r_ = f_(std::forward(other)); + return *this; + } + + template + friend constexpr auto operator==(involuted const& self, DecayType const& other) + -> decltype(std::declval() == other) { + return self.operator decay_type() == other; + } + template + friend constexpr auto operator!=(involuted const& self, DecayType const& other) + -> decltype(std::declval() != other) { + return self.operator decay_type() != other; + } + + friend constexpr auto operator==(decay_type const& other, involuted const& self) -> bool { + return other == self.operator decay_type(); + } + + friend constexpr auto operator!=(decay_type const& other, involuted const& self) -> bool { + return other != self.operator decay_type(); + } + + template{}, int> = 0> + friend constexpr auto 
operator==(DecayType const& other, involuted const& self) { + return other == self.operator decay_type(); + } + template{}, int> = 0> + friend constexpr auto operator!=(DecayType const& other, involuted const& self) { + return other != self.operator decay_type(); + } + + template friend constexpr auto operator<<(Sink&& sink, involuted const& self) -> Sink& { + return std::forward(sink) << self.operator decay_type(); + } + + constexpr auto conj() const& { return ::boost::multi::adl_conj(this->operator decay_type()); } + + template + friend constexpr auto imag(involuted const& self) { + //->decltype(imag(std::declval())) { + return self.operator decay_type().imag(); + } +}; + +#if defined(__cpp_deduction_guides) +template involuted(T&&, F) -> involuted; +#endif + +template +auto default_allocator_of(involuter const& iv) { + return default_allocator_of(iv.it_); +} + +template +class involuter { + It it_; + F f_; // [[no_unique_address]] + template friend class involuter; + + public: + using difference_type = typename std::iterator_traits::difference_type; + using value_type = typename std::iterator_traits::value_type; + using pointer = involuter; // svoid; // typename std::iterator_traits::pointer + using reference = Reference; + using iterator_category = typename std::iterator_traits::iterator_category; + using element_type = typename std::pointer_traits::element_type; + template using rebind = involuter::template rebind, F>; + + involuter() = default; + + BOOST_MULTI_HD constexpr explicit involuter(It it) : it_{std::move(it)}, f_{} {} + BOOST_MULTI_HD constexpr explicit involuter(It it, F fun) : it_{std::move(it)}, f_{std::move(fun)} {} + + template(typename Other::underlying_type{}))* = nullptr> + // cppcheck-suppress noExplicitConstructor + BOOST_MULTI_HD constexpr /*implct*/ involuter(Other const& other) : it_{other.it_}, f_{other.f_} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR inherit implicit conversion of underlying type + 
template(typename Other::underlying_type{}))* = nullptr> + BOOST_MULTI_HD constexpr explicit involuter(Other const& other) : it_{other.it_}, f_{other.f_} {} + + constexpr auto operator*() const { return reference{*it_, f_}; } + constexpr auto operator[](difference_type n) const { return reference{*(it_ + n), f_}; } + + auto operator==(involuter const& other) const -> bool { return it_ == other.it_; } + auto operator!=(involuter const& other) const -> bool { return it_ != other.it_; } + + constexpr auto operator+=(difference_type n) -> involuter& { + it_ += n; + return *this; + } + constexpr auto operator-=(difference_type n) -> involuter& { + it_ -= n; + return *this; + } + + constexpr auto operator+(difference_type n) const { return involuter{it_ + n, f_}; } + constexpr auto operator-(difference_type n) const { return involuter{it_ - n, f_}; } + + auto operator-(involuter const& other) const { return it_ - other.it_; } + + explicit operator bool() const { return it_; } + using underlying_type = It; + friend /*constexpr*/ auto underlying(involuter const& self) -> underlying_type { return self.it_; } + constexpr explicit operator It() const { return underlying(*this); } + + friend auto default_allocator_of(involuter const& inv) { + using multi::default_allocator_of; + return default_allocator_of(inv.it_); + } + + using default_allocator_type = typename multi::pointer_traits::default_allocator_type; + + friend auto get_allocator(involuter const& inv) { + using boost::multi::get_allocator; + return get_allocator(inv.it_); + } +}; + +template using negated = involuted>; +template using negater = involuter>; + +struct conjugate { + template + constexpr auto operator()(Complex const& zee) const { + // using std::conj; // for doubles? 
+ return conj(zee); + } + +#if defined(__CUDACC__) + template + constexpr auto operator()(::thrust::tagged_reference zee) const { + return conj(static_cast(zee)); + } +#endif +#if defined(__HIPCC__) + template + constexpr auto operator()(::thrust::tagged_reference zee) const { + return conj(static_cast(zee)); + } +#endif +}; + +template using conjugated = involuted; + +template using conjugater = involuter; + +template auto make_conjugater(It it) { return conjugater{it}; } +template auto make_conjugater(conjugater it) -> It { return underlying(it); } + +template auto imag(involuted const& inv) { return inv.decay().imag(); } +template auto real(involuted const& inv) { return inv.decay().real(); } + +template auto has_imag_fun_aux(T const& value) -> decltype((void)imag(value), std::true_type{}); +inline auto has_imag_fun_aux(...) -> decltype(std::false_type{}); +template struct has_imag_fun : decltype(has_imag_fun_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + +template auto has_imag_mem_aux(T const& value) -> decltype((void)value.imag(), std::true_type{}); +inline auto has_imag_mem_aux(...) -> decltype(std::false_type{}); +template struct has_imag_mem : decltype(has_imag_mem_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + +template struct has_imag : std::integral_constant{} || has_imag_mem{})> {}; + +template +struct is_complex_array : has_imag().base())>>::element_type>> {}; + +template struct is_complex : has_imag {}; + +template +auto is_conjugated_aux(conjugater const& /*self*/) -> std::true_type; +inline auto is_conjugated_aux(...) 
-> std::false_type; + +template struct is_conjugated : decltype(is_conjugated_aux((std::declval()).base())) { // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + template constexpr auto operator()(AA&& /*unused*/) { return is_conjugated_aux((std::declval()).base()); } // NOLINT(cppcoreguidelines-missing-std-forward) +}; + +template, typename Elem = typename D::element_type, typename Ptr = typename D::element_ptr, + std::enable_if_t{}, int> = 0> +auto conj(A&& array) -> A&& { + return std::forward(array); +} + +template< + class A, class D = std::decay_t, typename Elem = typename D::element_type, + typename Ptr = std::decay_t().base())>, std::enable_if_t{} && is_complex_array{}, int> = 0> +auto conj(A&& array) -> decltype(auto) { + return std::forward(array).template static_array_cast>(); +} + +template, typename Elem = typename D::element_type, + typename Ptr = typename decltype(std::declval().base())::underlying_type, std::enable_if_t{}, int> = 0> +auto conj(A&& array) + -> decltype(std::forward(array).template static_array_cast()) { + return std::forward(array).template static_array_cast(); +} + +} // end namespace multi::blas + +template +auto default_allocator_of(multi::blas::involuter it) { + return multi::default_allocator_of(it.underlying()); +} + +} // end namespace boost + +#undef BOOST_MULTI_HD + +#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/numeric/is_complex.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/numeric/is_complex.hpp similarity index 84% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/numeric/is_complex.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/numeric/is_complex.hpp index 9c52ef32e0..ceae5d78c3 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/numeric/is_complex.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/numeric/is_complex.hpp @@ -1,8 +1,10 @@ -// 
-*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. Correa +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -#ifndef MULTI_ADAPTORS_BLAS_NUMERIC_IS_COMPLEX_HPP -#define MULTI_ADAPTORS_BLAS_NUMERIC_IS_COMPLEX_HPP +#ifndef BOOST_MULTI_ADAPTORS_BLAS_NUMERIC_IS_COMPLEX_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_NUMERIC_IS_COMPLEX_HPP +#pragma once #include #include @@ -33,7 +35,7 @@ template struct has_imag : decltype(has_imag_aux(std::declval())){}; template constexpr bool has_imag_v = has_imag::value; template struct is_complex : std::integral_constant or has_real_fun_v) and (has_imag_v or has_imag_fun_v) + (has_real_v || has_real_fun_v) && (has_imag_v || has_imag_fun_v) >{}; template auto real_is_aux(T const& value) -> typename std::is_same; @@ -44,7 +46,7 @@ template auto imag_is_aux(T const& value) -> typename std::is_ template auto imag_is_aux(... 
) -> false_type; template struct imag_is : decltype(imag_is_aux(std::declval())){}; -template struct is_complex_of : std::integral_constant::value and imag_is::value>{}; +template struct is_complex_of : std::integral_constant::value && imag_is::value>{}; } // end namespace boost::multi::blas::numeric diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/operations.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/operations.hpp similarity index 54% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/operations.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/operations.hpp index a5c52c12be..1638524bbc 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/operations.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/operations.hpp @@ -1,10 +1,12 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt -#ifndef MULTI_ADAPTORS_BLAS_OPERATIONS_HPP -#define MULTI_ADAPTORS_BLAS_OPERATIONS_HPP +#ifndef BOOST_MULTI_ADAPTORS_BLAS_OPERATIONS_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_OPERATIONS_HPP +#pragma once -#include "../blas/numeric.hpp" +#include namespace boost::multi::blas { @@ -17,6 +19,11 @@ auto conjugated_transposed(A&& array) -> decltype(auto) { template auto identity(A&& array) -> decltype(auto) {return std::forward(array);} +// template, typename E=typename D::element_type> +// auto conjugated(A&& array) -> decltype(auto) { +// return blas::conj(std::forward(array)); +// } + template auto hermitized(A&& array, std::true_type /*true */) -> decltype(auto) { return conjugated_transposed(std::forward(array)); @@ -37,36 +44,45 @@ namespace operators { [[maybe_unused]] constexpr static struct { - template::rank_v == 2, int> =0> - auto operator()(A&& array) const -> decltype(auto){return hermitized(std::forward(array));} + template::rank::value == 2, int> =0> + [[nodiscard]] auto operator()(A&& array) const -> decltype(auto) { return hermitized(std::forward(array)); } - template::rank_v == 1, int> =0> + template::rank::value == 1, int> =0> [[deprecated("use blas::C instead of blas::H for conjugated vectors to avoid confusions")]] - auto operator()(A&& array) const -> decltype(auto){return blas::conj(std::forward(array));} + [[nodiscard]] auto operator()(A&& array) const -> decltype(auto) { return blas::conj(std::forward(array)); } } H; // NOLINT(readability-identifier-length) conventional name in BLAS template auto operator^(A&& array, Op op) -->decltype(op(std::forward(array))){ - return op(std::forward(array));} +->decltype(op(std::forward(array))) { + return op(std::forward(array)); } } // end namespace operators using operators::H; -template::rank_v == 1, int> =0> +template::rank::value == 1, int> =0> auto C(A&& array) -> decltype(auto) {return blas::conj(std::forward(array));} // 
NOLINT(readability-identifier-naming,readability-identifier-length) : conventional one-letter operation BLAS -template::rank_v == 2, int> =0> +template::rank::value == 2, int> =0> +[[deprecated("use blas::H instead of blas::C for conjugated transposed matrices to avoid confusion, use blas::J for only-conjugation of matrices")]] auto C(A&& array) -> decltype(auto) {return hermitized(std::forward(array));} // NOLINT(readability-identifier-naming,readability-identifier-length) : conventional one-letter operation BLAS -namespace operators{ +template::rank::value == 2, int> =0> +auto J(A&& array) -> decltype(auto) {return blas::conj(std::forward(array));} // NOLINT(readability-identifier-naming,readability-identifier-length) : conventional one-letter operation BLAS + +namespace operators { template auto operator*(A&& array) - ->decltype(blas::conj(std::forward(array))){ - return blas::conj(std::forward(array));} + ->decltype(blas::conj(std::forward(array))) { + return blas::conj(std::forward(array)); } + + template + auto operator~(A&& array) + ->decltype(blas::transposed(std::forward(array))) { + return blas::transposed(std::forward(array)); } } // end namespace operators diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/scal.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/scal.hpp new file mode 100644 index 0000000000..40187c2dae --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/scal.hpp @@ -0,0 +1,54 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_SCAL_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_SCAL_HPP +#pragma once + +#include "../blas/core.hpp" + +namespace boost::multi::blas { + +using core::scal; + +template +auto scal_n(typename It::element a, It first, Size count) { // NOLINT(readability-identifier-length) conventional BLAS naming + auto ctxt = blas::default_context_of(first.base()); + ctxt->scal(count, &a, first.base(), first.stride()); +} + +template +auto scal(Scalar const& a, It1D first, It1D last) // NOLINT(readability-identifier-length) conventional BLAS naming +->decltype(blas::scal_n(a, first, last - first)) { // NOLINT(fuchsia-default-arguments-calls) allow a possible double -> complex conversion (with default 0 imag part) + return blas::scal_n(a, first, last - first); } // NOLINT(fuchsia-default-arguments-calls) same + +template // don't do this: ", typename Elem = typename X1D::element_type>" +auto scal(Scalar const& a, X1D&& x) // NOLINT(readability-identifier-length) conventional BLAS naming +->decltype(blas::scal(a, x.begin(), x.end()), std::forward(x)) { + return blas::scal(a, x.begin(), x.end()), std::forward(x); } + +template +class scal_range { + A alpha_; + + public: + using scalar_type = A; + explicit scal_range(A const& alpha) : alpha_{alpha} {} + template + friend auto operator*=(X1D&& x, scal_range const& self) // NOLINT(readability-identifier-length) conventional BLAS naming + ->decltype(std::forward(scal(std::declval(), std::forward(x)))) { + return std::forward(scal(self.alpha_, std::forward(x)));} +}; + +template auto scal(A const& array) {return scal_range{array};} + +namespace operators { + template + auto operator*=(X&& x, Scalar const& alpha) -> X&& { // NOLINT(readability-identifier-length) conventional BLAS naming + return blas::scal(alpha, std::forward(x)); + } +} // end namespace operators + +} // end namespace boost::multi::blas +#endif diff --git 
a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/side.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/side.hpp new file mode 100644 index 0000000000..c3fe0bb0f0 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/side.hpp @@ -0,0 +1,24 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_SIDE_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_SIDE_HPP +#pragma once + +namespace boost::multi::blas { + +enum class side : char { + left = 'L', + right = 'R' +}; + +inline auto swap(side sid) noexcept -> side { + switch(sid) { + case side::left : return side::right; + case side::right: return side::left ; + } __builtin_unreachable(); // LCOV_EXCL_LINE +} + +} // end namespace boost::multi::blas +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/swap.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/swap.hpp new file mode 100644 index 0000000000..bb3e766c83 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/swap.hpp @@ -0,0 +1,48 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_SWAP_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_SWAP_HPP +#pragma once + +#include + +namespace boost::multi::blas { + +using core::swap; + +template +auto swap_n(It1 first, Size count, It2 first2) -> It2 { + blas::default_context_of(base(first))->swap(count, base(first), stride(first), base(first2), stride(first2)); + return first2 + count; +} + +template +auto swap(It1 first, It2 last, It2 first2) noexcept -> It2 { + assert(stride(first) == stride(last)); + return swap_n(first, last - first, first2); +} + +template +auto swap(X1D&& x, Y1D&& y) noexcept(false) -> Y1D&& { // NOLINT(readability-identifier-length) x, y conventional blas names, // NOSONAR(cpp:S5018) this swap can "fail" if sizes do not match + assert( size(x) == size(y) ); + swap( std::begin(x), std::end(std::forward(x)), std::begin(y) ); + return std::forward(y); +} + +template +auto swap(X1D const&, Y1D const&) noexcept(false) = delete; // NOSONAR(cpp:S5018) this swap can "fail" if sizes do not match + +template +auto operator^(X1D&& x, Y1D&& y) { // NOLINT(readability-identifier-length) BLAS naming + blas::swap(x, y); + return std::tie(std::forward(x), std::forward(y)); // or use std::forward_as_tuple ? +} + +namespace operators { + using blas::operator^; +} // end namespace operators + +} // end namespace boost::multi::blas +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/syrk.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/syrk.hpp new file mode 100644 index 0000000000..46686f1fe2 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/syrk.hpp @@ -0,0 +1,64 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_SYRK_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_SYRK_HPP +#pragma once + +#include "../blas/core.hpp" +#include "../blas/filling.hpp" +#include "../blas/numeric.hpp" + +namespace boost::multi::blas { + +using core::syrk; + +template +auto syrk(filling c_side, typename A2D::element alpha, A2D const& a, typename A2D::element beta, C2D&& c) { // NOLINT(readability-identifier-length) BLAS naming + //->decltype(syrk('\0', '\0', size(c), size(a), alpha, base(a), stride(rotated(a)), beta, base(c), stride(c)), std::forward(c)){ + assert(size(c) == size(rotated(c))); + if(stride(a) == 1) { + if(stride(c) == 1) { + syrk(flip(c_side) == filling::upper ? 'L' : 'U', 'N', size(c), size(a), &alpha, base(a), stride(rotated(a)), &beta, base(c), stride(rotated(c))); + } else { + syrk(c_side == filling::upper ? 'L' : 'U', 'N', size(c), size(rotated(a)), &alpha, base(a), stride(rotated(a)), &beta, base(c), stride(c)); + } + } else { + if(stride(c) == 1) { + syrk(flip(c_side) == filling::upper ? 'L' : 'U', 'T', size(c), size(rotated(a)), &alpha, base(a), stride(a), &beta, base(c), stride(rotated(c))); + } else { + syrk(c_side == filling::upper ? 
'L' : 'U', 'T', size(c), size(rotated(a)), &alpha, base(a), stride(a), &beta, base(c), stride(c)); + } + } + return std::forward(c); +} + +template +auto syrk(filling c_side, AA alpha, A2D const& a, C2D&& c) // NOLINT(readability-identifier-length) BLAS naming + -> decltype(syrk(c_side, alpha, a, 0.0, std::forward(c))) { + return syrk(c_side, alpha, a, 0.0, std::forward(c)); +} + +// template +// auto syrk(AA alpha, A2D const& a, C2D&& c) // NOLINT(readability-identifier-length) BLAS naming +// -> decltype(syrk(filling::upper, alpha, a, syrk(filling::lower, alpha, a, std::forward(c)))) { +// return syrk(filling::upper, alpha, a, syrk(filling::lower, alpha, a, std::forward(c))); +// } + +// template +// [[nodiscard]] // ("because input argument is const") +// // this decay in the return type is important +// auto // NOLINTNEXTLINE(readability-identifier-length) BLAS naming +// syrk(AA alpha, A2D const& a) -> std::decay_t { +// return syrk(alpha, a, Ret({size(a), size(a)}, get_allocator(a))); +// } + +// template +// [[nodiscard]] auto syrk(A2D const& A) // NOLINT(readability-identifier-length) BLAS naming +// -> decltype(syrk(1.0, A)) { +// return syrk(1.0, A); +// } + +} // end namespace boost::multi::blas +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/CMakeLists.txt new file mode 100644 index 0000000000..0848f44875 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/CMakeLists.txt @@ -0,0 +1,189 @@ +cmake_minimum_required(VERSION 3.18) # required by BLAS::BLAS + +if((NOT + CMAKE_CXX_COMPILER_ID + STREQUAL + "PGI" + ) + AND (NOT + CMAKE_CXX_COMPILER_ID + STREQUAL + "NVHPC" + ) + AND (NOT + DART_COMPILER_NAME + STREQUAL + "nvcc" + ) + AND (NOT + DART_COMPILER_NAME + STREQUAL + "icpc" + ) +) + find_package(Boost REQUIRED COMPONENTS unit_test_framework) +# 
link_libraries("-lboost_unit_test_framework") + + find_package(BLAS REQUIRED) + find_path( + BLAS_INCLUDE_DIRS + cblas.h + /usr/include + /usr/local/include + $ENV{BLAS_HOME}/include + ) +# include_directories(${TEST_EXE} PRIVATE ${BLAS_INCLUDE_DIRS}) + link_libraries(${BLAS_LIBRARIES}) +else() + find_package(Boost REQUIRED) # cmake cannot detect this component with pgi compiler + link_libraries("-lboost_unit_test_framework") + + link_libraries("-lblas") # cmake cannot detect BLAS with pgi/nvc++ but it ships with its own version +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL "PGI") +# add_definitions(-DRETURN_BY_STACK) + add_definitions(-DBLAS_DOT_RETURNS_VOID) +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") # nvhpc will find its own blas version +# add_definitions(-DRETURN_BY_STACK) + add_definitions(-DBLAS_DOT_RETURNS_VOID) +endif() + + +if(ENABLE_CUDA OR DEFINED CXXCUDA) + enable_language(CUDA) + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES native) + endif() +endif() + +enable_testing() +list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") # needs cmake 3.17 +include(CTest) + +configure_file("config.hpp.in" ${CMAKE_BINARY_DIR}/config.hpp) + +include_directories(${CMAKE_BINARY_DIR}) + +# file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) +set(TEST_SRCS + axpy.cpp + copy.cpp + dot.cpp + herk.cpp + gemv.cpp + gemm.cpp + numeric.cpp + nrm2.cpp + scal.cpp + syrk.cpp + traits.cpp + trsm.cpp +) + +foreach(TEST_FILE ${TEST_SRCS}) + set(TEST_EXE "${TEST_FILE}.x") + add_executable(${TEST_EXE} ${TEST_FILE}) + if(ENABLE_CUDA OR DEFINED CXXCUDA) + set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) + #set_property(TARGET ${TEST_EXE} PROPERTY "${CUDA_ARCH_LIST}") + target_compile_options(${TEST_EXE} PRIVATE -std=c++17) + endif() + + target_include_directories(${TEST_EXE} PRIVATE ${PROJECT_SOURCE_DIR}/include) + + target_link_libraries (${TEST_EXE} PRIVATE Boost::unit_test_framework ) + # 
target_include_directories(${TEST_EXE} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS} ) + + target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") + target_compile_definitions(${TEST_EXE} PRIVATE BOOST_TEST_DYN_LINK=1) + target_compile_definitions(${TEST_EXE} PRIVATE BOOST_TEST_MODULE="C++ Unit Tests for Multi BLAS") + target_compile_options( + ${TEST_EXE} PRIVATE + $<$: + -Wno-unknown-warning-option # for -Wno-enum-constexpr-conversion in older versions + -Wno-enum-constexpr-conversion # for Boost.Test + > + ) + + target_link_libraries(${TEST_EXE} PRIVATE ${BLAS_LIBRARIES}) # there is no BLAS_INCLUDE_DIRS + + target_link_libraries(${TEST_EXE} PRIVATE multi) + target_link_libraries(${TEST_EXE} PRIVATE BLAS::BLAS) + + if(NOT ENABLE_CIRCLE) + if(ENABLE_CUDA) + target_compile_options( + ${TEST_EXE} + PRIVATE + $<$,$>: + -Wno-unknown-warning-option + #-Wextra + #-Wpedantic + #-Wmove + -Wno-enum-constexpr-conversion # for Boost.Test + #-Wno-error=\#warnings + > + ) + endif() + + if(NOT ENABLE_CUDA + AND (NOT + DART_COMPILER_NAME + STREQUAL + "nvcc" + ) + ) + target_compile_options( + ${TEST_EXE} + PRIVATE -Werror + -Wall + $<$: + -Wextra + -Wpedantic + -Wformat-truncation + -Wno-unknown-pragmas> # -Wconversion + $<$,$>: + -Wextra + -Wpedantic + -Wmove + -Wno-error=\#warnings> + $<$: + -Wextra + -wd161 + -diag-disable=remark + -Warray-bounds + -Wchar-subscripts + -Wcomment + -Wenum-compare + -Wno-enum-constexpr-conversion # for Boost.Test + -Wformat + -Wuninitialized + -Wmaybe-uninitialized + -Wmain + -Wnarrowing + -Wnonnull + -Wparentheses + -Wpointer-sign + -Wreorder + -Wno-return-type + -Wsign-compare + -Wsequence-point + -Wtrigraphs + -Wunused-function + -Wunused-but-set-variable + -Wunused-variable + -Wwrite-strings + -diag-error:3846> + $<$,$>: # EDG diagnostics list: http://www.ssl.berkeley.edu/~jimm/grizzly_docs/SSL/opt/intel/cc/9.0/lib/locale/en_US/mcpcom.msg + --display_error_number 
--diag_error=incompatible_assignment_operands,returning_ptr_to_local_variable,subscript_out_of_range,used_before_set,undefined_preproc_id,implicit_func_decl,implicit_return_from_non_void_function,missing_type_specifier + > + $<$: + /W4> + ) + endif() + endif() + add_test(NAME ${TEST_EXE} COMMAND $) +endforeach() diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/asum.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/asum.cpp new file mode 100644 index 0000000000..84a4ff563e --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/asum.cpp @@ -0,0 +1,89 @@ +// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- +// Copyright 2019-2024 Alfredo A. Correa + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS asum" +#include + +#include "../../../adaptors/cuda.hpp" +#include "../../../array.hpp" +#include "../../blas/asum.hpp" +#include "../../blas/cuda.hpp" + +#include "multi/adaptors/complex.hpp" + +#include +#include + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(const multi_blas_asum_double) { + multi::array const A = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + using multi::blas::asum; + BOOST_REQUIRE(asum(A[1]) == std::accumulate(begin(A[1]), end(A[1]), 0.0, [](auto&& a, auto&& b) { return a + std::abs(b); })); +} + +BOOST_AUTO_TEST_CASE(const multi_blas_asum_complex) { + using Z = multi::complex; // std::complex; + + auto const I = Z{0.0, 1.0}; + + multi::array const A = { + {1.0 + 2.0 * I, 2.0, 3.0, 4.0}, + { 5.0, 6.0 + 3.0 * I, 7.0, 8.0}, + { 9.0, 10.0, 11.0 + 4.0 * I, 12.0}, + }; + + using multi::blas::asum; + BOOST_REQUIRE( + asum(A[1]) == std::accumulate( + begin(A[1]), end(A[1]), 0.0, + [](auto&& a, auto&& b) { return a + std::abs(real(b)) + std::abs(imag(b)); } + ) + ); +} + +BOOST_AUTO_TEST_CASE(const multi_blas_asum_double_cuda) { + multi::cuda::array const A = { + {1.0, 2.0, 3.0, 4.0}, 
+ {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + using multi::blas::asum; + BOOST_REQUIRE(asum(A[1]) == 26.0 ); +} + +using complex = multi::complex; +constexpr auto I = complex{0.0, 1.0}; + +BOOST_AUTO_TEST_CASE(const multi_blas_asum_complex_cuda) { + namespace blas = multi::blas; + + multi::cuda::array const A = { + {1.0 + 2.0 * I, 2.0 , 3.0 , 4.0}, + {5.0 , 6.0 + 3.0 * I, 7.0 , 8.0}, + {9.0 , 10.0 , 11.0 + 4.0 * I, 12.0}, + }; + + BOOST_REQUIRE( blas::asum(A[1] ) == 29.0 ); + BOOST_REQUIRE( blas::asum(A[1]({0, 4})) == 29.0 ); +} + +BOOST_AUTO_TEST_CASE(const multi_blas_asum_complex_cuda_mutable) { + using Z = multi::complex; + + auto const I = Z{0.0, 1.0}; + + multi::cuda::array const A = { + {1.0 + 2.0 * I, 2.0 , 3.0 , 4.0}, + {5.0 , 6.0 + 3.0 * I, 7.0 , 8.0}, + {9.0 , 10.0 , 11.0 + 4.0 * I, 12.0}, + }; + + using multi::blas::asum; + BOOST_REQUIRE( asum(A[1] ) == Z{29.0} ); + BOOST_REQUIRE( asum(A[1]({0, 4})) == Z{29.0} ); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/axpy.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/axpy.cpp new file mode 100644 index 0000000000..809464dbe5 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/axpy.cpp @@ -0,0 +1,190 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include +#include + +#include + +#include + +namespace multi = boost::multi; +namespace blas = multi::blas; + +using complex = multi::complex; // test internal implementation of complex (partially formed complex) + +BOOST_AUTO_TEST_CASE(multi_blas_axpy_real) { + multi::array arr = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + + auto const AC = arr; + + multi::array const b = arr[2]; // NOLINT(readability-identifier-length) BLAS naming + + blas::axpy(2.0, b, arr[1]); // daxpy + BOOST_REQUIRE( arr[1][2] == 2.0*b[2] + AC[1][2] ); +} + +BOOST_AUTO_TEST_CASE(blas_axpy_repeat) { + multi::array a1D = multi::iextension(3); + BOOST_REQUIRE( a1D[0] == 0.0 ); + BOOST_REQUIRE( a1D[1] == 1.0 ); + BOOST_REQUIRE( a1D[2] == 2.0 ); + + multi::array const b1D = {3.0, 3.0, 3.0}; + + blas::axpy(1.0, b1D, a1D); + BOOST_REQUIRE( a1D[0] == 3.0 ); + BOOST_REQUIRE( a1D[1] == 4.0 ); + BOOST_REQUIRE( a1D[2] == 5.0 ); + + // BOOST_REQUIRE(( multi::array(3.0).broadcasted().size() != 0 )); + + blas::axpy_n(1.0, multi::array(3.0).broadcasted().begin(), 3, a1D.begin()); + BOOST_REQUIRE( a1D[0] == 6.0 ); + BOOST_REQUIRE( a1D[1] == 7.0 ); + BOOST_REQUIRE( a1D[2] == 8.0 ); + + // blas::axpy(1.0, multi::array(3.0).broadcasted(), a1D); + // BOOST_REQUIRE( a1D[0] == 6.0 ); + // BOOST_REQUIRE( a1D[1] == 7.0 ); + // BOOST_REQUIRE( a1D[2] == 8.0 ); + + // blas::axpy(2.0, b, arr[1]); // daxpy + // BOOST_REQUIRE( arr[1][2] == 2.0*b[2] + AC[1][2] ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_axpy_double) { + multi::array const const_arr = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + multi::array arr = const_arr; + multi::array const b = const_arr[2]; // NOLINT(readability-identifier-length) conventional name in BLAS + + blas::axpy(2.0, b, arr[1]); // A[1] = 2*b + A[1], A[1]+= a*A[1] + BOOST_REQUIRE( arr[1][2] == 2.0*b[2] + const_arr[1][2] ); + + auto const I = complex{0, 
1}; // NOLINT(readability-identifier-length) imaginary unit + + multi::array AC = {1.0 + 2.0 * I, 3.0 + 4.0 * I, 4.0 - 8.0 * I}; + multi::array BC(extensions(AC), complex{0.0, 0.0}); + + blas::axpy(+1.0, blas::real(AC), blas::real(BC)); + blas::axpy(-1.0, blas::imag(AC), blas::imag(BC)); + + // BOOST_REQUIRE( BC[2] == std::conj(AC[2]) ); + BOOST_REQUIRE( BC[2] == conj(AC[2]) ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex) { + multi::array arr = { + {{1.0, 0.0}, {2.0, 0.0}, {3.0, 0.0}, {4.0, 0.0}}, + {{5.0, 0.0}, {6.0, 0.0}, {7.0, 0.0}, {8.0, 0.0}}, + {{9.0, 0.0}, {10.0, 0.0}, {11.0, 0.0}, {12.0, 0.0}}, + }; + auto const const_arr = arr; + + multi::array const x = arr[2]; // NOLINT(readability-identifier-length) BLAS naming + blas::axpy(complex{2.0, 0.0}, x, arr[1]); // zaxpy (2. is promoted to 2+I*0 internally and automatically) + BOOST_REQUIRE( arr[1][2] == 2.0*x[2] + const_arr[1][2] ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex_as_operator_plus_equal) { + using complex = std::complex; + + multi::array arr = { + {{1.0, 0.0}, {2.0, 0.0}, {3.0, 0.0}, {4.0, 0.0}}, + {{5.0, 0.0}, {6.0, 0.0}, {7.0, 0.0}, {8.0, 0.0}}, + {{9.0, 0.0}, {10.0, 0.0}, {11.0, 0.0}, {12.0, 0.0}}, + }; + auto const carr = arr; + multi::array const y = arr[2]; // NOLINT(readability-identifier-length) BLAS naming + arr[1] += blas::axpy(2.0, y); // zaxpy (2. is promoted to 2+I*0 internally and automatically) + BOOST_REQUIRE( arr[1][2] == 2.0*y[2] + carr[1][2] ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex_as_operator_minus_equal) { + multi::array arr = { + {{1.0, 0.0}, {2.0, 0.0}, {3.0, 0.0}, {4.0, 0.0}}, + {{5.0, 0.0}, {6.0, 0.0}, {7.0, 0.0}, {8.0, 0.0}}, + {{9.0, 0.0}, {10.0, 0.0}, {11.0, 0.0}, {12.0, 0.0}}, + }; + auto const AC = arr; + multi::array const x = arr[2]; // NOLINT(readability-identifier-length) BLAS naming + arr[1] -= blas::axpy(complex{2.0, 0.0}, x); // zaxpy (2. 
is promoted to 2+I*0 internally and automatically) + BOOST_REQUIRE( arr[1][2] == -2.0*x[2] + AC[1][2] ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex_context) { + multi::array arr = { + {{1.0, 0.0}, {2.0, 0.0}, {3.0, 0.0}, {4.0, 0.0}}, + {{5.0, 0.0}, {6.0, 0.0}, {7.0, 0.0}, {8.0, 0.0}}, + {{9.0, 0.0}, {10.0, 0.0}, {11.0, 0.0}, {12.0, 0.0}}, + }; + auto const arr_copy = arr; + multi::array const arr2 = arr[2]; + blas::context ctxt{}; + blas::axpy(&ctxt, complex{2.0, 0.0}, arr2, arr[1]); // zaxpy (2. is promoted to 2+I*0 internally and automatically) + BOOST_REQUIRE( arr[1][2] == 2.0*arr2[2] + arr_copy[1][2] ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_axpy_operator_minus) { + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array x = { + {10.0, 0.0}, + {11.0, 0.0}, + {12.0, 0.0}, + {13.0, 0.0}, + }; + multi::array const y = x; // NOLINT(readability-identifier-length) BLAS naming + + using blas::operators::operator-; + + BOOST_REQUIRE( (x - y)[0] == complex{} ); + BOOST_REQUIRE( (y - x)[0] == complex{} ); + + using blas::operators::operator+; + + BOOST_REQUIRE( (x - (y+y))[0] == -x[0] ); + BOOST_REQUIRE( ((x+x) - y)[0] == +x[0] ); + + multi::array arr = { + {{1.0, 0.0}, {2.0, 0.0}}, + {{3.0, 0.0}, {4.0, 0.0}}, + }; + multi::array const arr2 = { + {1.0, 0.0}, + {2.0, 0.0}, + }; + BOOST_REQUIRE( (arr[0] - arr2)[0] == complex{} ); + BOOST_REQUIRE( (arr[0] - arr2)[1] == complex{} ); + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array X = { + {10.0, 0.0}, + {11.0, 0.0}, + {12.0, 0.0}, + {13.0, 0.0}, + }; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const Y = { + {10.0, 0.0}, + {11.0, 0.0}, + {12.0, 0.0}, + {13.0, 0.0}, + }; + + using blas::operators::operator-=; + X -= Y; + BOOST_REQUIRE( X[0] == complex{} ); +} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/config.hpp.in 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/config.hpp.in similarity index 61% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/test/config.hpp.in rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/config.hpp.in index d2cc137b5f..9fe87822be 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/config.hpp.in +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/config.hpp.in @@ -1,6 +1,6 @@ #ifndef MULTI_ADAPTORS_BLAS_TEST_CONFIG_HPP_IN // NOLINT(llvm-header-guard) name is sufficient #define MULTI_ADAPTORS_BLAS_TEST_CONFIG_HPP_IN -#cmakedefine01 CUDA_FOUND +#cmakedefine01 CUDA_FOUND // NOLINT(modernize-macro-to-enum) TODO(correaa) remove if possible #endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/copy.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/copy.cpp new file mode 100644 index 0000000000..f49f39e19c --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/copy.cpp @@ -0,0 +1,79 @@ +// Copyright 2019-2024 Alfredo A. 
Correa + +#include + +#include "../../../array.hpp" + +#include "../../blas/copy.hpp" + +#include + +namespace multi = boost::multi; +namespace blas = multi::blas; + +BOOST_AUTO_TEST_CASE(multi_blas_copy_n) { + multi::array const x = {1.0, 2.0, 3.0, 4.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = {5.0, 6.0, 7.0, 8.0}; // NOLINT(readability-identifier-length) BLAS naming + blas::copy_n(x.begin(), x.size(), y.begin()); + BOOST_REQUIRE( y == x ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_copy) { + multi::array const x = {1.0, 2.0, 3.0, 4.0}; // NOLINT(readability-identifier-length) BLAS naming + { + multi::array y = {5.0, 6.0, 7.0, 8.0}; // NOLINT(readability-identifier-length) BLAS naming + blas::copy(x, y); // segmentation fault in clang-11 + BOOST_REQUIRE( y == x ); + } + { + multi::array y = {5.0, 6.0, 7.0, 8.0}; // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE( size(y) == size(x) ); + y() = blas::copy(x); + BOOST_REQUIRE( y == x ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_copy_real) { + namespace blas = multi::blas; + multi::array arr = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + BOOST_REQUIRE( arr[0][2] == 3.0 ); + BOOST_REQUIRE( arr[2][2] == 11.0 ); + + blas::copy(arr[0], arr[2]); + BOOST_REQUIRE( arr[0][2] == 3.0 ); + BOOST_REQUIRE( arr[2][2] == 3.0 ); + + blas::copy(arr[1]({0, size(arr[1])}), arr[2]({0, size(arr[1])})); + BOOST_REQUIRE( arr[1][3] == 8.0 ); + BOOST_REQUIRE( arr[2][3] == 8.0 ); + + multi::array AR3 = blas::copy(rotated(arr)[3]); // dcopy + BOOST_REQUIRE( AR3[1] == arr[1][3] ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_copy_row) { + multi::array const arr = { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + }; + multi::array y(multi::extensions_t<1>{multi::iextension{3}}); // NOLINT(readability-identifier-length) BLAS naming + blas::copy(rotated(arr)[0], y); + BOOST_REQUIRE( y == rotated(arr)[0] ); +} + 
+BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_copy_complex) { + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array arr = { + {1.0 + 3.0 * I, 2.0 + 4.0 * I, 3.0 + 5.0 * I, 4.0 + 6.0 * I}, + {5.0 + 0.0 * I, 6.0 + 0.0 * I, 7.0 + 0.0 * I, 8.0 + 0.0 * I}, + {9.0 + 0.0 * I, 10.0 + 0.0 * I, 11.0 + 0.0 * I, 12.0 + 0.0 * I}, + }; + blas::copy(arr[0], arr[2]); + BOOST_REQUIRE( arr[0][2] == 3.0 + 5.0*I ); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/dot.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/dot.cpp new file mode 100644 index 0000000000..ebbadab64b --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/dot.cpp @@ -0,0 +1,492 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include + +#include +#include +#include + +namespace multi = boost::multi; +namespace blas = multi::blas; + +BOOST_AUTO_TEST_CASE(blas_dot_context_double) { + multi::array const x = {1.0, 2.0, 3.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const y = {1.0, 2.0, 3.0}; // NOLINT(readability-identifier-length) BLAS naming + + blas::context const ctxt; + + auto res1 = +blas::dot(&ctxt, x, y); + BOOST_TEST( res1 == std::inner_product(begin(x), end(x), begin(y), 0.0) ); + + auto const res2 = +blas::dot(&ctxt, x, y); + BOOST_TEST( res2 == std::inner_product(begin(x), end(x), begin(y), 0.0) ); +} + +BOOST_AUTO_TEST_CASE(blas_dot_no_context_double) { + multi::array const x = {1.0, 2.0, 3.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const y = {1.0, 2.0, 3.0}; // NOLINT(readability-identifier-length) BLAS naming + + auto res = +blas::dot(x, y); + + BOOST_TEST( res == std::inner_product(begin(x), end(x), begin(y), 0.0) ); +} + 
+BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_double) { + multi::array const x = {1.0, 2.0, 3.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const y = {1.0, 2.0, 3.0}; // NOLINT(readability-identifier-length) BLAS naming + + double res = NAN; + + blas::dot(x, y, multi::array_ref(res)); + BOOST_TEST( res == std::inner_product(begin(x), end(x), begin(y), 0.0) ); +} + +// float uses of dot are disabled because of a bug in Apple Accelerate BLAS, https://fortran-lang.discourse.group/t/how-many-blas-libraries-have-this-error/4454/23, https://forums.developer.apple.com/forums/thread/717757 +BOOST_AUTO_TEST_CASE(blas_dot_context_float, *boost::unit_test::disabled()) { + multi::array const x = {1.0F, 2.0F, 3.0F}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const y = {1.0F, 2.0F, 3.0F}; // NOLINT(readability-identifier-length) BLAS naming + + blas::context const ctxt; + + auto res1 = +blas::dot(&ctxt, x, y); + BOOST_TEST( res1 == std::inner_product(begin(x), end(x), begin(y), 0.0F) ); + + auto const res2 = +blas::dot(&ctxt, x, y); + BOOST_TEST( res2 == std::inner_product(begin(x), end(x), begin(y), 0.0F) ); +} + +BOOST_AUTO_TEST_CASE(blas_dot_no_context_float, *boost::unit_test::disabled()) { + multi::array const x = {1.0F, 2.0F, 3.0F}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const y = {1.0F, 2.0F, 3.0F}; // NOLINT(readability-identifier-length) BLAS naming + + auto res = +blas::dot(x, y); + + BOOST_TEST( res == std::inner_product(begin(x), end(x), begin(y), 0.0F) ); +} + +BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_float, *boost::unit_test::disabled()) { + multi::array const x = {1.0F, 2.0F, 3.0F}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const y = {1.0F, 2.0F, 3.0F}; // NOLINT(readability-identifier-length) BLAS naming + float res = NAN; + blas::dot(x, y, multi::array_ref(res)); + BOOST_TEST( res == std::inner_product(begin(x), end(x), begin(y), 0.0F) ); +} 
+ +BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_complex_double) { // if you get a segfaut here, your system may require -DRETURN_BY_STACK + using complex = std::complex; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const x = { + {1.0, 0.0}, + {2.0, 0.0}, + {3.0, 0.0}, + }; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const y = { + {1.0, 0.0}, + {2.0, 0.0}, + {3.0, 0.0}, + }; // NOLINT(readability-identifier-length) BLAS naming + complex res{0.0, 0.0}; + blas::dot(x, y, res); + // an isolated error here might mean that the dot and nrm2 interface for the BLAS library is not detected properly + BOOST_REQUIRE_EQUAL(real(res), real(std::inner_product(begin(x), end(x), begin(y), complex{0.0, 0.0}, std::plus<>{}, [](auto const& alpha, auto const& omega) { return alpha * std::conj(omega); }))); + BOOST_REQUIRE_EQUAL(imag(res), imag(std::inner_product(begin(x), end(x), begin(y), complex{0.0, 0.0}, std::plus<>{}, [](auto const& alpha, auto const& omega) { return alpha * std::conj(omega); }))); +} + +BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_complex_float) { // if you get a segfaut here, your system may require -DRETURN_BY_STACK + using complex = std::complex; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const x = { + {1.0F, 0.0F}, + {2.0F, 0.0F}, + {3.0F, 0.0F}, + }; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const y = { + {1.0F, 0.0F}, + {2.0F, 0.0F}, + {3.0F, 0.0F}, + }; // NOLINT(readability-identifier-length) BLAS naming + complex res{0.0F, 0.0F}; + blas::dot(x, y, res); + + // // an isolated error here might mean that the dot and nrm2 interface for the BLAS library is not detected properly + BOOST_REQUIRE_EQUAL(real(res), real(std::inner_product(begin(x), end(x), begin(y), complex{0.0F, 0.0F}, std::plus<>{}, [](auto const& alpha, auto const& omega) { return alpha * std::conj(omega); }))); + BOOST_REQUIRE_EQUAL(imag(res), 
imag(std::inner_product(begin(x), end(x), begin(y), complex{0.0F, 0.0F}, std::plus<>{}, [](auto const& alpha, auto const& omega) { return alpha * std::conj(omega); }))); +} + +BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_complex_C) { + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array const x = {1.0 + 0.0 * I, 2.0 + 0.0 * I, 3.0 + 0.0 * I}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const y = {1.0 + 0.0 * I, 2.0 + 2.0 * I, 3.0 + 0.0 * I}; // NOLINT(readability-identifier-length) BLAS naming + + complex res{0.0, 0.0}; + blas::dot(blas::C(x), y, res); + BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), complex{0.0, 0.0}, std::plus<>{}, [](auto const& alpha, auto const& omega) { return conj(alpha) * omega;}) ); +} + +BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_complex_C_float) { + using complex = std::complex; + auto const I = complex{0.0F, 1.0F}; // NOLINT(readability-identifier-length) imag unit + + multi::array const x = {1.0F + 0.0F * I, 2.0F + 0.0F * I, 3.0F + 0.0F * I}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const y = {1.0F + 0.0F * I, 2.0F + 2.0F * I, 3.0F + 0.0F * I}; // NOLINT(readability-identifier-length) BLAS naming + + complex res{0.0F, 0.0F}; + blas::dot(blas::C(x), y, res); + BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), complex{0.0F, 0.0F}, std::plus<>{}, [](auto const& alpha, auto const& omega) { return conj(alpha) * omega;}) ); +} + +#if defined(CUDA_FOUND) and CUDA_FOUND +#include +BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_complex_C_thrust) { + using complex = thrust::complex; + auto const I = complex{0.0, 1.0}; + + multi::array const A = {1.0 + 0. * I, 2.0 + 0.0 * I, 3.0 + 0.0 * I}; + multi::array const B = {1.0 + 0. 
* I, 2.0 + 2.0 * I, 3.0 + 0.0 * I}; + + complex C; + blas::dot(blas::C(A), B, C); + BOOST_REQUIRE( C == std::inner_product(begin(A), end(A), begin(B), complex{0.0, 0.0}, std::plus<>{}, [](auto& a, auto& b){ return conj(a) * b;}) ); +} +#endif + +BOOST_AUTO_TEST_CASE(multi_blas_dot_strided_double) { + multi::array const CA = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + double res = std::numeric_limits::quiet_NaN(); + blas::dot_n(begin(CA[1]), size(CA[1]), begin(CA[2]), &res); + BOOST_REQUIRE( res == std::inner_product(begin(CA[1]), begin(CA[2]), end(CA[1]), 0.0) ); + + double const res2 = blas::dot(CA[1], CA[2]); + BOOST_REQUIRE( res == res2 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_dot_strided_float, *boost::unit_test::disabled()) { + multi::array const CA = { + {1.0F, 2.0F, 3.0F, 4.0F}, + {5.0F, 6.0F, 7.0F, 8.0F}, + {9.0F, 10.0F, 11.0F, 12.0F}, + }; + auto res = std::numeric_limits::quiet_NaN(); + blas::dot_n(begin(CA[1]), size(CA[1]), begin(CA[2]), &res); + BOOST_REQUIRE( res == std::inner_product(begin(CA[1]), begin(CA[2]), end(CA[1]), 0.0F) ); + + double const res2 = blas::dot(CA[1], CA[2]); + BOOST_REQUIRE( res == res2 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_dot_strided_context) { + multi::array const CA = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + double res = std::numeric_limits::quiet_NaN(); + blas::context ctxt; + blas::dot_n(&ctxt, begin(CA[1]), size(CA[1]), begin(CA[2]), &res); + BOOST_REQUIRE( res == std::inner_product(begin(CA[1]), begin(CA[2]), end(CA[1]), 0.0) ); + + double const res2 = blas::dot(CA[1], CA[2]); + BOOST_REQUIRE( res == res2 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_dot_strided_context_float, *boost::unit_test::disabled()) { + multi::array const CA = { + {1.0F, 2.0F, 3.0F, 4.0F}, + {5.0F, 6.0F, 7.0F, 8.0F}, + {9.0F, 10.0F, 11.0F, 12.0F}, + }; + float res = std::numeric_limits::quiet_NaN(); + + blas::context ctxt; + blas::dot_n(&ctxt, begin(CA[1]), 
size(CA[1]), begin(CA[2]), &res); + + BOOST_REQUIRE( res == std::inner_product(begin(CA[1]), begin(CA[2]), end(CA[1]), 0.0) ); + + float const res2 = blas::dot(CA[1], CA[2]); + BOOST_REQUIRE( res == res2 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_dot_1d_real_double) { + multi::array const x = {1.0, 2.0, 3.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const y = {1.0, 2.0, 3.0}; // NOLINT(readability-identifier-length) BLAS naming + + using blas::dot; + BOOST_TEST( 14.0 == dot(x, y) ); + BOOST_TEST( dot(x, y) == 14.0F ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_dot_1d_real_float, *boost::unit_test::disabled()) { + multi::array const x = {1.0F, 2.0F, 3.0F}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const y = {1.0F, 2.0F, 3.0F}; // NOLINT(readability-identifier-length) BLAS naming + + using blas::dot; + BOOST_TEST( 14.0F == dot(x, y) ); + BOOST_TEST( dot(x, y) == 14.0F ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_dot_impl_real_double) { + multi::array const cA = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + + double const res1 = blas::dot(cA[1], cA[2]); + BOOST_REQUIRE( res1 == std::inner_product(begin(cA[1]), begin(cA[2]), end(cA[1]), 0.0) ); + + double res2 = NAN; + blas::dot(cA[1], cA[2], res2); + BOOST_REQUIRE( res2 == std::inner_product(begin(cA[1]), begin(cA[2]), end(cA[1]), 0.0) ); + + double res_nan = NAN; + double const res3 = blas::dot(cA[1], cA[2], res_nan); + BOOST_REQUIRE( res3 == res2 ); + + double const res4 = blas::dot(cA[1], cA[2]); + BOOST_REQUIRE( res4 == std::inner_product(begin(cA[1]), begin(cA[2]), end(cA[1]), 0.0) ); + BOOST_REQUIRE( blas::dot(cA[1], cA[2]) == blas::dot(cA[2], cA[1]) ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_dot_impl_real_float, *boost::unit_test::disabled()) { + multi::array const cA = { + {1.0F, 2.0F, 3.0F, 4.0F}, + {5.0F, 6.0F, 7.0F, 8.0F}, + {9.0F, 10.0F, 11.0F, 12.0F}, + }; + + float const res1 = blas::dot(cA[1], cA[2]); + BOOST_REQUIRE( res1 == 
std::inner_product(begin(cA[1]), begin(cA[2]), end(cA[1]), 0.0F) ); + + float res2 = NAN; + blas::dot(cA[1], cA[2], res2); + BOOST_REQUIRE( res2 == std::inner_product(begin(cA[1]), begin(cA[2]), end(cA[1]), 0.0F) ); + + float res_nan = NAN; + float const res3 = blas::dot(cA[1], cA[2], res_nan); + + BOOST_REQUIRE( res3 == res2 ); + + float const res4 = blas::dot(cA[1], cA[2]); + BOOST_REQUIRE( res4 == std::inner_product(begin(cA[1]), begin(cA[2]), end(cA[1]), 0.0F) ); + BOOST_REQUIRE( blas::dot(cA[1], cA[2]) == blas::dot(cA[2], cA[1]) ); +} + +BOOST_AUTO_TEST_CASE(inq_case) { + multi::array const x(multi::extensions_t<1>{multi::iextension{10}}, +1.0); // NOLINT(readability-identifier-length) BLAS naming + multi::array const y(multi::extensions_t<1>{multi::iextension{10}}, -1.0); // NOLINT(readability-identifier-length) BLAS naming + + using blas::conj; + using blas::dot; + using blas::hermitized; + + auto res = dot(x, y); + auto res2 = dot(hermitized(x), y); + + BOOST_REQUIRE(res == res2); + + auto res3 = dot(blas::conj(x), y); // conjugation doesn't do anything for real array + BOOST_REQUIRE(res3 == res); + + auto d_arr = dot(blas::C(x), y); + BOOST_REQUIRE(d_arr == res); + + static_assert(!std::is_same{}); + + using blas::C; + double const d_doub = dot(C(x), y); + + BOOST_REQUIRE( d_doub == d_arr ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_dot_impl_complex_double) { + namespace blas = multi::blas; + + using complex = std::complex; + + complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + { 1.0 + I, 2.0 + 3.0 * I, 3.0 + 2.0 * I, 4.0 - 9.0 * I}, + {5.0 + 2.0 * I, 6.0 + 6.0 * I, 7.0 + 2.0 * I, 8.0 - 3.0 * I}, + {9.0 + 1.0 * I, 10.0 + 9.0 * I, 11.0 + 1.0 * I, 12.0 + 2.0 * I}, + }; + + auto c1 = complex{0.0, 0.0}; + blas::dot(A[1], A[2], c1); + BOOST_TEST_REQUIRE( c1 == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0, 0.0}) ); + + auto 
const c2 = +blas::dot(A[1], A[2]); + BOOST_TEST_REQUIRE( c2 == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0, 0.0}) ); + + complex const c3 = blas::dot(A[1], A[2]); + BOOST_TEST_REQUIRE( c3 == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0, 0.0}) ); + + complex const c4 = blas::dot(A[1], blas::C(A[2])); + BOOST_TEST_REQUIRE( c4 == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0, 0.0}, std::plus<>{}, [](auto alpha, auto omega) { return alpha * conj(omega);}) ); + + complex const c5 = blas::dot(blas::C(A[1]), A[2]); + BOOST_TEST_REQUIRE( c5 == inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0, 0.0}, std::plus<>{}, [](auto alpha, auto omega) { return conj(alpha) * omega;}) ); + + complex const c6 = blas::dot(blas::conj(A[1]), A[2]); + BOOST_TEST_REQUIRE( c6 == inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0, 0.0}, std::plus<>{}, [](auto alpha, auto omega) { return conj(alpha) * omega;}) ); + + complex const c7 = blas::dot(blas::C(A[1]), A[2]); + BOOST_TEST_REQUIRE( c7 == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0, 0.0}, std::plus<>{}, [](auto alpha, auto omega) { return conj(alpha) * omega;}) ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_dot_impl_complex_float) { + namespace blas = multi::blas; + + using complex = std::complex; + + complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0F + 1.0F * I, 2.0F + 3.0F * I, 3.0F + 2.0F * I, 4.0F - 9.0F * I}, + {5.0F + 2.0F * I, 6.0F + 6.0F * I, 7.0F + 2.0F * I, 8.0F - 3.0F * I}, + {9.0F + 1.0F * I, 10.0F + 9.0F * I, 11.0F + 1.0F * I, 12.0F + 2.0F * I}, + }; + + auto c1 = complex{0.0F, 0.0F}; + blas::dot(A[1], A[2], c1); + BOOST_TEST_REQUIRE( c1 == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0F, 0.0F}) ); + + auto const c2 = +blas::dot(A[1], A[2]); + BOOST_TEST_REQUIRE( c2 == 
std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0F, 0.0F}) ); + + complex const c3 = blas::dot(A[1], A[2]); + BOOST_TEST_REQUIRE( c3 == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0F, 0.0F}) ); + + complex const c4 = blas::dot(A[1], blas::C(A[2])); + BOOST_TEST_REQUIRE( c4 == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0F, 0.0F}, std::plus<>{}, [](auto alpha, auto omega) { return alpha * conj(omega);}) ); + + complex const c5 = blas::dot(blas::C(A[1]), A[2]); + BOOST_TEST_REQUIRE( c5 == inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0F, 0.0F}, std::plus<>{}, [](auto alpha, auto omega) { return conj(alpha) * omega;}) ); + + complex const c6 = blas::dot(blas::conj(A[1]), A[2]); + BOOST_TEST_REQUIRE( c6 == inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0F, 0.0F}, std::plus<>{}, [](auto alpha, auto omega) { return conj(alpha) * omega;}) ); + + complex const c7 = blas::dot(blas::C(A[1]), A[2]); + BOOST_TEST_REQUIRE( c7 == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.0F, 0.0F}, std::plus<>{}, [](auto alpha, auto omega) { return conj(alpha) * omega;}) ); +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_conj_second_double) { + namespace blas = multi::blas; + + using complex = std::complex; + using Alloc = std::allocator; // thrust::cuda::allocator; + + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + + auto CC = C; + + auto const [is, js] = C.extensions(); + std::for_each(is.begin(), is.end(), [&, js = 
js](auto ii) { + std::for_each(js.begin(), js.end(), [&](auto jj) { + C[ii][jj] *= 0.0; + std::for_each(B.extension().begin(), B.extension().end(), [&](auto kk) { + C[ii][jj] += A[ii][kk] * conj(B[kk][jj]); + }); + }); + }); + + // TODO(correaa) MKL gives an error here + // unknown location(0): fatal error: in "cublas_one_gemv_complex_conjtrans_zero": memory access violation at address: 0x00000007: no mapping at fault address + + std::transform(begin(A), end(A), begin(CC), begin(CC), [BT = transposed(B)](auto const& Ar, auto&& Cr) { + return std::transform( + begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar](auto const& Bc, auto const& Ce) { + return std::complex{1.0, 0.0} * blas::dot(Ar, blas::C(Bc)) + 0.0 * Ce; + } + ), + std::forward(Cr); + }); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_conj_second_float) { + namespace blas = multi::blas; + + using complex = std::complex; + using Alloc = std::allocator; // thrust::cuda::allocator; + + auto const I = complex{0.0F, 1.0F}; // NOLINT(readability-identifier-length) + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0F - 2.0F * I, 9.0F - 1.0F * I}, + {2.0F + 3.0F * I, 1.0F - 2.0F * I}, + }; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const B = { + {3.0F - 4.0F * I, 19.0F - 1.0F * I}, + {1.0F + 5.0F * I, 8.0F - 8.0F * I}, + }; + + multi::array C({2, 2}, {3.0F, 0.0F}); // NOLINT(readability-identifier-length) conventional BLAS naming + + auto CC = C; + + auto const [is, js] = C.extensions(); + std::for_each(is.begin(), is.end(), [&, js = js](auto ii) { + std::for_each(js.begin(), js.end(), 
[&](auto jj) { + C[ii][jj] *= 0.0F; + std::for_each(B.extension().begin(), B.extension().end(), [&](auto kk) { + C[ii][jj] += A[ii][kk] * conj(B[kk][jj]); + }); + }); + }); + + // TODO(correaa) MKL gives an error here + // unknown location(0): fatal error: in "cublas_one_gemv_complex_conjtrans_zero": memory access violation at address: 0x00000007: no mapping at fault address + + std::transform(begin(A), end(A), begin(CC), begin(CC), [BT = transposed(B)](auto const& Ar, auto&& Cr) { + return std::transform( + begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar](auto const& Bc, auto const& Ce) { + return complex{1.0F, 0.0F} * blas::dot(Ar, blas::C(Bc)) + 0.0F * Ce; + } + ), + std::forward(Cr); + }); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/gemm.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/gemm.cpp new file mode 100644 index 0000000000..b200c7abbd --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/gemm.cpp @@ -0,0 +1,1921 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include "../../../adaptors/blas/gemm.hpp" +#include "../../../adaptors/blas/operations.hpp" + +#include + +#include + +namespace multi = boost::multi; +namespace blas = multi::blas; + +BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_T_sub) { + namespace blas = multi::blas; + + multi::array A({100, 4}, 1.0); // NOLINT(readability-identifier-length) BLAS naming + multi::array B({4, 4}, 1.0); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({100, 1}, 0.0); // NOLINT(readability-identifier-length) BLAS naming + + blas::gemm(1.0, A({0, 100}, {1, 2}), blas::T(B)({0, 1}, {0, 1}), 0.0, C); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE(C[99][0] == 1.0); +} + +BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_H_sub) { + multi::array A({100, 4}, 1.0); // NOLINT(readability-identifier-length) BLAS naming + multi::array B({4, 4}, 1.0); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({100, 1}, 0.0); // NOLINT(readability-identifier-length) BLAS naming + + blas::gemm(1., A({0, 100}, {1, 2}), blas::H(B)({0, 1}, {0, 1}), 0.0, C); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE(C[99][0] == 1.0); +} + +BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_H_sub_6) { + multi::array A({100, 4}, 2.0); // NOLINT(readability-identifier-length) BLAS naming + multi::array B({4, 4}, 3.0); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({100, 1}, 0.0); // NOLINT(readability-identifier-length) BLAS naming + + blas::gemm(1., A({0, 100}, {1, 2}), blas::H(B)({0, 1}, {0, 1}), 0.0, C); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE(C[99][0] == 6.0); +} + +BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_H_copy) { + multi::array A({100, 4}, 1.0); // NOLINT(readability-identifier-length) BLAS naming + multi::array B({4, 4}, 1.0); // NOLINT(readability-identifier-length) BLAS naming + + auto C = +blas::gemm(1., A({0, 100}, {1, 2}), blas::H(B)({2, 3}, {2, 3})); // c=ab, c⸆=b⸆a⸆ // 
NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE(C[99][0] == 1.0); +} + +BOOST_AUTO_TEST_CASE(adaptor_blas_complex_100x1_1x1) { + using complex = std::complex; + multi::array const A({100, 1}, {1.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + multi::array const B({1, 1}, {1.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({100, 1}, {0.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + + blas::gemm({1.0, 0.0}, A, B, {0.0, 0.0}, C); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE(C[99][0] == 1.0); +} + +BOOST_AUTO_TEST_CASE(adaptor_blas_complex_100x1_1x1_T) { + using complex = std::complex; + multi::array const A({100, 1}, complex{1.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + multi::array const B({1, 1}, complex{1.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({100, 1}, complex{0.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + + blas::gemm(complex{1.0, 0.0}, A, blas::T(B), complex{0.0, 0.0}, C); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( C[99][0] == 1.0 ); +} + +BOOST_AUTO_TEST_CASE(adaptor_blas_complex_100x1_1x1_H) { + using complex = std::complex; // complex const I{0, 1}; + multi::array const A({100, 1}, {1.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + multi::array const B({1, 1}, {1.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({100, 1}, {0.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + + blas::gemm({1.0, 0.0}, A, blas::H(B), {0.0, 0.0}, C); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( C[99][0] == 1.0 ); +} + +BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1) { + using complex = std::complex; + multi::array const A({100, 1}, {1.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + multi::array const B({1, 1}, {1.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({100, 1}, {0.0, 0.0}); // NOLINT(readability-identifier-length) BLAS 
naming + + blas::gemm({1.0, 0.0}, A, B, {0.0, 0.0}, C); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE(C[99][0] == 1.0); +} + +BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_T) { + multi::array const A({100, 1}, 1.0); // NOLINT(readability-identifier-length) BLAS naming + multi::array const B({1, 1}, 1.0); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({100, 1}, 0.0); // NOLINT(readability-identifier-length) BLAS naming + + blas::gemm(1.0, A, blas::T(B), 0.0, C); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE(C[99][0] == 1.0); +} + +BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_H) { + multi::array const A({100, 1}, 1.0); // NOLINT(readability-identifier-length) BLAS naming + multi::array const B({1, 1}, 1.0); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({100, 1}, 0.0); // NOLINT(readability-identifier-length) BLAS naming + + blas::gemm(1.0, A, blas::H(B), 0.0, C); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE(C[99][0] == 1.0); +} + +BOOST_AUTO_TEST_CASE(multi_blas_gemm_square_real) { + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const a = { + {1.0, 3.0, 4.0}, + {9.0, 7.0, 1.0}, + {1.0, 2.0, 3.0}, + }; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const b = { + {11.0, 12.0, 4.0}, + { 7.0, 19.0, 1.0}, + {11.0, 12.0, 4.0}, + }; + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, a, b, 0.0, c); + BOOST_REQUIRE( c[2][1] == 86.0 ); + } + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + BOOST_REQUIRE( size( a) == size( c) ); + BOOST_REQUIRE( size(~b) == size(~c) ); + blas::gemm_n(1.0, begin(a), size(a), begin(b), 0.0, begin(c)); + BOOST_REQUIRE( c[2][1] == 86.0 ); + } + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, a, blas::T(b), 0.0, c); + BOOST_REQUIRE( 
c[2][1] == 48.0 ); + } + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm_n(1., a.begin(), a.size(), blas::T(b).begin(), 0.0, c.begin()); + BOOST_REQUIRE( c[2][1] == 48.0 ); + } + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, blas::T(a), b, 0.0, c); + BOOST_REQUIRE( c[2][1] == 103.0 ); + } + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm_n(1.0, begin(blas::T(a)), size(blas::T(a)), begin(b), 0.0, begin(c)); + BOOST_REQUIRE( c[2][1] == 103.0 ); + } + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, blas::T(a), blas::T(b), 0.0, c); + BOOST_REQUIRE( c[2][1] == 50.0 ); + } + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm_n(1.0, begin(blas::T(a)), size(blas::T(a)), begin(blas::T(b)), 0.0, begin(c)); + BOOST_REQUIRE( c[2][1] == 50.0 ); + } + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, a, blas::T(b), 0.0, c); + BOOST_REQUIRE( c[2][1] == 48.0 ); + } + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm_n(1.0, begin(a), size(a), begin(blas::T(b)), 0.0, begin(c)); + BOOST_REQUIRE( c[2][1] == 48.0 ); + } + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, blas::T(a), b, 0.0, c); + BOOST_REQUIRE( c[2][1] == 103.0 ); + } + { + multi::array c({size(a), size(~b)}, 9999.0); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm_n(1.0, begin(blas::T(a)), size(blas::T(a)), begin(b), 0.0, begin(c)); + 
BOOST_REQUIRE( c[2][1] == 103.0 ); + } + { + multi::array c({size(a), size(rotated(b))}, 9999.0); // NOLINT(readability-identifier-length) BLAS naming + blas::gemm(2.0, blas::H(a), blas::H(b), 0.0, c); + BOOST_REQUIRE( c[2][1] == 100.0 ); + } + { + multi::array c = blas::gemm(2.0, blas::H(a), blas::H(b)); // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE( c[2][1] == 100.0 ); + } + { + multi::array const c = blas::gemm(2.0, blas::H(a), blas::H(b)); // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE( c[2][1] == 100.0 ); + } + { + multi::array c({size(a), size(rotated(b))}, 9999.0); // NOLINT(readability-identifier-length) BLAS naming + c = blas::gemm(2.0, blas::H(a), blas::H(b)); + BOOST_REQUIRE( c[2][1] == 100.0 ); + } + { + multi::array c; // NOLINT(readability-identifier-length) BLAS naming + c = blas::gemm(2.0, blas::H(a), blas::H(b)); + BOOST_REQUIRE( c[2][1] == 100.0 ); + } + { + multi::array c({size(a), size(rotated(b))}, 9999.0); // NOLINT(readability-identifier-length) BLAS naming + blas::gemm_n(2.0, begin(blas::H(a)), size(blas::H(a)), begin(blas::H(b)), 0.0, begin(c)); + BOOST_REQUIRE( c[2][1] == 100.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_square) { + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const a = { + {1.0, 3.0}, + {9.0, 7.0}, + }; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const b = { + {11.0, 12.0}, + { 7.0, 19.0}, + }; + { + multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, a, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == 148.0 ); + } + { + multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming + + blas::context const ctxt; + blas::gemm_n(&ctxt, 1.0, begin(a), size(a), begin(b), 0.0, begin(c)); + BOOST_REQUIRE( c[1][0] == 148.0 ); + } + { + multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional 
BLAS naming + blas::gemm(1.0, ~a, b, 0.0, c); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE(( c[1][1] == 169.0 && c[1][0] == 82.0 )); + } + { + multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming + + blas::context const ctxt; + blas::gemm_n(&ctxt, 1.0, begin(~a), size(~a), begin(b), 0.0, begin(c)); + BOOST_REQUIRE(( c[1][1] == 169 && c[1][0] == 82 )); + } + { + multi::array const c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming + + blas::context const ctxt; + blas::gemm_n(&ctxt, 1.0, begin(~a), size(~a), begin(b), 0.0, begin(~c)); + BOOST_REQUIRE( (~c)[1][1] == 169 ); + BOOST_REQUIRE( (~c)[1][0] == 82 ); + } + { + multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, a, ~b, 0.0, c); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE( c[1][0] == 183.0 ); + } + { + // TODO(correaa) fix sfinae of const c + multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming + + blas::context const ctxt; + blas::gemm_n(&ctxt, 1.0, begin(a), size(a), begin(~b), 0.0, begin(c)); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE( c[1][0] == 183.0 ); + } + { + // NOLINTNEXTLINE(misc-const-correctness) TODO(correaa) fix sfinae of const c + multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, a, ~b, 0.0, ~c); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE( (~c)[1][0] == 183.0 ); + } + { + // NOLINTNEXTLINE(misc-const-correctness) TODO(correaa) fix sfinae of const c + multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm_n(1.0, begin(a), size(a), begin(~b), 0.0, begin(~c)); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE( (~c)[1][0] == 183.0 ); + } + { + multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, ~a, ~b, 0.0, c); // c=a⸆b⸆, c⸆=ba + BOOST_REQUIRE( c[1][0] == 117.0 ); + } + { + multi::array c({2, 2}); // NOLINT(readability-identifier-length) 
conventional BLAS naming + blas::gemm_n(1.0, begin(~a), size(~a), begin(~b), 0.0, begin(c)); // c=a⸆b⸆, c⸆=ba + BOOST_REQUIRE( c[1][0] == 117.0 ); + } + { + multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, ~a, ~b, 0.0, ~c); // c⸆=a⸆b⸆, c=ba + BOOST_REQUIRE( c[0][1] == 117.0 ); + } + { + multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm_n(1.0, begin(~a), size(~a), begin(~b), 0.0, begin(~c)); // c⸆=a⸆b⸆, c=ba + BOOST_REQUIRE( c[0][1] == 117.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_nonsquare) { + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const a = { + {1.0, 3.0, 1.0}, + {9.0, 7.0, 1.0}, + }; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const b = { + {11.0, 12.0, 1.0}, + { 7.0, 19.0, 1.0}, + { 1.0, 1.0, 1.0}, + }; + { + multi::array c({2, 3}); // NOLINT(readability-identifier-length) BLAS naming + blas::gemm(1.0, a, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 17 ); + } + { + multi::array c({2, 3}); // NOLINT(readability-identifier-length) BLAS naming + blas::gemm_n(1.0, begin(a), size(a), begin(b), 0.0, begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 17.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_nonsquare_automatic) { + namespace blas = multi::blas; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const a = { + {1.0, 3.0, 1.0}, + {9.0, 7.0, 1.0}, + }; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const b = { + {11.0, 12.0, 4.0, 8.0}, + { 7.0, 19.0, 2.0, 7.0}, + { 5.0, 3.0, 3.0, 1.0}, + }; + { + multi::array c({size(a), size(~b)}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(1.0, a, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 53.0 ); + } + { + multi::array c({size(a), size(~b)}); // NOLINT(readability-identifier-length) 
conventional BLAS naming + blas::gemm_n(1.0, begin(a), size(a), begin(b), 0.0, begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 53.0 ); + } + { + multi::array c({2, 4}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm(0.1, a, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE_CLOSE(c[1][2], 5.3, 0.00001); + } + { + multi::array c({2, 4}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm_n(0.1, begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE_CLOSE(c[1][2], 5.3, 0.00001); + } + { + auto c = +blas::gemm(0.1, a, b); // c=ab, c⸆=b⸆a⸆ // NOLINT(readability-identifier-length) conventional BLAS naming + BOOST_REQUIRE_CLOSE(c[1][2], 5.3, 0.00001); + } + { + multi::array c = blas::gemm(0.1, a, b); // NOLINT(readability-identifier-length) conventional BLAS naming + BOOST_REQUIRE_CLOSE(c[1][2], 5.3, 0.00001); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_gemm_nh) { + using complex = std::complex; + complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imaginary unit + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const a = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + { + auto c = +blas::gemm(1.0, a, blas::H(a)); // c=aa†, c†=aa† // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE( c[1][0] == 7.0 - 10.0*I ); + BOOST_REQUIRE( c[0][1] == 7.0 + 10.0*I ); + } + { + multi::array c = blas::gemm(1.0, a, blas::H(a)); // c=aa†, c†=aa† // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE( c[1][0] == 7.-10.*I ); + BOOST_REQUIRE( c[0][1] == 7.+10.*I ); + } + { + multi::array c = blas::gemm(1.0, a, blas::H(a)); // c=aa†, c†=aa† // NOLINT(readability-identifier-length) conventional BLAS naming + BOOST_REQUIRE( c[1][0] == 7.0 - 10.0*I ); + BOOST_REQUIRE( c[0][1] == 7.0 + 10.0*I ); + } + { + multi::array c({2, 2}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + c 
= blas::gemm(1.0, a, blas::H(a)); // c=aa†, c†=aa† + BOOST_REQUIRE( c[1][0] == 7.0 - 10.0*I ); + BOOST_REQUIRE( c[0][1] == 7.0 + 10.0*I ); + } + { + multi::array c({2, 2}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + c() = blas::gemm(1.0, a, blas::H(a)); // c=aa†, c†=aa† + BOOST_REQUIRE( c[1][0] == 7.0 - 10.*I ); + BOOST_REQUIRE( c[0][1] == 7.0 + 10.*I ); + } + { + multi::array c({2, 2}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, a, blas::H(a), {0.0, 0.0}, c); // c=aa†, c†=aa† + BOOST_REQUIRE( c[1][0] == 7.0 - 10.0*I ); + BOOST_REQUIRE( c[0][1] == 7.0 + 10.0*I ); + } + { + multi::array c({2, 2}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm_n({1.0, 0.0}, begin(a), size(a), begin(blas::H(a)), {0.0, 0.0}, begin(c)); // c=aa†, c†=aa† + BOOST_REQUIRE( c[1][0] == 7. - 10.*I ); + BOOST_REQUIRE( c[0][1] == 7. + 10.*I ); + } +} + +#if defined(CUDA_FOUND) +#include +BOOST_AUTO_TEST_CASE(multi_blas_gemm_nh_thrust) { + using complex = thrust::complex; + complex const I{0.0, 1.0}; + multi::array const a = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I} + }; + { + auto c = +blas::gemm(1.0, a, blas::hermitized(a)); // c=aa†, c†=aa† + BOOST_REQUIRE( c[1][0] == 7.0 - 10.0*I ); + BOOST_REQUIRE( c[0][1] == 7.0 + 10.0*I ); + } + { + multi::array c = blas::gemm(1.0, a, blas::hermitized(a)); // c=aa†, c†=aa† + BOOST_REQUIRE( c[1][0] == 7.0 - 10.0*I ); + BOOST_REQUIRE( c[0][1] == 7.0 + 10.0*I ); + } + { + multi::array c = blas::gemm(1.0, a, blas::hermitized(a)); // c=aa†, c†=aa† + BOOST_REQUIRE( c[1][0] == 7.0 - 10.0*I ); + BOOST_REQUIRE( c[0][1] == 7.0 + 10.0*I ); + } + { + multi::array c({2, 2}); + c = blas::gemm(1.0, a, blas::hermitized(a)); // c=aa†, c†=aa† + BOOST_REQUIRE( c[1][0] == 7.0 - 10.0*I ); + BOOST_REQUIRE( c[0][1] == 7.0 + 10.0*I ); + } + { + multi::array c({2, 2}); + blas::gemm(1., a, 
blas::hermitized(a), 0.0, c); // c=aa†, c†=aa† + BOOST_REQUIRE( c[1][0] == 7.0 - 10.0*I ); + BOOST_REQUIRE( c[0][1] == 7.0 + 10.0*I ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1.0, begin(a), size(a), begin(blas::H(a)), 0.0, begin(c)); // c=aa†, c†=aa† + BOOST_REQUIRE( c[1][0] == 7.0 - 10.0*I ); + BOOST_REQUIRE( c[0][1] == 7.0 + 10.0*I ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_gemm_elongated) { + using complex = std::complex; + complex const I{0.0, 1.0}; + multi::array const a = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I} + }; + { + multi::array c({1, 1}); + blas::gemm(1.0, a, blas::H(a), 0.0, c); // c=aa†, c†=aa† + BOOST_REQUIRE( c[0][0] == 87.0 + 0.0*I ); + } + { + multi::array c({1, 1}); + blas::gemm_n(1.0, begin(a), size(a), begin(blas::H(a)), 0.0, begin(c)); // c=aa†, c†=aa† + BOOST_REQUIRE( c[0][0] == 87.0 + 0.0*I ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x1_3x1_bisbis) { + using complex = std::complex; + complex const I{0.0, 1.0}; + multi::array const a = { + {1.0 + 2.0 * I}, + {9.0 - 1.0 * I}, + {1.0 + 1.0 * I}}; + multi::array const b = { + {11.0 - 2.0 * I, 7.0 - 3.0 * I, 8.0 - 1.0 * I} + }; + { + multi::array c({1, 1}); + + BOOST_REQUIRE( size(blas::H(a)) == 1 ); + BOOST_REQUIRE( size(blas::H(b)[0]) == 1 ); + + blas::gemm(1.0, blas::H(a), blas::H(b), 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][0] == 84.0 + 7.0*I ); + } + { + multi::array c({1, 1}); + blas::gemm_n(1.0, begin(blas::H(a)), size(blas::H(a)), begin(blas::H(b)), 0.0, begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][0] == 84.0 + 7.0*I ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_empty) { + multi::array const a({0, 5}); + BOOST_REQUIRE( size( a) == 0 ); + BOOST_REQUIRE( size(~a) == 5 ); + BOOST_REQUIRE( a.is_empty() ); + + multi::array const b({5, 0}); + BOOST_REQUIRE( size( b) == 0 ); + BOOST_REQUIRE( size(~b) == 0 ); + BOOST_REQUIRE( b.is_empty() ); + { + multi::array c; + blas::gemm(1.0, a, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + } + { + multi::array 
c; + blas::gemm_n(1.0, begin(a), size(a), begin(b), 0.0, begin(c)); // c=ab, c⸆=b⸆a⸆ + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_nonsquare2) { + multi::array const a = { + {1.0, 3.0}, + {9.0, 7.0}, + {1.0, 1.0}, + }; + multi::array const b = { + {11.0, 12.0}, + { 7.0, 19.0}, + }; + { + multi::array c({size(a), size(~b)}); + blas::gemm(1.0, a, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[2][1] == 31.0 ); + } + { + multi::array c({size(a), size(~b)}); + blas::gemm_n(1.0, begin(a), size(a), begin(b), 0.0, begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[2][1] == 31.0 ); + } + { + multi::array c({size(~b), size(a)}); + blas::gemm(1.0, a, b, 0.0, ~c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 31.0 ); + } + { + multi::array c({size(~b), size(a)}); + blas::gemm_n(1.0, begin(a), size(a), begin(b), 0.0, begin(~c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 31.0 ); + } + { + auto ar = +~a; + multi::array c({3, 2}); + blas::gemm(1.0, ~ar, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[2][1] == 31.0 ); + } + { + auto ar = +~a; + multi::array c({3, 2}); + blas::gemm_n(1.0, begin(~ar), size(~ar), begin(b), 0.0, begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[2][1] == 31.0 ); + } + { + auto ar = +~a; + multi::array c({2, 3}); + blas::gemm(1.0, ~ar, b, 0.0, ~c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 31.0 ); + } + { + auto ar = +~a; + multi::array c({2, 3}); + blas::gemm_n(1.0, begin(~ar), size(~ar), begin(b), 0.0, begin(~c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 31.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_2x2_2x2) { + multi::array const a = { + {1.0, 3.0}, + {9.0, 4.0}, + }; + multi::array const b = { + {11.0, 12.0}, + { 7.0, 19.0}, + }; + { + multi::array c({2, 2}); + blas::gemm(1.0, ~a, b, 0.0, c); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE( c[1][0] == 61.0 ); + + blas::gemm(1.0, ~a, b, 0.0, ~c); // c⸆=a⸆b, c=b⸆a + BOOST_REQUIRE( c[0][1] == 61.0 ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1.0, begin(~a), size(~a), 
begin(b), 0.0, begin(c)); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE( c[1][0] == 61.0 ); + + blas::gemm_n(1.0, begin(~a), size(~a), begin(b), 0.0, begin(~c)); // c⸆=a⸆b, c=b⸆a + BOOST_REQUIRE( c[0][1] == 61.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_2x3_3x2) { + multi::array const a = { + {1.0, 3.0}, + {9.0, 4.0}, + {1.0, 5.0}, + }; + multi::array const b = { + {11.0, 12.0}, + { 7.0, 19.0}, + { 8.0, 1.0}, + }; + { + multi::array c({2, 2}); + blas::gemm(1.0, ~a, b, 0.0, c); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE( c[1][0] == 101.0 ); + + blas::gemm(1., ~a, b, 0., ~c); // c⸆=a⸆b, c=b⸆a + BOOST_REQUIRE( c[0][1] == 101 ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1.0, begin(~a), size(~a), begin(b), 0.0, begin(c)); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE( c[1][0] == 101.0 ); + + blas::gemm_n(1.0, begin(~a), size(~a), begin(b), 0.0, begin(~c)); // c⸆=a⸆b, c=b⸆a + BOOST_REQUIRE( c[0][1] == 101.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_1x3_3x2) { + multi::array const a = { + {1.0, 9.0, 1.0} + }; + BOOST_REQUIRE( stride(~a) == 1 ); + BOOST_REQUIRE( stride( a) == 3 ); + multi::array const b = { + {11.0, 12.0}, + { 7.0, 19.0}, + { 8.0, 1.0}, + }; + { + multi::array c({size(a), size(~b)}); + blas::gemm(1.0, a, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 184.0 ); + } + { + multi::array c({size(a), size(~b)}); + blas::gemm_n(1.0, begin(a), size(a), begin(b), 0.0, begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 184.0 ); + } + { + auto ar = +~a; + multi::array c({size(~b), size(~ar)}); + blas::gemm(1.0, ~ar, b, 0.0, ~c); // c⸆=a⸆b, c=b⸆a + BOOST_REQUIRE( c[1][0] == 184.0 ); + } + { + auto ar = +~a; + BOOST_REQUIRE( size(~ar) == 1 ); + BOOST_REQUIRE( begin(~ar).stride() == 1 ); + BOOST_REQUIRE( begin(~ar)->stride() == 1 ); + BOOST_REQUIRE( begin( ar)->stride() == 1 ); + + multi::array c({size(~b), size(~ar)}); + BOOST_REQUIRE( begin( c).stride() == 1 ); + BOOST_REQUIRE( begin(~c).stride() == 1 ); + BOOST_REQUIRE( begin(c)->stride() 
== 1 ); + + BOOST_REQUIRE( begin(b) ); + blas::gemm_n(1.0, begin(~ar), size(~ar), begin(b), 0.0, begin(~c)); // c⸆=a⸆b, c=b⸆a + BOOST_REQUIRE( c[1][0] == 184.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complexreal_1x3_3x2) { + using complex = std::complex; + multi::array const a = { + {1.0, 9.0, 1.0} + }; + BOOST_REQUIRE( stride(~a) == 1 ); + BOOST_REQUIRE( stride( a) == 3 ); + multi::array const b = { + {11.0, 12.0}, + { 7.0, 19.0}, + { 8.0, 1.0}, + }; + { + multi::array c({size(a), size(~b)}); + blas::gemm(1.0, a, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 184.0 ); + } + { + multi::array c({size(a), size(~b)}); + blas::gemm_n(1.0, begin(a), size(a), begin(b), 0.0, begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 184.0 ); + } + { + auto ar = +~a; + multi::array c({size(~b), size(~ar)}); + blas::gemm(1.0, ~ar, b, 0.0, ~c); // c⸆=a⸆b, c=b⸆a + BOOST_REQUIRE( c[1][0] == 184.0 ); + } + { + auto ar = +~a; + multi::array c({size(~b), size(~ar)}); + blas::gemm_n(1.0, begin(~ar), size(~ar), begin(b), 0.0, begin(~c)); // c⸆=a⸆b, c=b⸆a + BOOST_REQUIRE( c[1][0] == 184.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_1x3_part_3x2) { + multi::array const a = { + {1.0, 9.0, 1.0}, + {3.0, 3.0, 3.0}, + }; + BOOST_REQUIRE( stride(~a) == 1 ); + BOOST_REQUIRE( stride( a) == 3 ); + multi::array const b = { + {11.0, 12.0}, + { 7.0, 19.0}, + { 8.0, 1.0}, + }; + { + multi::array c({size(a({0, 1})), size(~b)}); + blas::gemm(1.0, a({0, 1}), b, 0.0, c); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE( c[0][1] == 184.0 ); + } + { + multi::array c({size(a({0, 1})), size(~b)}); + blas::gemm_n(1.0, begin(a({0, 1})), size(a({0, 1})), begin(b), 0.0, begin(c)); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE( c[0][1] == 184.0 ); + } + { + auto ar = +~a; + multi::array c({size(~b), size(~ar(extension(ar), {0, 1}))}); + blas::gemm(1.0, ~(ar(extension(ar), {0, 1})), b, 0.0, ~c); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE( c[1][0] == 184.0 ); + } + { + auto ar = +~a; + multi::array 
c({size(~b), size(~ar(extension(ar), {0, 1}))}); + blas::gemm_n(1.0, begin(~(ar(extension(ar), {0, 1}))), size(~(ar(extension(ar), {0, 1}))), begin(b), 0., begin(~c)); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE( c[1][0] == 184.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complexreal_1x3_part_3x2) { + using complex = std::complex; + multi::array const a = { + {1.0, 9.0, 1.0}, + {3.0, 3.0, 3.0}, + }; + BOOST_REQUIRE( stride(~a) == 1 ); + BOOST_REQUIRE( stride( a) == 3 ); + multi::array const b = { + {11.0, 12.0}, + { 7.0, 19.0}, + { 8.0, 1.0} + }; + { + multi::array c({size(a({0, 1})), size(~b)}); + blas::gemm(1.0, a({0, 1}), b, 0.0, c); + BOOST_REQUIRE( c[0][1] == 184.0 ); + } + { + multi::array c({size(a({0, 1})), size(~b)}); + blas::gemm_n(1.0, begin(a({0, 1})), size(a({0, 1})), begin(b), 0.0, begin(c)); + BOOST_REQUIRE( c[0][1] == 184.0 ); + } + { + auto ar = +~a; + multi::array c({size(~b), size(~ar(extension(ar), {0, 1}))}); + blas::gemm(1.0, ~(ar(extension(ar), {0, 1})), b, 0.0, ~c); + BOOST_REQUIRE( c[1][0] == 184.0 ); + } + { + auto ar = +~a; + multi::array c({size(~b), size(~ar(extension(ar), {0, 1}))}); + blas::gemm_n(1.0, begin(~(ar(extension(ar), {0, 1}))), size(~(ar(extension(ar), {0, 1}))), begin(b), 0.0, begin(~c)); + BOOST_REQUIRE( c[1][0] == 184.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_2x3_3x1) { + multi::array const a = { + {1.0, 9.0, 1.0}, + {3.0, 3.0, 3.0}, + }; + BOOST_REQUIRE( stride(~a) == 1 ); + BOOST_REQUIRE( stride( a) == 3 ); + multi::array const b = { + {11.0}, + {7.0}, + {8.0}}; + { + multi::array c({size(a), size(~b)}); + blas::gemm(1.0, a, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][0] == 82.0 ); + BOOST_REQUIRE( c[1][0] == 78.0 ); + } + { + multi::array c({size(a), size(~b)}); + blas::gemm_n(1.0, begin(a), size(a), begin(b), 0.0, begin(c)); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE( c[0][0] == 82.0 ); + BOOST_REQUIRE( c[1][0] == 78.0 ); + } + { + auto ar = +~a; + multi::array c({size(~b), 
size(~ar(extension(ar), {0, 1}))}); + blas::gemm(1.0, ~(ar(extension(ar), {0, 1})), b, 0.0, ~c); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE( c[0][0] == 82.0 ); + } + { + auto ar = +~a; + multi::array c({size(~b), size(~ar(extension(ar), {0, 1}))}); + blas::gemm_n(1., begin(~(ar(extension(ar), {0, 1}))), size(~(ar(extension(ar), {0, 1}))), begin(b), 0., begin(~c)); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE( c[0][0] == 82.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_2x3_3x1_bis) { + multi::array const a = { + {1.0, 9.0, 1.0}, + {3.0, 4.0, 5.0}, + }; + multi::array const b = { + {11.0}, + {7.0}, + {8.0}}; + + { + multi::array c({1, 2}); + blas::gemm(1.0, a, b, 0.0, ~c); // c⸆=ab, c=b⸆a⸆ + BOOST_REQUIRE( (~c)[0][0] == 82.0 ); + BOOST_REQUIRE( (~c)[1][0] == 101.0 ); + } + { + multi::array c({1, 2}); + blas::gemm_n(1.0, begin(a), size(a), begin(b), 0.0, begin(~c)); // c⸆=ab, c=b⸆a⸆ + BOOST_REQUIRE( (~c)[0][0] == 82.0 ); + BOOST_REQUIRE( (~c)[1][0] == 101.0 ); + } + { + multi::array c({2, 1}); + blas::gemm(1.0, a, b, 0.0, c); // c⸆=ab, c=b⸆a⸆ + BOOST_REQUIRE( (~c)[0][1] == 101.0 ); + BOOST_REQUIRE( c[1][0] == 101.0 ); + } + { + multi::array c({2, 1}); + blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c⸆=ab, c=b⸆a⸆ + BOOST_REQUIRE( (~c)[0][1] == 101.0 ); + BOOST_REQUIRE( c[1][0] == 101.0 ); + } + { + multi::array c({1, 2}); + auto ar = +~a; + blas::gemm(1., ~ar, b, 0., ~c); // c⸆=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 101.0 ); + } + { + multi::array c({1, 2}); + auto ar = +~a; + blas::gemm_n(1., begin(~ar), size(~ar), begin(b), 0., begin(~c)); // c⸆=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 101.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_1x3_3x1) { + multi::array const a = { + {1.0, 9.0, 1.0}, + }; + multi::array const b = { + {11.0}, + {7.0}, + {8.0}}; + { + multi::array c({1, 1}); + blas::gemm(1.0, a, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][0] == 82.0 ); + } + { + multi::array c({1, 1}); + blas::gemm_n(1., begin(a), 
size(a), begin(b), 0., begin(c)); + BOOST_REQUIRE( c[0][0] == 82.0 ); + } + { + multi::array c({1, 1}); + auto ar = +~a; + blas::gemm(1.0, ~ar, b, 0.0, c); + BOOST_REQUIRE( c[0][0] == 82.0 ); + } + { + multi::array c({1, 1}); + auto ar = +~a; + blas::gemm_n(1.0, begin(~ar), size(~ar), begin(b), 0.0, begin(c)); + BOOST_REQUIRE( c[0][0] == 82.0 ); + } + { + multi::array c({1, 1}); + auto br = +~b; + blas::gemm(1.0, a, ~br, 0.0, c); + BOOST_REQUIRE( c[0][0] == 82.0 ); + } + { + multi::array c({1, 1}); + BOOST_REQUIRE( begin(c). stride() == 1 ); + BOOST_REQUIRE( begin(c)->stride() == 1 ); + + auto br = +~b; + // BOOST_REQUIRE( begin(br). stride() == 1 ); + BOOST_REQUIRE( begin( br)->stride() == 1 ); + + BOOST_REQUIRE(begin(a)->stride() == 1); + BOOST_REQUIRE( begin(~br). stride() == 1 ); + // BOOST_REQUIRE( begin(~br)->stride() == 1 ); + BOOST_REQUIRE(begin(c)->stride() == 1); + BOOST_REQUIRE(begin(c).stride() == 1); + BOOST_REQUIRE(size(a) == 1); + + blas::gemm_n(1.0, begin(a), size(a), begin(~br), 0.0, begin(c)); + BOOST_REQUIRE( c[0][0] == 82.0 ); + } + { + multi::array c({1, 1}); + auto br = +~b; + blas::gemm(1.0, a, blas::H(br), 0.0, c); + BOOST_REQUIRE( c[0][0] == 82.0 ); + } + { + multi::array c({1, 1}); + auto br = +~b; + blas::gemm_n(1.0, begin(a), size(a), begin(blas::H(br)), 0.0, begin(c)); + BOOST_REQUIRE( c[0][0] == 82.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_square) { + using complex = std::complex; + constexpr complex I{0.0, 1.0}; + multi::array const a = { + {1.0 + 3.0 * I, 3.0 + 2.0 * I}, + {9.0 + 1.0 * I, 7.0 + 1.0 * I}, + }; + multi::array const b = { + {11.0 + 2.0 * I, 12.0 + 4.0 * I}, + { 7.0 + 1.0 * I, 19.0 - 9.0 * I}, + }; + { + multi::array c({2, 2}); + blas::gemm(1.0, a, b, 0.0, c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == 145.0 + 43.0*I ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == 145. 
+ 43.*I ); + } + { + multi::array c({2, 2}); + blas::gemm(1., ~a, b, 0., c); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE(( c[1][1] == 170.-8.*I && c[1][0] == 77.+42.*I )); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(~a), size(~a), begin(b), 0., begin(c)); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE(( c[1][1] == 170.-8.*I && c[1][0] == 77.+42.*I )); + } + { + multi::array c({2, 2}); + blas::gemm(1., a, ~b, 0., c); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE( c[1][0] == 177.+69.*I ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(a), size(a), begin(~b), 0., begin(c)); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE( c[1][0] == 177.+69.*I ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::T(a), blas::T(b), 0., c); // c=a⸆b⸆, c⸆=ba + BOOST_REQUIRE( c[1][0] == 109. + 68.*I ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(blas::T(b)), 0., begin(c)); // c=a⸆b⸆, c⸆=ba + BOOST_REQUIRE( c[1][0] == 109. + 68.*I ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::T(a), blas::T(b), 0., blas::T(c)); // c⸆=a⸆b⸆, c=ba + BOOST_REQUIRE( c[0][1] == 109.+68.*I ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(blas::T(b)), 0., begin(blas::T(c))); // c⸆=a⸆b⸆, c=ba + BOOST_REQUIRE( c[0][1] == 109.+68.*I ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_1x3_3x1) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a = { + {1. + 2. * I, 9. - 1. * I, 1. + 1. * I}, + }; + multi::array const b = { + {11. - 2. * I}, + {7. - 3. * I}, + {8. - 1. 
* I}}; + { + multi::array c({1, 1}); + blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][0] == 84.-7.*I ); + } + { + multi::array c({1, 1}); + blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][0] == 84.-7.*I ); + } + { + multi::array c({1, 1}); + auto ar = +~a; + blas::gemm(1., ~ar, b, 0., c); // c=ab, c⸆=ba + BOOST_REQUIRE( c[0][0] == 84.-7.*I ); + } + { + multi::array c({1, 1}); + auto ar = +~a; + blas::gemm_n(1., begin(~ar), size(~ar), begin(b), 0., begin(c)); // c=ab, c⸆=ba + BOOST_REQUIRE( c[0][0] == 84.-7.*I ); + } + { + multi::array c({1, 1}); + auto br = +~b; + blas::gemm(1., a, ~br, 0., c); + BOOST_REQUIRE( c[0][0] == 84.-7.*I ); + } + { + multi::array c({1, 1}); + auto br = +~b; + blas::context ctxt; + blas::gemm_n(ctxt, 1., begin(a), size(a), begin(~br), 0., begin(c)); + BOOST_REQUIRE( c[0][0] == 84.-7.*I ); + } + { + multi::array c({1, 1}); + auto br = +~b; + blas::gemm(1., a, blas::H(br), 0., ~c); + BOOST_REQUIRE( c[0][0] == 80. + 53.*I ); + } + { + multi::array c({1, 1}); + auto br = +~b; + blas::gemm_n(1., begin(a), size(a), begin(blas::H(br)), 0., begin(~c)); + BOOST_REQUIRE( c[0][0] == 80. + 53.*I ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_hermitized_square) { + using complex = std::complex; + constexpr complex I{0, 1}; + multi::array const a = { + {1. + 3. * I, 3. + 2. * I}, + {9. + 1. * I, 7. + 1. * I}, + }; + multi::array const b = { + {11. + 2. * I, 12. + 4. * I}, + { 7. + 1. * I, 19. - 9. * I}, + }; + { + multi::array c({2, 2}); + blas::gemm(1., a, b, 0., c); // c=ab, c†=b†a† + BOOST_REQUIRE( c[1][0] == 145. + 43.*I ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c†=b†a† + BOOST_REQUIRE( c[1][0] == 145. + 43.*I ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::H(a), blas::H(b), 0., c); // c=a†b†, c†=ba + BOOST_REQUIRE( c[1][0] == 109. 
- 68.*I ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(blas::H(b)), 0., begin(c)); // c=a†b†, c†=ba + BOOST_REQUIRE( c[1][0] == 109. - 68.*I ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::H(a), blas::H(b), 0., blas::H(c)); // c†=a†b†, c=ba + BOOST_REQUIRE( c[1][0] == 184. - 40.*I ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::H(a), b, 0., c); // c=a†b, c†=b†a + BOOST_REQUIRE( c[1][0] == 87. - 16.*I ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=a†b, c†=b†a + BOOST_REQUIRE( c[1][0] == 87. - 16.*I ); + } + { + multi::array c({2, 2}); + blas::gemm(1., a, blas::H(b), 0., c); // c=ab†, c†=ba† + BOOST_REQUIRE( c[1][0] == 189. - 23.*I ); + } + { + multi::array c({2, 2}); + c = blas::gemm(1., a, blas::H(b)); // c=ab†, c†=ba† + BOOST_REQUIRE( c[1][0] == 189. - 23.*I ); + } + { + multi::array c = blas::gemm(1., a, blas::H(b)); // c=ab†, c†=ba† + BOOST_REQUIRE( size(c) == 2 ); + BOOST_REQUIRE( c[1][0] == 189. - 23.*I ); + } + { + auto c = multi::array(blas::gemm(1., a, blas::H(b))); // c=ab†, c†=ba† + BOOST_REQUIRE( size(c) == 2 ); + BOOST_REQUIRE( c[1][0] == 189. - 23.*I ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); // c=ab†, c†=ba† + BOOST_REQUIRE( c[1][0] == 189. - 23.*I ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::H(a), blas::H(b), 0., c); // c=a†b†, c†=ba + BOOST_REQUIRE( c[1][0] == 109. - 68.*I); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(blas::H(b)), 0., begin(c)); // c=a†b†, c†=ba + BOOST_REQUIRE( c[1][0] == 109. - 68.*I); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x1_3x1) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a = { + {1. + 2. * I}, + {9. - 1. * I}, + {1. + 1. * I}}; + multi::array const b = { + {11. - 2. * I}, + {7. - 3. 
* I}, + {8. - 1. * I}}; + { + multi::array c({1, 1}); + blas::gemm(1., blas::H(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][0] == 80.-53.*I ); + } + { + multi::array c({1, 1}); + blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][0] == 80.-53.*I ); + } + { + multi::array c({1, 1}); + blas::gemm(1., blas::H(a), b, 0., c); // c=a†b, c†=b†a + BOOST_REQUIRE( c[0][0] == 80.-53.*I ); + } + { + multi::array c({1, 1}); + blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=a†b, c†=b†a + BOOST_REQUIRE( c[0][0] == 80.-53.*I ); + } + { + multi::array c({1, 1}); + auto ha = +blas::hermitized(a); + blas::gemm(1., ha, b, 0., c); + BOOST_REQUIRE( c[0][0] == 80.-53.*I ); + + blas::gemm(1., blas::H(b), a, 0., c); + BOOST_REQUIRE( c[0][0] == 80.+53.*I ); + } + { + multi::array c({1, 1}); + auto ha = +blas::hermitized(a); + blas::gemm_n(1., begin(ha), size(ha), begin(b), 0., begin(c)); + BOOST_REQUIRE( c[0][0] == 80.-53.*I ); + + blas::gemm_n(1., begin(blas::H(b)), size(blas::H(b)), begin(a), 0., begin(c)); + BOOST_REQUIRE( c[0][0] == 80.+53.*I ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_1x3_3x2) { + using complex = std::complex; + constexpr complex I{0, 1}; + multi::array const a = { + {1. + 2. * I, 9. - 1. * I, 1. + 1. * I} + }; + multi::array const b = { + {11. - 2. * I, 5. + 2. * I}, + { 7. - 3. * I, 2. + 1. * I}, + { 8. - 1. * I, 1. + 1. 
* I} + }; + { + multi::array c({1, 2}); + blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 20.+21.*I ); + } + { + multi::array c({1, 2}); + blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 20.+21.*I ); + } + { + auto ar = +~a; + multi::array c({1, 2}); + blas::gemm(1., blas::H(ar), b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 28.+3.*I ); + } + { + auto ar = +~a; + multi::array c({1, 2}); + blas::gemm_n(1., begin(blas::H(ar)), size(blas::H(ar)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 28.+3.*I ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x1_3x2) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a = { + {1. + 2. * I}, + {9. - 1. * I}, + {1. + 1. * I}}; + multi::array const b = { + {11. - 2. * I, 5. + 2. * I}, + { 7. - 3. * I, 2. + 1. * I}, + { 8. - 1. * I, 1. + 1. * I} + }; + { + multi::array c({1, 2}); + blas::gemm(1., blas::H(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 28.+3.*I ); + } + { + multi::array c({1, 2}); + blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][1] == 28.+3.*I ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x2_3x2) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a = { + {1. + 2. * I, 5. + 2. * I}, + {9. - 1. * I, 9. + 1. * I}, + {1. + 1. * I, 2. + 2. * I} + }; + multi::array const b = { + {11. - 2. * I, 5. + 2. * I}, + { 7. - 3. * I, 2. + 1. * I}, + { 8. - 1. * I, 1. + 1. 
* I} + }; + { + multi::array c({2, 2}); + blas::gemm(1., blas::H(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == 125.-84.*I ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == 125.-84.*I ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x2_3x1) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a = { + {1. + 2. * I, 5. + 2. * I}, + {9. - 1. * I, 9. + 1. * I}, + {1. + 1. * I, 2. + 2. * I} + }; + multi::array const b = { + {11. - 2. * I}, + {7. - 3. * I}, + {8. - 1. * I}}; + { + multi::array c({2, 1}); + blas::gemm(1., blas::H(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == 125.-84.*I ); + } + { + multi::array c({2, 1}); + blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == 125.-84.*I ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x1_3x1_bis) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a = { + {1. + 2. * I}, + {9. - 1. * I}, + {1. + 1. * I}}; + multi::array const b = { + {11. - 2. * I}, + {7. - 3. * I}, + {8. - 1. * I}}; + { + multi::array c({1, 1}); + blas::gemm(1., blas::H(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][0] == 80. - 53.*I ); + } + { + multi::array c({1, 1}); + blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[0][0] == 80. 
- 53.*I ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_square_automatic) { + multi::array const a = { + {1., 3.}, + {9., 7.}, + }; + multi::array const b = { + {11., 12.}, + { 7., 19.}, + }; + { + multi::array c({2, 2}); + blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == 148 && c[1][1] == 241 ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == 148 && c[1][1] == 241 ); + } + { + multi::array c({2, 2}); + blas::gemm(1., a, blas::T(b), 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][1] == 196. ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::T(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][1] == 169. ); + BOOST_REQUIRE( c[1][0] == 82. ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::T(a), blas::T(b), 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][1] == 154. ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_square_automatic) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a = { + {1. + 2. * I, 3. - 3. * I}, + {9. + 1. * I, 7. + 4. * I}, + }; + multi::array const b = { + {11. + 1. * I, 12. + 1. * I}, + { 7. + 8. * I, 19. - 2. 
* I}, + }; + namespace blas = multi::blas; + { + multi::array c({2, 2}); + blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == complex(115, 104) ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == complex(115, 104) ); + } + { + multi::array c({2, 2}); + blas::gemm(1., a, blas::T(b), 0., c); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE( c[1][0] == complex(178, 75) ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(a), size(a), begin(blas::T(b)), 0., begin(c)); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE( c[1][0] == complex(178, 75) ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_square_automatic_part2) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a = { + {1. + 2. * I, 3. - 3. * I}, + {9. + 1. * I, 7. + 4. * I}, + }; + multi::array const b = { + {11. + 1. * I, 12. + 1. * I}, + { 7. + 8. * I, 19. - 2. * I}, + }; + namespace blas = multi::blas; + { + multi::array c({2, 2}); + blas::gemm(1., blas::T(a), b, 0., c); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE(( c[1][1] == complex(180, 29) && c[1][0] == complex(53, 54) )); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(b), 0., begin(c)); // c=a⸆b, c⸆=b⸆a + BOOST_REQUIRE(( c[1][1] == complex(180, 29) && c[1][0] == complex(53, 54) )); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::T(a), blas::T(b), 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE(( c[1][1] == complex(186, 65) && c[1][0] == complex(116, 25) )); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(blas::T(b)), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE(( c[1][1] == complex(186, 65) && c[1][0] == complex(116, 25) )); + } + { + multi::array c({2, 2}); + blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == complex(115, 104) ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(a), size(a), 
begin(b), 0.0, begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][0] == complex(115, 104) ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::H(a), b, 0., c); // c=a†b, c†=b†a + BOOST_REQUIRE( c[1][0] == complex(111, 64) && c[1][1] == complex(158, -51) ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_square_automatic_part3) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a = { + {1. + 2. * I, 3. - 3. * I}, + {9. + 1. * I, 7. + 4. * I}, + }; + multi::array const b = { + {11. + 1. * I, 12. + 1. * I}, + { 7. + 8. * I, 19. - 2. * I}, + }; + namespace blas = multi::blas; + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=a†b, c†=b†a + BOOST_REQUIRE( c[1][0] == complex(111, 64) && c[1][1] == complex(158, -51) ); + } + { + multi::array c({2, 2}); + blas::gemm(1., a, blas::H(b), 0., c); // c=ab†, c†=ba† + BOOST_REQUIRE( c[1][0] == complex(188, 43) && c[1][1] == complex(196, 25) ); + auto c2 = +blas::gemm(1., a, blas::H(b)); + BOOST_REQUIRE( c2 == c ); + } + { + multi::array c({2, 2}); + blas::gemm(0.1, a, blas::H(b), 0., c); // c=ab†, c†=ba† + auto const c2 = +blas::gemm(0.1, a, blas::H(b)); + BOOST_REQUIRE( c2 == c ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::H(a), blas::H(b), 0., c); // c=a†b†, c†=ba + BOOST_REQUIRE( c[1][0] == complex(116, -25) && c[1][1] == complex(186, -65) ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(blas::H(b)), 0., begin(c)); // c=a†b†, c†=ba + BOOST_REQUIRE( c[1][0] == complex(116, -25) && c[1][1] == complex(186, -65) ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::T(a), blas::H(b), 0., c); // c=a⸆b†, c†=ba⸆† + BOOST_REQUIRE( c[1][0] == complex(118, 5) && c[1][1] == complex(122, 45) ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(blas::H(b)), 0., begin(c)); // c=a⸆b†, c†=ba⸆† + BOOST_REQUIRE( 
c[1][0] == complex(118, 5) && c[1][1] == complex(122, 45) ); + } + { + multi::array c({2, 2}); + blas::gemm(1., blas::T(a), blas::T(b), 0., c); // c=a⸆b⸆, c⸆=ba + BOOST_REQUIRE( c[1][0] == complex(116, 25) && c[1][1] == complex(186, 65) ); + } + { + multi::array c({2, 2}); + blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(blas::T(b)), 0., begin(c)); // c=a⸆b⸆, c⸆=ba + BOOST_REQUIRE( c[1][0] == complex(116, 25) && c[1][1] == complex(186, 65) ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_nonsquare_automatic) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a = { + {1. + 2. * I, 3. - 3. * I, 1. - 9. * I}, + {9. + 1. * I, 7. + 4. * I, 1. - 8. * I}, + }; + multi::array const b = { + {11. + 1. * I, 12. + 1. * I, 4. + 1. * I, 8. - 2. * I}, + { 7. + 8. * I, 19. - 2. * I, 2. + 1. * I, 7. + 1. * I}, + { 5. + 1. * I, 3. - 1. * I, 3. + 8. * I, 1. + 1. * I} + }; + { + multi::array c({2, 4}); + blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == complex(112, 12) ); + } + { + multi::array c({2, 4}); + blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == complex(112, 12) ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_realcomplex_complex_nonsquare_automatic) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a = { + {1., 3., 1.}, + {9., 7., 1.}, + }; + multi::array const b = { + {11. + 1. * I, 12. + 1. * I, 4. + 1. * I, 8. - 2. * I}, + { 7. + 8. * I, 19. - 2. * I, 2. + 1. * I, 7. + 1. * I}, + { 5. + 1. * I, 3. - 1. * I, 3. + 8. * I, 1. + 1. 
* I} + }; + { + multi::array c = blas::gemm(1., a, b); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == complex(53, 24) ); + } + { + multi::array c({2, 4}); + c = blas::gemm(1., a, b); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == complex(53, 24) ); + } + { + multi::array c({2, 4}); + blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == complex(53, 24) ); + } + { + multi::array c({2, 4}); + blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == complex(53, 24) ); + } + { + multi::array const a_real = { + {1., 3., 1.}, + {9., 7., 1.}, + }; + multi::array c({2, 4}); + blas::real_doubled(c) = blas::gemm(1., a_real, blas::real_doubled(b)); + + BOOST_REQUIRE( c[1][2] == complex(53, 24) ); + } +} + +BOOST_AUTO_TEST_CASE(submatrix_result_issue_97) { + using complex = std::complex; + constexpr complex I{0, 1}; + multi::array M = { + {2. + 3. * I, 2. + 1. * I, 1. + 2. * I}, + {4. + 2. * I, 2. + 4. * I, 3. + 1. * I}, + {7. + 1. * I, 1. + 5. * I, 0. + 3. * I}, + }; + auto M2 = +M({0, 3}, {0, 1}); + BOOST_REQUIRE( M2 == M({0, 3}, {0, 1}) ); +} + +BOOST_AUTO_TEST_CASE(blas_context_gemm) { + using complex = std::complex; + static constexpr complex I{0, 1}; + auto rand = [d = std::normal_distribution<>{}, g = std::mt19937{}]() mutable { return d(g) + d(g) * I; }; // NOLINT(cert-msc32-c, cert-msc51-cpp): test purposes + + multi::array A({30, 40}); + multi::array B({40, 50}); + + std::generate(A.elements().begin(), A.elements().end(), rand); + std::generate(B.elements().begin(), B.elements().end(), rand); +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_nonsquare_hermitized_second_gemm_range) { + multi::array const a({2, 3}, 0.); + multi::array const b({4, 3}, 0.); + { + multi::array c({2, 4}); + c() = blas::gemm(0.1, a, blas::H(b)); + BOOST_REQUIRE_CLOSE(c[1][2], 0., 0.00001); + } + { + multi::array c = blas::gemm(0.1, a, blas::H(b)); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE( c[1][2] == 0. 
); + } + { + multi::array const a = { + {1, 3, 1}, + {9, 7, 1}, + }; + (void)a; + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_nonsquare_hermitized_second_gemm_range) { + using complex = std::complex; + multi::array const a({2, 3}, 0.); + multi::array const b({4, 3}, 0.); + { + multi::array c({2, 4}, 999.); + blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); + BOOST_REQUIRE( c[1][2] != 999. ); + } + { + multi::array c = blas::gemm(1., a, blas::H(b)); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE( c[1][2] == 0. ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_nonsquare_hermitized_second) { + namespace blas = multi::blas; + multi::array const a = { + {1, 3, 1}, + {9, 7, 1}, + }; + multi::array const b = { + {11, 7, 5}, + {12, 19, 3}, + { 4, 2, 3}, + { 8, 7, 1} + }; + { + multi::array c({2, 4}); + blas::gemm(1., a, blas::H(b), 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 53. ); + } + { + multi::array c({2, 4}); + blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 53. 
); + } + { + multi::array c({2, 4}); + blas::gemm(0.1, a, blas::H(b), 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE_CLOSE(c[1][2], 5.3, 0.00001); + } + { + multi::array c({2, 4}); + blas::gemm_n(0.1, begin(a), size(a), begin(blas::H(b)), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE_CLOSE(c[1][2], 5.3, 0.00001); + } + { + multi::array c({2, 4}); + c() = blas::gemm(0.1, a, blas::H(b)); + } + { + multi::array c = blas::gemm(0.1, a, blas::H(b)); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE_CLOSE(c[1][2], 5.3, 0.00001); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_real_nonsquare_hermitized_second) { + namespace blas = multi::blas; + using complex = std::complex; + multi::array const a = { + {1., 3., 1.}, + {9., 7., 1.}, + }; + multi::array const b = { + {11., 7., 5.}, + {12., 19., 3.}, + { 4., 2., 3.}, + { 8., 7., 1.} + }; + { + multi::array c({2, 4}); + blas::gemm(1., a, blas::H(b), 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 53. ); + } + { + multi::array c({2, 4}); + blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE( c[1][2] == 53. ); + } + { + multi::array c({2, 4}); + blas::gemm(0.1, a, blas::H(b), 0., c); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE_CLOSE(real(c[1][2]), 5.3, 0.00001); + } + { + multi::array c({2, 4}); + blas::gemm_n(0.1, begin(a), size(a), begin(blas::H(b)), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ + BOOST_REQUIRE_CLOSE(real(c[1][2]), 5.3, 0.00001); + } + { + multi::array c({2, 4}); + c() = blas::gemm(0.1, a, blas::H(b)); + } + { + multi::array c = blas::gemm(0.1, a, blas::H(b)); // c=ab⸆, c⸆=ba⸆ + BOOST_REQUIRE_CLOSE(real(c[1][2]), 5.3, 0.00001); + } +} + +BOOST_AUTO_TEST_CASE(blas_gemm_1xn_complex) { + using complex = std::complex; + multi::array const a({1, 100}, 1.); + multi::array const b({1, 100}, 1.); + + multi::array c({1, 1}, 999.); + blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); + BOOST_REQUIRE( c[0][0] == 100. 
); +} + +BOOST_AUTO_TEST_CASE(blas_gemm_nx1_times_1x1_complex_inq_hydrogen_case) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a({3, 1}, 2. + 1. * I); + multi::array const b({1, 1}, 3. + 4. * I); + + multi::array c({3, 1}, 999.); + blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); + BOOST_TEST_REQUIRE( c[0][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); + BOOST_TEST_REQUIRE( c[1][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); + BOOST_TEST_REQUIRE( c[2][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); +} + +BOOST_AUTO_TEST_CASE(blas_gemm_nx1_times_1x1_complex_inq_hydrogen_case_no_n_interface) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a({3, 1}, 2. + 1. * I); + multi::array const b({10, 1}, 3. + 4. * I); + + multi::array c({3, 10}, 999.); + blas::gemm(1., a, blas::H(b), 0., c); + BOOST_TEST_REQUIRE( c[0][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); + BOOST_TEST_REQUIRE( c[1][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); + BOOST_TEST_REQUIRE( c[0][1] == (2. + 1.*I)*std::conj(3. + 4.*I) ); +} + +BOOST_AUTO_TEST_CASE(blas_gemm_nx1_times_1x1_1x1_complex_inq_hydrogen_case_complex_value_hermitized) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a({1, 1}, 2. + 1. * I); + multi::array const b({1, 1}, 3. + 4. * I); + + multi::array c({1, 1}, 999.); + c = blas::gemm(1., a, blas::H(b)); + BOOST_REQUIRE( c[0][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); +} + +BOOST_AUTO_TEST_CASE(blas_gemm_nx1_times_1x1_1x1_complex_inq_hydrogen_case_complex_value) { + using complex = std::complex; + complex const I{0, 1}; + multi::array const a({1, 1}, 2. + 1. * I); + multi::array const b({1, 1}, 3. + 4. * I); + + multi::array c({1, 1}, 999.); + c = blas::gemm(1., a, b); + BOOST_REQUIRE( c[0][0] == (2. + 1.*I)*(3. 
+ 4.*I) ); +} + +BOOST_AUTO_TEST_CASE(blas_gemm_nx1_times_1x1_1x1_complex_inq_hydrogen_case) { + using complex = std::complex; + multi::array const a({1, 1}, 2.); + multi::array const b({1, 1}, 3.); + + multi::array c({1, 1}, 999.); + c = blas::gemm(1., a, b); + BOOST_REQUIRE( c[0][0] == 6. ); +} + +BOOST_AUTO_TEST_CASE(blas_gemm_inq_case) { // https://gitlab.com/correaa/boost-multi/-/issues/97 + using complex = std::complex; + complex const I{0, 1}; + multi::array mat({10, 2}, 1.0 + 3. * I); + multi::array vec({10, 1}, -2.0 + 4. * I); + + mat({0, 10}, {1, 2}) = vec; + + namespace blas = multi::blas; + + { + auto olap1 = +blas::gemm(1., blas::H(mat), vec); + auto olap2 = +blas::gemm(1., blas::H(mat({0, 10}, {0, 1})), vec); + + BOOST_REQUIRE( blas::H(mat)[1].size() == (~vec)[0].size() ); + BOOST_REQUIRE( blas::dot(blas::H(mat)[0], (~vec)[0]) == olap1[0][0] ); + BOOST_REQUIRE( std::inner_product(blas::H(mat)[0].begin(), blas::H(mat)[0].end(), (~vec)[0].begin(), complex{0}) == olap1[0][0] ); + + multi::array mat2 = mat({0, 10}, {0, 1}); + auto olap3 = +blas::gemm(1., blas::H(mat2), vec); + + BOOST_REQUIRE(olap1[0][0] == olap2[0][0]); + BOOST_REQUIRE(olap3[0][0] == olap2[0][0]); + } + { + multi::array mat2 = mat({0, 3}, {0, 1}); + auto olap3 = +blas::gemm(1., blas::H(mat({0, 3}, {0, 1})), vec); + BOOST_REQUIRE( (+blas::gemm(1., blas::H(mat2), vec))[0][0] == (+blas::gemm(1., blas::H(mat({0, 3}, {0, 1})), vec))[0][0] ); + } +} + +BOOST_AUTO_TEST_CASE(blas_issue_109_part2) { + multi::array const A({3, 4}, 5.); // NOLINT(readability-identifier-length) BLAS naming + multi::array const B({2, 3}, 7.); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({2, 4}, 999.0); // NOLINT(readability-identifier-length) BLAS naming + blas::gemm(1.0, ~A, ~B, 0.0, ~C); + + BOOST_TEST_REQUIRE( C[0][0] == 105.0 ); + BOOST_TEST_REQUIRE( C[0][1] == 105.0 ); + BOOST_TEST_REQUIRE( C[1][0] == 105.0 ); +} + +BOOST_AUTO_TEST_CASE(blas_issue_109) { + multi::array const A({3, 4}, 
5.0); // NOLINT(readability-identifier-length) BLAS naming + multi::array const B({2, 3}, 7.0); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({4, 2}, 999.0); // NOLINT(readability-identifier-length) BLAS naming + blas::gemm(1., ~A, ~B, 0., C); + + BOOST_TEST_REQUIRE( C[0][0] == 105.0 ); + BOOST_TEST_REQUIRE( C[0][1] == 105.0 ); + BOOST_TEST_REQUIRE( C[1][0] == 105.0 ); +} + +BOOST_AUTO_TEST_CASE(blas_issue_109_part2_complex) { + multi::array, 2> const A({3, 4}, {5.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + multi::array, 2> const B({2, 3}, {7.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + + multi::array, 2> C({2, 4}, {999.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + blas::gemm({1.0, 0.0}, ~A, ~B, {0.0, 0.0}, ~C); + + BOOST_TEST_REQUIRE( C[0][0] == 105.0 ); + BOOST_TEST_REQUIRE( C[0][1] == 105.0 ); + BOOST_TEST_REQUIRE( C[1][0] == 105.0 ); +} + +BOOST_AUTO_TEST_CASE(blas_issue_109_complex) { + multi::array, 2> const A({3, 4}, {5.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + multi::array, 2> const B({2, 3}, {7.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + + multi::array, 2> C({4, 2}, {999.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + blas::gemm({1.0, 0.0}, ~A, ~B, {0.0, 0.0}, C); + + BOOST_TEST_REQUIRE( C[0][0] == 105.0 ); + BOOST_TEST_REQUIRE( C[0][1] == 105.0 ); + BOOST_TEST_REQUIRE( C[1][0] == 105.0 ); +} +#endif + +BOOST_AUTO_TEST_CASE(blas_issue_109_complex_mx2) { + multi::array, 2> const A({3, 4}, {5.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + multi::array, 2> const B({2, 3}, {7.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + + multi::array, 2> C({4, 2}, {999.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + blas::gemm({1.0, 0.0}, ~A, ~B, {0.0, 0.0}, C); + + BOOST_TEST_REQUIRE( C[0][0] == 105.0 ); + BOOST_TEST_REQUIRE( C[1][0] == 105.0 ); +} + 
+BOOST_AUTO_TEST_CASE(blas_issue_109_complex_mx1) { + multi::array, 2> const A({3, 4}, {5.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + multi::array, 2> const B({1, 3}, {7.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + + multi::array, 2> C({4, 1}, {999.0, 0.0}); // NOLINT(readability-identifier-length) BLAS naming + blas::gemm(std::complex{1.0, 0.0}, ~A, ~B, std::complex{0.0, 0.0}, C); + + BOOST_TEST_REQUIRE( C[0][0] == 105.0 ); + BOOST_TEST_REQUIRE( C[1][0] == 105.0 ); +} + +BOOST_AUTO_TEST_CASE(blas_issue_109_double_mx1) { + multi::array const A({3, 4}, 5.0); // NOLINT(readability-identifier-length) BLAS naming + multi::array const B({1, 3}, 7.0); // NOLINT(readability-identifier-length) BLAS naming + + multi::array C({4, 1}, 999.0); // NOLINT(readability-identifier-length) BLAS naming + blas::gemm(1.0, ~A, ~B, 0.0, C); + + BOOST_TEST_REQUIRE( C[0][0] == 105.0 ); + BOOST_TEST_REQUIRE( C[1][0] == 105.0 ); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/gemv.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/gemv.cpp new file mode 100644 index 0000000000..2f43dd1b1b --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/gemv.cpp @@ -0,0 +1,291 @@ +// Copyright 2020-2024 Alfredo A. 
Correa + +#include + +#include + +#include "../../../adaptors/blas/gemv.hpp" +#include "../../../array.hpp" + +#include "../../../utility.hpp" + +#include "../../blas/axpy.hpp" +#include "../../blas/dot.hpp" +#include "../../blas/gemm.hpp" +#include "../../blas/nrm2.hpp" + +#include + +namespace multi = boost::multi; +namespace blas = multi::blas; + +using fp_types = boost::mpl::list; // old versions of Boost.Test need MPL Type lists explicitly + +template +auto MV(M const& a, VI const& x, VO&& y) -> VO&& { // NOLINT(readability-identifier-naming,readability-identifier-length) BLAS naming + std::transform( + begin(a), end(a), begin(y), + [&x](auto const& row) { return std::inner_product(begin(row), end(row), begin(x), 0.0); } + ); + return std::forward(y); +} + +// #ifdef _MULTI_USING_BLAS_MKL +// #include // for mkl_free_buffers +// struct Fixture { +// Fixture() {mkl_disable_fast_mm(); } // this is reported to solve memory leaks, but it doesn't with BLA_VENDOR=Intel10_64ilp (non seq) and INTEL_MKL_VERSION 20200004 +// ~Fixture() { mkl_free_buffers(); } // this is reported to solve memory leaks, but it doesn't with BLA_VENDOR=Intel10_64ilp (non seq) and INTEL_MKL_VERSION 20200004 +// }; + +// BOOST_GLOBAL_FIXTURE(Fixture); +// #endif + +BOOST_AUTO_TEST_CASE_TEMPLATE(multi_blas_gemv, T, fp_types) { + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const a = { + { 9.0, 24.0, 30.0, 9.0}, + { 4.0, 10.0, 12.0, 7.0}, + {14.0, 16.0, 36.0, 1.0}, + }; + multi::array const x = {1.1, 2.1, 3.1, 4.1}; // NOLINT(readability-identifier-length) BLAS naming + { + multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) BLAS naming + blas::gemv_n(1.0, begin(a), size(a), begin(x), 0.0, begin(y)); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.0001); + if(!std::is_same_v) { // workaround Apple Accelerate BLAS bug in dot + BOOST_REQUIRE_CLOSE(y[2], +blas::dot(a[2], x), 0.0001); + } + } + { + multi::array 
y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) BLAS naming + multi::array const aT = ~a; + blas::gemv_n(1.0, begin(~aT), size(~aT), begin(x), 0.0, begin(y)); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.0001); + + if(!std::is_same_v) { // workaround Apple Accelerate BLAS bug in dot + BOOST_REQUIRE_CLOSE(y[2], +blas::dot(a[2], x), 0.0001); + } + } + { + multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) BLAS naming + auto mv = blas::gemv(1.0, a, x); + copy_n(mv.begin(), mv.size(), y.begin()); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.00001); + + multi::array w2(multi::extensions_t<1>{multi::iextension{size(a)}}); + MV(a, x, w2); + BOOST_REQUIRE_CLOSE(w2[0], y[0], 0.00001); + } + { + multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) BLAS naming + y = blas::gemv(1.0, a, x); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.00001); + } + { + multi::array y = blas::gemv(1.0, a, x); // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.00001); + } + { + multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}, 0.); // NOLINT(readability-identifier-length) BLAS naming + y += blas::gemv(1.0, a, x); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.00001); + } + { + multi::array y = {4.0, 5.0, 6.0}; // NOLINT(readability-identifier-length) BLAS naming + blas::gemv(1.1, a, x, 1.0, y); // y = a*M*x + b*y + BOOST_REQUIRE_CLOSE(y[1], 105.43, 0.00001); + } +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(multi_blas_gemv_real, T, fp_types) { + namespace blas = multi::blas; + + using std::abs; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const a = { + { 9.0, 24.0, 30.0, 9.0}, + { 4.0, 10.0, 12.0, 7.0}, + {14.0, 16.0, 36.0, 1.0}, + }; + multi::array const x = {1.1, 2.1, 3.1, 4.1}; // NOLINT(readability-identifier-length) BLAS naming + { + multi::array y = {4.0, 5.0, 6.0}; // 
NOLINT(readability-identifier-length) BLAS naming + T const alpha = 1.1; + T const beta = 1.2; + blas::gemv(alpha, a, x, beta, y); // y = a*M*x + b*y + + multi::array const y3 = {214.02, 106.43, 188.37}; + BOOST_REQUIRE( abs(y[1] - y3[1]) < 2e-14 ); + } + if constexpr(!std::is_same_v) { + auto Y = +blas::gemv(1.0, a, x); // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE_CLOSE(Y[0], +blas::dot(a[0], x), 0.00001); + BOOST_REQUIRE_CLOSE(Y[1], +blas::dot(a[1], x), 0.00001); + BOOST_REQUIRE_CLOSE(Y[2], +blas::dot(a[2], x), 0.00001); + } + { + multi::array const x = {1.0, 2.0, 3.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const y = {4.0, 5.0, 6.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array const dot = blas::gemv(1., multi::array({x}), y); + if(!std::is_same_v) { // workaround Apple Accelerate BLAS bug in dot + BOOST_REQUIRE( dot[0] == blas::dot(x, y) ); + } + } + { + using blas::operators::operator%; + using blas::operators::operator-; + using blas::operators::operator^; + BOOST_REQUIRE_SMALL(((~+~a) % x - a % x) ^ 2, 1e-9); + } +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(multi_blas_gemv_real_complex, T, fp_types) { + namespace blas = multi::blas; + using complex = std::complex; + using std::abs; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const M = { + { {9.0, 0.0}, {24.0, 0.0}, {30.0, 0.0}, {9.0, 0.0}}, + { {4.0, 0.0}, {10.0, 0.0}, {12.0, 0.0}, {7.0, 0.0}}, + {{14.0, 0.0}, {16.0, 0.0}, {36.0, 0.0}, {1.0, 0.0}}, + }; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const X = { + {1.1, 0.0}, + {2.1, 0.0}, + {3.1, 0.0}, + {4.1, 0.0}, + }; + { + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array Y = { + {4.0, 0.0}, + {5.0, 0.0}, + {6.0, 0.0}, + }; + + auto const alpha = T{1.1}; + auto const beta = T{1.2}; + + blas::gemv(alpha, M, X, beta, Y); // y = a*M*x + b*y + + multi::array const Y3 = { + {214.02, 0.0}, + {106.43, 
0.0}, + {188.37, 0.0}, + }; + + using blas::operators::operator-; + T const n2{blas::nrm2(Y - Y3)}; + BOOST_REQUIRE_SMALL(n2, T{1.0e-4}); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_gemv_complex) { + namespace blas = multi::blas; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + using std::abs; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const a = { + {2.0 + 3.0 * I, 2.0 + 1.0 * I, 1.0 + 2.0 * I}, + {4.0 + 2.0 * I, 2.0 + 4.0 * I, 3.0 + 1.0 * I}, + {7.0 + 1.0 * I, 1.0 + 5.0 * I, 0.0 + 3.0 * I}, + }; + multi::array const x = {1.0 + 2.0 * I, 2.0 + 1.0 * I, 9.0 + 2.0 * I}; // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE(( +blas::gemv(1., a, x) == multi::array{4.0 + 31.*I, 25.0 + 35.0*I, -4.0 + 53.0*I} )); + + auto aT = +~a; + BOOST_REQUIRE(( +blas::gemv(1., ~aT, x) == multi::array{4.0 + 31.0*I, 25.0 + 35.0*I, -4.0 + 53.0*I} )); + + BOOST_REQUIRE( +blas::gemv(1., ~a, x) == (multi::array{63.0 + 38.0*I, -1.0 + 62.0*I, -4.0 + 36.0*I}) ); + BOOST_REQUIRE( +blas::gemv(1., ~a, x) == + blas::gemv(1.0, aT, x) ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_gemv_temporary) { + using complex = std::complex; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}, + {{0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}}, + {{0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}}, + }; + + auto const B = [](auto array) { // NOLINT(readability-identifier-length) BLAS naming + auto rand = [gauss = std::normal_distribution<>{}, gen = std::mt19937{1}]() mutable { return complex{gauss(gen), gauss(gen)}; }; // NOLINT(cert-msc32-c,cert-msc51-cpp) test purposes + std::generate(array.elements().begin(), array.elements().end(), rand); + return array; + }(multi::array({3, 3})); + + using blas::operators::operator*; + using blas::operators::operator-; + using blas::operators::operator^; + BOOST_REQUIRE( (((+(A*B))[0] - B[0])^2) == 0.0 ); 
+ BOOST_REQUIRE( (((+(A*B))[1] - B[1])^2) == 0.0 ); + BOOST_REQUIRE( (((+(A*B))[2] - B[2])^2) == 0.0 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_gemv_context) { + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const a = { + { 9.0, 24.0, 30.0, 9.0}, + { 4.0, 10.0, 12.0, 7.0}, + {14.0, 16.0, 36.0, 1.0}, + }; + multi::array const x = {1.1, 2.1, 3.1, 4.1}; // NOLINT(readability-identifier-length) conventional name in BLAS + + blas::context ctxt; + { + multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) conventional name in BLAS + blas::gemv_n(&ctxt, 1.0, begin(a), size(a), begin(x), 0.0, begin(y)); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.0001); + BOOST_REQUIRE_CLOSE(y[2], +blas::dot(a[2], x), 0.0001); + } + { + multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) conventional name in BLAS + multi::array const aT = ~a; + blas::gemv_n(&ctxt, 1.0, begin(~aT), size(~aT), begin(x), 0.0, begin(y)); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.00001); + BOOST_REQUIRE_CLOSE(y[2], +blas::dot(a[2], x), 0.00001); + } + { + multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) conventional name in BLAS + auto&& mv = blas::gemv(&ctxt, 1.0, a, x); + copy_n(mv.begin(), mv.size(), y.begin()); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.00001); + } + { + multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) conventional name in BLAS + y = blas::gemv(&ctxt, 1.0, a, x); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.00001); + } + { + multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) conventional name in BLAS + y = blas::gemv(1.0, a, x); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.00001); + } + { + multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) conventional name in 
BLAS + y() = blas::gemv(1.0, a, x); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.00001); + } + { + multi::array y = blas::gemv(&ctxt, 1.0, a, x); // NOLINT(readability-identifier-length) conventional name in BLAS + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.00001); + } + { + multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}, 0.0); // NOLINT(readability-identifier-length) conventional name in BLAS + y += blas::gemv(&ctxt, 1.0, a, x); + BOOST_REQUIRE_CLOSE(y[1], 91.3, 0.00001); + } + { + multi::array y = {4.0, 5.0, 6.0}; // NOLINT(readability-identifier-length) conventional name in BLAS + y += blas::gemv(&ctxt, 1.1, a, x); + BOOST_REQUIRE_CLOSE(y[1], 105.43, 0.00001); + } +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/herk.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/herk.cpp new file mode 100644 index 0000000000..02b3e6d44c --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/herk.cpp @@ -0,0 +1,294 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include +#include + +#include + +namespace multi = boost::multi; + +template auto print(M const& mat, std::string const& msg = "") -> decltype(auto) { // NOLINT(fuchsia-default-arguments-declarations,fuchsia-default-arguments-calls) + using multi::size; + using std::cout; + cout << msg << "\n" + << '{'; + for(int i = 0; i != size(mat); ++i) { + cout << '{'; + for(auto j : mat[i].extension()) { // NOLINT(altera-unroll-loops) + cout << mat[i][j]; + if(j + 1 != size(mat[i])) { + cout << ", "; + } + } + cout << '}' << '\n'; + if(i + 1 != size(mat)) { + cout << ", "; + } + } + return cout << '}' << '\n'; +} + +BOOST_AUTO_TEST_CASE(multi_blas_herk) { + namespace blas = multi::blas; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + // NOLINTNEXTLINE(readability-identifier-length) conventional name in BLAS + multi::array const a = { + {1.0 + 3.0 * I, 3.0 - 2.0 * I, 4.0 + 1.0 * I}, + {9.0 + 1.0 * I, 7.0 - 8.0 * I, 1.0 - 3.0 * I}, + }; + { + multi::array c({2, 2}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) conventional name in BLAS + blas::herk(a, c); + BOOST_REQUIRE( c[1][0] == complex(50.0, -49.0) ); + BOOST_REQUIRE( c[0][1] == complex(50.0, +49.0) ); + + multi::array const c_copy = blas::herk(1., a); + BOOST_REQUIRE( c == c_copy ); + + BOOST_REQUIRE( +blas::gemm(1.0, a, blas::H(a)) == blas::herk(a) ); + } +} + +BOOST_AUTO_TEST_CASE(inq_case) { + namespace blas = multi::blas; + // NOLINTNEXTLINE(readability-identifier-length) conventional name in BLAS + multi::array const a = { + {0.0, 1.0, 2.0}, + {3.0, 4.0, 5.0}, + {6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0}, + }; + BOOST_REQUIRE( (+blas::gemm(1.0, a, blas::T(a)))[1][2] == 86.0 ); + { + multi::array c({4, 4}); // NOLINT(readability-identifier-length) conventional name in BLAS + blas::herk(1.0, a, c); + BOOST_REQUIRE( c[1][2] == (+blas::gemm(1.0, a, blas::T(a)))[1][2] ); + // 
BOOST_REQUIRE( c[2][1] == (+blas::gemm(1., a, blas::T(a)))[2][1] ); + } + { + multi::array const c = blas::herk(1.0, a); // NOLINT(readability-identifier-length) conventional name in BLAS + BOOST_REQUIRE( c == +blas::gemm(1., a, blas::T(a)) ); + BOOST_REQUIRE( blas::herk(a) == +blas::gemm(1.0, a, blas::T(a)) ); + BOOST_REQUIRE( blas::herk(2.0, a) == +blas::gemm(2.0, a, blas::T(a)) ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_herk_real) { + namespace blas = multi::blas; + // NOLINTNEXTLINE(readability-identifier-length) conventional name in BLAS + multi::array const a = { + {1.0, 3.0, 4.0}, + {9.0, 7.0, 1.0}, + }; + { + multi::array c({2, 2}, 9999.0); // NOLINT(readability-identifier-length) BLAS naming + blas::herk(1.0, a, c); + BOOST_REQUIRE( c[0][1] == 34.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_case) { + namespace blas = multi::blas; + // NOLINTNEXTLINE(readability-identifier-length) conventional name in BLAS + multi::array const a = { + {1.0, 2.0, 3.0}, + }; + multi::array b = blas::herk(a); // NOLINT(readability-identifier-length) BLAS naming + + BOOST_REQUIRE( size(b) == 1 ); + BOOST_REQUIRE( b[0][0] == 1.0*1.0 + 2.0*2.0 + 3.0*3.0 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_case_scale, *boost::unit_test::tolerance(0.00001)) { + namespace blas = multi::blas; + // NOLINTNEXTLINE(readability-identifier-length) conventional name in BLAS + multi::array const a = { + {1.0, 2.0, 3.0}, + }; + + multi::array b = blas::herk(0.1, a); // NOLINT(readability-identifier-length) BLAS naming + + BOOST_REQUIRE( size(b) == 1 ); + BOOST_TEST( b[0][0] == (1.0*1.0 + 2.0*2.0 + 3.0*3.0)*0.1 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_real_case) { + namespace blas = multi::blas; + + using complex = std::complex; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const a = { + {{1.0, 0.0}, {2.0, 0.0}, {3.0, 0.0}}, + }; + multi::array b = blas::herk(1.0, a); // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE( 
size(b) == 1 ); + BOOST_REQUIRE( b[0][0] == 1.0*1.0 + 2.0*2.0 + 3.0*3.0 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_real_case_scale, *boost::unit_test::tolerance(0.00001)) { + namespace blas = multi::blas; + + using complex = std::complex; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const a = { + {{1.0, 0.0}, {2.0, 0.0}, {3.0, 0.0}}, + }; + multi::array b = blas::herk(0.1, a); // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE( size(b) == 1 ); + BOOST_TEST( real( b[0][0]/0.1 ) == 1.0*1.0 + 2.0*2.0 + 3.0*3.0 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case) { + namespace blas = multi::blas; + + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + // NOLINTNEXTLINE(readability-identifier-length) conventional name in BLAS + multi::array const a = { + {1.0 + 2.0 * I, 2.0 + 3.0 * I, 3.0 + 4.0 * I}, + }; + multi::array b = blas::herk(a); // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE( size(b) == 1 ); + BOOST_REQUIRE( b[0][0] == std::norm(1.0 + 2.0*I) + std::norm(2.0 + 3.0*I) + std::norm(3.0 + 4.0*I) ); + + BOOST_TEST( std::sqrt(real(blas::herk(a)[0][0])) == blas::nrm2(a[0]) ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case_hermitized_out_param) { + namespace blas = multi::blas; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array const a = {{1.0 + 2.0 * I}, {2.0 + 3.0 * I}, {3.0 + 4.0 * I}}; // NOLINT(readability-identifier-length) BLAS naming + multi::array b({1, 1}); // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE( size(b) == 1 ); + + blas::herk(blas::filling::upper, 1.0, blas::H(a), 0.0, b); + + BOOST_REQUIRE( b[0][0] == std::norm(1.0 + 2.0*I) + std::norm(2.0 + 3.0*I) + std::norm(3.0 + 4.0*I) ); + + // BOOST_TEST( std::sqrt(real(b[0][0])) == blas::nrm2(blas::T(a)[0])() ); +} + 
+BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case_hermitized) { + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) conventional name in BLAS + + // NOLINTNEXTLINE(readability-identifier-length) conventional name in BLAS + multi::array const a = { + {1.0 + 2.0 * I}, + {2.0 + 3.0 * I}, + {3.0 + 4.0 * I}, + }; + + namespace blas = multi::blas; + + multi::array b = blas::herk(blas::H(a)); // NOLINT(readability-identifier-length) BLAS naming + + BOOST_REQUIRE( size(b) == 1 ); + BOOST_REQUIRE( b[0][0] == std::norm(1.0 + 2.0*I) + std::norm(2.0 + 3.0*I) + std::norm(3.0 + 4.0*I) ); + + BOOST_TEST( std::sqrt(real(blas::herk(blas::H(a))[0][0])) == blas::nrm2(rotated(a)[0]) ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case_hermitized_auto) { + namespace blas = multi::blas; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array const arr = { + {1.0 + 2.0 * I}, + {2.0 + 3.0 * I}, + {3.0 + 4.0 * I}, + }; + auto arr2 = blas::herk(1.0, blas::hermitized(arr)); + static_assert(std::is_same>{}); + BOOST_REQUIRE( size(arr2) == 1 ); + BOOST_REQUIRE( arr2[0][0] == std::norm(1.0 + 2.0*I) + std::norm(2.0 + 3.0*I) + std::norm(3.0 + 4.0*I) ); + + BOOST_TEST( std::sqrt(real(blas::herk(blas::H(arr))[0][0])) == blas::nrm2(rotated(arr)[0]) ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_identity) { + namespace blas = multi::blas; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array const arr = { + {1.0 + 3.0 * I, 3.0 - 2.0 * I, 4.0 + 1.0 * I}, + {9.0 + 1.0 * I, 7.0 - 8.0 * I, 1.0 - 3.0 * I}, + }; + + { + multi::array arr2({2, 2}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) conventional one-letter operation BLASs + blas::herk(blas::filling::lower, 1.0, arr, 0.0, arr2); // c†=c=aa†=(aa†)†, `c` in lower triangular + BOOST_REQUIRE(( arr2[1][0] == 
complex{50.0, -49.0} )); + BOOST_REQUIRE( arr2[0][1] == 9999.0 ); + } + { + multi::array c({2, 2}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) conventional one-letter operation BLASs + static_assert(blas::is_conjugated{}); + + blas::herk(blas::filling::lower, 1.0, arr, 0.0, blas::H(c)); // c†=c=aa†=(aa†)†, `c` in upper triangular + + BOOST_REQUIRE(( blas::H(c)[1][0] == complex{50.0, -49.0} )); + BOOST_REQUIRE( blas::H(c)[0][1] == 9999.0 ); + } + { + multi::array c({3, 3}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) : conventional one-letter operation BLASs + herk(blas::filling::lower, 1.0, blas::T(arr), 0.0, blas::T(c)); // c†=c=aT(aT)† not supported + BOOST_REQUIRE(( transposed(c)[1][0] == complex{52.0, -90.0} )); + BOOST_REQUIRE( transposed(c)[0][1] == 9999.0 ); + } + { + multi::array c({3, 3}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) : conventional one-letter operation BLASs + blas::herk(blas::filling::lower, 1.0, blas::T(arr), 0.0, blas::H(blas::T(c))); // c†=c=aT(aT)† not supported + BOOST_REQUIRE(( blas::H(blas::T(c))[1][0] == complex{52.0, -90.0} )); + BOOST_REQUIRE( blas::H(blas::T(c))[0][1] == 9999.0 ); + } + { + multi::array c({2, 2}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) : conventional one-letter operation BLAS + blas::herk(blas::filling::upper, 1.0, arr, 0.0, c); // c†=c=aa†=(aa†)†, `c` in upper triangular + BOOST_REQUIRE(( c[0][1] == complex{50.0, +49.0} )); + BOOST_REQUIRE( c[1][0] == 9999.0 ); + } + { + multi::array c({2, 2}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) : conventional one-letter operation BLAS + blas::herk(1., arr, c); // c†=c=aa†=(aa†)† + BOOST_REQUIRE(( c[0][1] == complex{50.0, +49.0} )); + BOOST_REQUIRE(( c[1][0] == complex{50.0, -49.0} )); + } + { + multi::array c({3, 3}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) : conventional one-letter operation BLAS + blas::herk(blas::filling::lower, 1.0, blas::H(arr), 0.0, c); // c†=c=aa†=(aa†)†, `c` in 
lower triangular + BOOST_REQUIRE(( c[1][0] == complex{52.0, 90.0} )); + BOOST_REQUIRE( c[0][1] == 9999.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_square) { + namespace blas = multi::blas; + + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + auto const nan = std::numeric_limits::quiet_NaN(); + + // NOLINTNEXTLINE(readability-identifier-length) lapack conventional name + multi::array const A = { + {12.9388 + I * 0.0, 9.80028 + I * -0.00011091, 9.66966 + I * -0.0114817}, + { nan + I * nan, 8.44604 + I * 0.0, 3.78646 + I * 0.0170734}, + { nan + I * nan, nan + I * nan, 7.70655 + I * 0.0}, + }; + + // NOLINTNEXTLINE(readability-identifier-length) lapack conventional name + multi::array C({3, 3}, complex{0.0, 0.0}); + + blas::herk(boost::multi::blas::filling::upper, complex{1.0, 0.0}, A, complex{0.0, 0.0}, C); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/iamax.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/iamax.cpp new file mode 100644 index 0000000000..6cbac26add --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/iamax.cpp @@ -0,0 +1,71 @@ +// Copyright 2019-2024 Alfredo A. Correa + +// #define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS/cuBLAS iamax" + +#include + +#include "../../blas/iamax.hpp" + +#include "../../../adaptors/blas/cuda.hpp" +#include "../../../adaptors/cuda.hpp" +#include "../../../array.hpp" + +#include + +using std::cout; +namespace multi = boost::multi; +namespace blas = multi::blas; + +using complex = std::complex; +constexpr complex I{0.0, 1.0}; + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax) { + multi::array const A = { + {1.0 + 2. * I, 2.0, 3.0, 4.0}, + { 5.0, 6.0 + 3.0 * I, 7.0, 8.0}, + { 9.0, 10.0, 11.0 + 4. 
* I, 12.0}, + }; + + using blas::iamax; + + auto chess = [](auto const& a, auto const& b) { + using std::abs; + return abs(real(a)) + abs(imag(a)) < abs(real(b)) + abs(imag(b)); + }; + + BOOST_REQUIRE(iamax(A[1])==std::max_element(begin(A[1]), end(A[1]), chess)-begin(A[1])); + BOOST_REQUIRE(A[1][iamax(A[1])]==*std::max_element(begin(A[1]), end(A[1]), chess)); +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax_cuda) { + multi::cuda::array const A = { + {1.0 + 2.0 * I, 2.0, 3.0, 4.0}, + { 5.0, 6.0 + 3.0 * I, 7.0, 8.0}, + { 9.0, 10.0, 11.0 + 4.0 * I, 12.0}, + }; + using blas::iamax; + BOOST_REQUIRE(iamax(A[1]) == 1); +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax_real) { + multi::array const A = {1.0, 2.0, 3.0, 4.0}; + + auto i = blas::iamax(A); + + BOOST_REQUIRE( i == 3 ); + BOOST_REQUIRE( A[blas::iamax(A)] == 4.0 ); + + BOOST_REQUIRE( *blas::amax(A) == 4.0 ); +} + +using complex = std::complex; + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax_complex) { + multi::array const A = {1.0, 2.0, 3.0, 4.0}; + + auto i = blas::iamax(A); + + BOOST_REQUIRE( i == 3 ); + BOOST_REQUIRE( A[blas::iamax(A)] == 4.0 ); + BOOST_REQUIRE( *blas::amax(A) == 4.0 ); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/nrm2.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/nrm2.cpp new file mode 100644 index 0000000000..d4ae33f7d3 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/nrm2.cpp @@ -0,0 +1,141 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include + +#include + +#include + +namespace multi = boost::multi; + +using complex = multi::complex; +constexpr complex I{0.0, 1.0}; // NOLINT(readability-identifier-length) imaginary unit + +BOOST_AUTO_TEST_CASE(multi_blas_nrm2) { + namespace blas = multi::blas; + + // NOLINTNEXTLINE(readability-identifier-length) blas conventional name + multi::array const A = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + BOOST_REQUIRE( blas::nrm2(A[1]) == std::sqrt(blas::dot(A[1], A[1])) ); + + { + multi::array const x = {1.0 + 1.0 * I, 3.0 + 2.0 * I, 3.0 + 4.0 * I}; // NOLINT(readability-identifier-length) blas conventional name + BOOST_REQUIRE( blas::dot(x, x) == (1.0 + 1.0*I)*(1.0 + 1.0*I) + (3.0 + 2.0*I)*(3.0 + 2.0*I) + (3.0 + 4.0*I)*(3.0 + 4.0*I) ); + using std::sqrt; + BOOST_REQUIRE( blas::nrm2(x) == sqrt(norm(1.0 + 1.0*I) + norm(3.0 + 2.0*I) + norm(3.0 + 4.0*I)) ); + } +} + +BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_real) { + namespace blas = multi::blas; + multi::array const cA = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + + double n = NAN; // NOLINT(readability-identifier-length) BLAS naming + blas::nrm2(rotated(cA)[1], n); + + // BOOST_REQUIRE( blas::nrm2(rotated(cA)[1], n) == std::sqrt( 2.0*2.0 + 6.0*6.0 + 10.0*10.0) ); // TODO(correaa) nrm2 is returning a pointer? + BOOST_REQUIRE( n == std::sqrt( 2.0*2.0 + 6.0*6.0 + 10.0*10.0) ); + // BOOST_REQUIRE( blas::nrm2(rotated(cA)[1]) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); + + // double n2 = blas::nrm2(rotated(cA)[1]); + // BOOST_REQUIRE( n == n2 ); + + // multi::array R(4); + // blas::nrm2( rotated(cA)[1], R[2]); + // BOOST_REQUIRE( R[2] == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); + + // multi::array R0; + // blas::nrm2( rotated(cA)[1], R0); + // BOOST_REQUIRE( R0 == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); + + // BOOST_REQUIRE( blas::nrm2(rotated(cA)[1]) == std::sqrt( 2.*2. 
+ 6.*6 + 10.*10.) ); +} + +BOOST_AUTO_TEST_CASE(multi_adaptor_blas_nrm2_operators) { + multi::array const X = {1.1, 2.1, 3.1, 4.1}; // NOLINT(readability-identifier-length) BLAS naming + + double n = NAN; // NOLINT(readability-identifier-length) BLAS naming + + multi::blas::nrm2(X, n); + BOOST_REQUIRE( n == multi::blas::nrm2(X) ); +} + +// BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex_real_case){ +// using complex = std::complex; +// multi::array const cA = { +// {1., 2., 3., 4.}, +// {5., 6., 7., 8.}, +// {9., 10., 11., 12.} +// }; + +// using multi::blas::nrm2; +// double n; +// BOOST_REQUIRE( nrm2(rotated(cA)[1], n) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); +// BOOST_REQUIRE( nrm2(rotated(cA)[1]) == n ); +//} + +// #if 0 +// BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex_real_case_thrust){ +// using complex = thrust::complex; +// multi::array const cA = { +// {1., 2., 3., 4.}, +// {5., 6., 7., 8.}, +// {9., 10., 11., 12.} +// }; + +// using multi::blas::nrm2; +// double n; +// BOOST_REQUIRE( nrm2(rotated(cA)[1], n) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); +// BOOST_REQUIRE( nrm2(rotated(cA)[1]) == n ); +//} + +// BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex_real_case_types){ +// boost::mpl::for_each, +// thrust::complex//, +// // boost::multi::complex // TODO make this work +// >>([](auto cplx){ +// multi::array const cA = { +// {1., 2., 3., 4.}, +// {5., 6., 7., 8.}, +// {9., 10., 11., 12.} +// }; + +// using multi::blas::nrm2; +// double n; +// BOOST_REQUIRE( nrm2(rotated(cA)[1], n) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); +// BOOST_REQUIRE( nrm2(rotated(cA)[1]) == n ); +// }); +//} +// #endif + +// BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex){ +// using complex = std::complex; complex const I{0,1}; +// multi::array const cA = { +// {1., 2. + 1.*I, 3., 4.}, +// {5., 6. + 4.*I, 7., 8.}, +// {9., 10. 
- 3.*I, 11., 12.} +// }; + +// using multi::blas::nrm2; +// double n; +// BOOST_REQUIRE( nrm2(rotated(cA)[1], n) == std::sqrt( norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1]) ) ); +// BOOST_REQUIRE( nrm2(rotated(cA)[1]) == std::sqrt( norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1]) ) ); + +// using namespace multi::blas::operators; +// BOOST_TEST_REQUIRE( (rotated(cA)[1]^-1) == 1/std::sqrt(norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1])) , boost::test_tools::tolerance(1e-15) ); +// BOOST_TEST_REQUIRE( (rotated(cA)[1]^2) == norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1]) , boost::test_tools::tolerance(1e-15) ); +//} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/numeric.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/numeric.cpp new file mode 100644 index 0000000000..5097798bac --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/numeric.cpp @@ -0,0 +1,171 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include + +#include + +#include + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_numeric_imag) { + using complex = std::complex; auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + namespace blas = multi::blas; + multi::array const array = { 1.0 + 2.0*I, 3.0 + 5.0*I, 9.0 + 2.0*I }; + BOOST_REQUIRE( blas::imag(array)[2] == 2.0 ); + BOOST_REQUIRE( blas::real(array)[2] == 9.0 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_numeric_real_conjugated) { + using complex = std::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array array = { + {1.0 - 3.0*I, 6.0 + 2.0*I}, + {8.0 + 2.0*I, 2.0 + 4.0*I}, + {2.0 - 1.0*I, 1.0 + 1.0*I} + }; + BOOST_REQUIRE( array[0][0] == 1.0 - 3.0*I ); + + multi::array const carray = { + {1.0 - 3.0*I, 6.0 + 2.0*I}, + {8.0 + 2.0*I, 2.0 + 4.0*I}, + {2.0 - 1.0*I, 1.0 + 1.0*I} + }; + BOOST_REQUIRE( carray[0][0] == 1.0 - 3.0*I ); + + namespace blas = multi::blas; + auto conjr = blas::make_conjugater(array.data_elements()); + + decltype(blas::make_conjugater(carray.data_elements())) ppp; // = BdataC; + ppp = conjr; + + BOOST_REQUIRE( *ppp == 1.0 + 3.0*I ); + +// static_assert( multi::blas::is_complex_array, 2>>{}, "!"); + static_assert( blas::is_complex_array{} ); + static_assert(! 
blas::is_conjugated{} ); + + auto&& conjd_array = blas::conj(array); + static_assert( blas::is_conjugated{} ); + + BOOST_REQUIRE( conjd_array[0][0] == 1.0 + 3.0*I ); + BOOST_REQUIRE( imag(*base(conjd_array)) == +3.0 ); + +// BOOST_TEST_REQUIRE( base(Bconj)->imag() == +3 ); + BOOST_REQUIRE( conjd_array[0][1] == rotated(conjd_array)[1][0] ); + BOOST_REQUIRE( rotated(conjd_array)[1][0] == conjd_array[0][1] ); + +// BOOST_REQUIRE( base(Bconj) == -3.0*I ); + static_assert( blas::is_complex_array{} ); + + BOOST_REQUIRE( blas::conj(conjd_array) == array ); + + BOOST_REQUIRE( blas::conj(array)[1][0] == std::conj(array[1][0]) ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_numeric_decay) { + using complex = std::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array arr = { + { 1.0 - 3.0*I, 6.0 + 2.0*I, 9.0 + 3.0*I}, + { 8.0 + 2.0*I, 2.0 + 4.0*I, 9.0 + 3.0*I}, + { 2.0 - 1.0*I, 1.0 + 1.0*I, 9.0 + 3.0*I}, + { 9.0 + 3.0*I, 9.0 + 3.0*I, 9.0 + 3.0*I} + }; + + namespace blas = multi::blas; + multi::array conj_arr{blas::conj(arr)}; + multi::array const conj_arr2 = blas::conj(arr); + + BOOST_REQUIRE( conj_arr[2][1] == std::conj(arr[2][1]) ); + BOOST_REQUIRE( blas::conj(arr)[2][1] == std::conj(arr[2][1]) ); + + BOOST_REQUIRE( blas::transposed(arr)[1][2] == arr[2][1] ); + BOOST_REQUIRE( blas::transposed(arr) == ~arr ); + + BOOST_REQUIRE( blas::conj(arr)[1][2] == blas::hermitized(arr)[2][1] ); + BOOST_REQUIRE( blas::conj(blas::transposed(arr)) == blas::hermitized(arr) ); + + BOOST_REQUIRE( blas::hermitized(arr)[2][1] == blas::conj(arr)[1][2] ); + BOOST_REQUIRE( blas::hermitized(arr) == blas::conj(blas::transposed(arr)) ); + + BOOST_REQUIRE( blas::real(arr)[2][1] == std::real(arr[2][1]) ); + BOOST_REQUIRE( blas::imag(arr)[2][1] == std::imag(arr[2][1]) ); + + multi::array const B_real_doubled = { + { 1.0, -3.0, 6.0, 2.0, 9.0, 3.0}, + { 8.0, 2.0, 2.0, 4.0, 9.0, 3.0}, + { 2.0, -1.0, 1.0, 1.0, 9.0, 3.0}, + { 9.0, 3.0, 9.0, 3.0, 9.0, 3.0} + }; + 
BOOST_REQUIRE( sizes(blas::real_doubled(arr)) == sizes(B_real_doubled) ); + BOOST_REQUIRE( blas::real_doubled(arr) == B_real_doubled ); +} + +#if defined(CUDA_FOUND) and CUDA_FOUND +#include + +BOOST_AUTO_TEST_CASE(multi_blas_numeric_decay_thrust) { + using complex = thrust::complex; complex const I{0.0, 1.0}; + + multi::array B = { + {1.0 - 3.0*I, 6.0 + 2.0*I}, + {8.0 + 2.0*I, 2.0 + 4.0*I}, + {2.0 - 1.0*I, 1.0 + 1.0*I}, + }; + + namespace blas = multi::blas; + multi::array conjB = blas::conj(B); + BOOST_REQUIRE( conjB[1][2] == conj(B[1][2]) ); +} +#endif + +BOOST_AUTO_TEST_CASE(multi_blas_numeric_real_imag_part) { + using complex = std::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array arr = { + {1.0, 3.0, 4.0}, + {9.0, 7.0, 1.0} + }; + multi::array complex_arr = arr; + BOOST_REQUIRE( complex_arr[1][1] == arr[1][1] ); + + multi::array arr2 = { + {1.0 - 3.0*I, 6.0 + 2.0*I}, + {8.0 + 2.0*I, 2.0 + 4.0*I}, + {2.0 - 1.0*I, 1.0 + 1.0*I} + }; + + multi::array const arr2_real = { + {1.0, 6.0}, + {8.0, 2.0}, + {2.0, 1.0}, + }; + multi::array const arr2_imag = { + {-3.0, +2.0}, + {+2.0, +4.0}, + {-1.0, +1.0}, + }; + + using multi::blas::real; + using multi::blas::imag; + + BOOST_REQUIRE( arr2_real == real(arr2) ); + BOOST_REQUIRE( real(arr2) == arr2_real ); + BOOST_REQUIRE( imag(arr2) == arr2_imag ); + + BOOST_REQUIRE( arr2[1][0] == 8.0 + 2.0*I ); + BOOST_REQUIRE( arr2[1][0].imag() == 2.0 ); + + namespace blas = multi::blas; + BOOST_REQUIRE( blas::hermitized(arr2)[1][2] == std::conj( arr2[2][1] ) ); + + blas::hermitized(arr2)[1][2] = 20.0 + 30.0*I; + BOOST_REQUIRE( arr2[2][1] == 20.0 - 30.0*I ); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/operations.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/operations.cpp new file mode 100644 index 0000000000..0131488765 --- /dev/null +++ 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/operations.cpp @@ -0,0 +1,48 @@ +// © Alfredo A. Correa 2019-2024 + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS operations and cuda" +#define BOOST_TEST_DYN_LINK + +#include + +#include "../../blas/dot.hpp" + +#include "../../../array.hpp" +#include "../../blas/cuda.hpp" + +#include "../../../adaptors/cuda.hpp" +#include "../../../complex.hpp" + +#include +#include +#include + +using std::cout; +namespace multi = boost::multi; +namespace blas = multi::blas; + +using complex = std::complex; constexpr complex I{0.0, 1.0}; + +BOOST_AUTO_TEST_CASE(const blas_conjugated_cpu) { + multi::array const a = {5.0 + 2.0*I, 6.0 + 6.0*I, 7.0 + 2.0*I, 8.0 - 3.0*I}; + BOOST_REQUIRE( blas::C(a)[1] == conj(a[1]) ); + + namespace cuda = multi::cuda; + + cuda::array const agpu = {5.0 + 2.0*I, 6.0 + 6.0*I, 7.0 + 2.0*I, 8.0 - 3.0*I}; + BOOST_REQUIRE( blas::C(agpu)[1] == conj(agpu[1]) ); +} + +BOOST_AUTO_TEST_CASE(blas_conjugated_gpu){ +#if 0 + cuda::array const acu = {1.0 + I, 2.0 + 3.0*I, 3.0 + 2.0*I, 4.0 - 9.0*I}; + cuda::array const bcu = {5.0 + 2.0*I, 6.0 + 6.0*I, 7.0 + 2.0*I, 8.0 - 3.0*I}; + + { + cuda::array ccu; + blas::dot(acu, bcu, ccu); + BOOST_REQUIRE( ccu() == 19.0 - 27.0*I ); + } + BOOST_REQUIRE( blas::C(bcu)[1] == 2.0 - 3.0*I ); +#endif +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/scal.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/scal.cpp new file mode 100644 index 0000000000..b517e232cc --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/scal.cpp @@ -0,0 +1,88 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +#include + +namespace multi = boost::multi; +namespace blas = multi::blas; + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_n) { + multi::array arr = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + BOOST_REQUIRE( (arr[0][2] == 3.0) && (arr[2][2] == 11.0) ); + + blas::scal_n(2.0, arr[2].begin(), arr[2].size()); + BOOST_REQUIRE( arr[0][2] == 3. && arr[2][2] == 11.0*2.0 ); +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_it) { + multi::array arr = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + BOOST_REQUIRE( arr[0][2] == 3.0 ); + BOOST_REQUIRE( arr[2][2] == 11.0 ); + + blas::scal(2.0, arr[2].begin(), arr[2].end()); + BOOST_REQUIRE( arr[0][2] == 3.0 ); + BOOST_REQUIRE(arr[2][2] == 11.0*2.0 ); +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_real) { + multi::array arr = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + BOOST_REQUIRE( arr[0][2] == 3.0 ); + BOOST_REQUIRE( arr[2][2] == 11.0 ); + + BOOST_REQUIRE( blas::scal(1.0, arr[2]) == arr[2] ); + BOOST_REQUIRE( &blas::scal(1.0, arr[2]) == &arr[2] ); + BOOST_REQUIRE( +blas::scal(1.0, arr[2]) == arr[2] ); + + blas::scal(2.0, arr[2]); + BOOST_REQUIRE( arr[0][2] == 3.0 && arr[2][2] == 11.0*2.0 ); + + BOOST_REQUIRE( &blas::scal(1.0, arr[2]) == &arr[2] ); +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_real_2D) { + multi::array arr = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + BOOST_REQUIRE( arr[0][2] == 3.0 ); + BOOST_REQUIRE( arr[2][2] == 11.0 ); + + blas::scal(2.0, arr.elements()); + + BOOST_REQUIRE( arr[0][2] == 6.0 ); + BOOST_REQUIRE( arr[2][2] == 22.0 ); +} + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_complex_2D) { + auto const I = std::complex(0.0, 1.0); // NOLINT(readability-identifier-length) blas conventional name + multi::array, 2> arr = { + {1.0 + 0.0*I, 2.0 + 
0.0*I, 3.0 + 0.0*I, 4.0 + 0.0*I}, + {5.0 + 0.0*I, 6.0 + 0.0*I, 7.0 + 0.0*I, 8.0 + 0.0*I}, + {9.0 + 0.0*I, 10.0 + 0.0*I, 11.0 + 0.0*I, 12.0 + 0.0*I}, + }; + BOOST_REQUIRE( arr[0][2] == 3.0 ); + BOOST_REQUIRE( arr[2][2] == 11.0 ); + + blas::scal(2.0, arr.elements()); + + BOOST_REQUIRE( arr[0][2] == 6.0 ); + BOOST_REQUIRE( arr[2][2] == 22.0 ); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/swap.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/swap.cpp new file mode 100644 index 0000000000..d87c8c6dc2 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/swap.cpp @@ -0,0 +1,89 @@ +// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- +// Copyright 2019-2023 Alfredo A. Correa + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS swap" +#include + +#include "../../blas.hpp" + +#include + +#include +#include + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(lapack_potrf, *boost::unit_test::tolerance(0.00001)) { + { + multi::array A = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + BOOST_REQUIRE( A[0][2] == 3.0 ); + BOOST_REQUIRE( A[2][2] == 11.0 ); + + multi::blas::swap(A[0], A[2]); // blas swap + BOOST_REQUIRE( A[0][2] == 11.0 ); + BOOST_REQUIRE( A[2][2] == 3.0 ); + + swap(A[0], A[2]); // built-in swap + BOOST_REQUIRE( A[0][2] == 3.0 ); + BOOST_REQUIRE( A[2][2] == 11.0 ); + } + { + multi::array A = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + BOOST_REQUIRE( A[0][0] == 1.0 ); + BOOST_REQUIRE( A[0][3] == 4.0 ); + + multi::blas::swap(rotated(A)[0], rotated(A)[3]); // blas swap (deep) + BOOST_REQUIRE( A[0][0] == 4.0 ); + BOOST_REQUIRE( A[0][3] == 1.0 ); + + swap(rotated(A)[0], rotated(A)[3]); // built-in swap (deep) + BOOST_REQUIRE( A[0][0] == 1.0 ); + BOOST_REQUIRE( A[0][3] == 4.0 ); + } + { + using complex = std::complex; + complex const I{0, 1}; + multi::array 
A = { + {1.0 + 2. * I, 2.0, 3.0, 4.0 + 3.0 * I}, + { 5.0, 6.0, 7.0, 8.0}, + { 9.0, 10.0, 11.0, 12.0}, + }; + BOOST_REQUIRE( A[0][0] == 1.0 + 2.0*I ); + BOOST_REQUIRE( A[0][3] == 4.0 + 3.0*I ); + + multi::blas::swap(rotated(A)[0], rotated(A)[3]); // blas swap (deep) + BOOST_REQUIRE( A[0][0] == 4.0 + 3.0*I ); + BOOST_REQUIRE( A[0][3] == 1.0 + 2.0*I ); + + swap(rotated(A)[0], rotated(A)[3]); // built-in swap (deep) + BOOST_REQUIRE( A[0][0] == 1.0 + 2.0*I ); + BOOST_REQUIRE( A[0][3] == 4.0 + 3.0*I ); + } + { + multi::array A = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }; + BOOST_REQUIRE( A[0][2] == 3.0 ); + BOOST_REQUIRE( A[2][2] == 11.0 ); + + auto it = multi::blas::swap(begin(A[0]), end(A[0]) - 1, begin(A[2])); // blas swap + BOOST_REQUIRE( it == end(A[2]) - 1 ); + BOOST_REQUIRE( A[0][2] == 11.0 ); + BOOST_REQUIRE( A[2][2] == 3.0 ); + + using std::swap_ranges; + swap_ranges(begin(A[0]), end(A[0]), begin(A[2])); // built-in swap + BOOST_REQUIRE( A[0][2] == 3.0 ); + BOOST_REQUIRE( A[2][2] == 11.0 ); + } +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/syrk.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/syrk.cpp new file mode 100644 index 0000000000..329ebf7564 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/syrk.cpp @@ -0,0 +1,315 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include + +#include + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(multi_blas_syrk_real) { + // NOLINTNEXTLINE(readability-identifier-length) + multi::array const a = { + {1.0, 3.0, 4.0}, + {9.0, 7.0, 1.0}, + }; + { + multi::array c({3, 3}, 9999.0); // NOLINT(readability-identifier-length) + namespace blas = multi::blas; + + using blas::filling; + using blas::transposed; + + syrk(filling::lower, 1.0, transposed(a), 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular + + BOOST_REQUIRE( c[2][1] == 19.0 ); + BOOST_REQUIRE( c[1][2] == 9999.0 ); + } + { + multi::array c({3, 3}, 9999.0); // NOLINT(readability-identifier-length) + namespace blas = multi::blas; + + using blas::filling; + using blas::transposed; + + syrk(filling::upper, 1.0, transposed(a), 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular + + BOOST_REQUIRE( c[1][2] == 19.0 ); + BOOST_REQUIRE( c[2][1] == 9999.0 ); + } + { + multi::array c({2, 2}, 9999.0); // NOLINT(readability-identifier-length) + namespace blas = multi::blas; + + using blas::filling; + using blas::syrk; + + syrk(filling::lower, 1.0, a, 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular + + BOOST_REQUIRE( c[1][0] == 34.0 ); + BOOST_REQUIRE( c[0][1] == 9999.0 ); + } + { + multi::array c({2, 2}, 9999.0); // NOLINT(readability-identifier-length) + + namespace blas = multi::blas; + + using blas::filling; + + syrk(filling::upper, 1.0, a, 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, a⸆a, `c` in lower triangular + + BOOST_REQUIRE( c[0][1] == 34.0 ); + BOOST_REQUIRE( c[1][0] == 9999.0 ); + } + { + multi::array c({2, 2}, 9999.0); // NOLINT(readability-identifier-length) + + namespace blas = multi::blas; + + using blas::filling; + + syrk(filling::upper, 1.0, a, 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, a⸆a, `c` in lower triangular + + BOOST_REQUIRE( c[0][1] == 34.0 ); + BOOST_REQUIRE( c[1][0] == 9999.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_syrk_real_special_case) { + // 
NOLINTNEXTLINE(readability-identifier-length) + multi::array const a = { + {1.0, 3.0, 4.0}, + }; + { + multi::array c({1, 1}, 9999.0); // NOLINT(readability-identifier-length) + + namespace blas = multi::blas; + using blas::filling; + + syrk(filling::lower, 1.0, a, 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular + + BOOST_TEST( c[0][0] == 1.0*1.0 + 3.0*3.0 + 4.0*4.0 ); + } + { + multi::array c({1, 1}, 9999.0); // NOLINT(readability-identifier-length) + + namespace blas = multi::blas; + using blas::filling; + + syrk(filling::upper, 1.0, a, 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular + + BOOST_TEST( c[0][0] == 1.0*1.0 + 3.0*3.0 + 4.0*4.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_syrk_complex_real_case) { + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + // NOLINTNEXTLINE(readability-identifier-length) + multi::array const a = { + {1.0 + I * 0.0, 3.0 + I * 0.0, 4.0 + I * 0.0}, + {9.0 + I * 0.0, 7.0 + I * 0.0, 1.0 + I * 0.0}, + }; + { + multi::array c({3, 3}, 9999.0 + I * 0.0); // NOLINT(readability-identifier-length) + + namespace blas = multi::blas; + + using blas::filling; + using blas::transposed; + + syrk(filling::lower, 1.0, transposed(a), 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular // NOLINT(fuchsia-default-arguments-calls) + + BOOST_REQUIRE( real(c[2][1]) == 19.0 ); + BOOST_REQUIRE( real(c[1][2]) == 9999.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_syrk_complex) { + using complex = std::complex; + + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) + + // NOLINTNEXTLINE(readability-identifier-length) + multi::array const a = { + {1.0 + 3.0 * I, 3.0 - 2.0 * I, 4.0 + 1.0 * I}, + {9.0 + 1.0 * I, 7.0 - 8.0 * I, 1.0 - 3.0 * I}, + }; + { + multi::array c({3, 3}, 9999.0 + I * 0.0); // NOLINT(readability-identifier-length) + + namespace blas = multi::blas; + + syrk(blas::filling::lower, 1.0, blas::T(a), 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower 
triangular // NOLINT(fuchsia-default-arguments-calls) + + BOOST_TEST( real(c[2][1]) == - 3.0 ); + BOOST_TEST( imag(c[2][1]) == -34.0 ); + } + { + multi::array c({2, 2}, 9999.0 + I * 0.0); // NOLINT(readability-identifier-length) + + namespace blas = multi::blas; + + syrk(blas::filling::lower, 1.0, a, 0.0, c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular // NOLINT(fuchsia-default-arguments-calls) + + BOOST_REQUIRE( c[1][0] == complex(18.0, -21.0) ); + BOOST_REQUIRE( c[0][1] == 9999.0 ); + } + { + multi::array c({2, 2}, 9999.0 + I * 0.0); // NOLINT(readability-identifier-length) + + namespace blas = multi::blas; + + syrk(blas::filling::upper, 1.0, a, 0.0, c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in upper triangular // NOLINT(fuchsia-default-arguments-calls) + + BOOST_REQUIRE( c[0][1] == complex(18.0, -21.0) ); + BOOST_REQUIRE( c[1][0] == 9999.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_syrk_automatic_operation_complex) { + using complex = std::complex; + + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) + + // NOLINTNEXTLINE(readability-identifier-length) + multi::array const a = { + {1.0 + 3.0 * I, 3.0 - 2.0 * I, 4.0 + 1.0 * I}, + {9.0 + 1.0 * I, 7.0 - 8.0 * I, 1.0 - 3.0 * I}, + }; + + { + multi::array c({2, 2}, 9999.0 + I * 0.0); // NOLINT(readability-identifier-length) + + using multi::blas::filling; + + syrk(filling::lower, 1.0, a, 0.0, c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular // NOLINT(fuchsia-default-arguments-calls) + + BOOST_REQUIRE( c[1][0]==complex(18.0, -21.0) ); + BOOST_REQUIRE( c[0][1]==9999.0 ); + } + { + multi::array c({3, 3}, 9999.0); // NOLINT(readability-identifier-length) // NOLINT(fuchsia-default-arguments-calls) + + namespace blas = multi::blas; + + using blas::filling; + using blas::transposed; + + syrk(filling::lower, 1.0, transposed(a), 0.0, c); // c⸆=c=a⸆a=(aa⸆)⸆, `c` in lower triangular // NOLINT(fuchsia-default-arguments-calls) + + BOOST_REQUIRE( c[2][1] == complex(-3.0, -34.0) ); + BOOST_REQUIRE( c[1][2] == 9999.0 ); 
+ } + { + multi::array c({3, 3}, 9999.0); // NOLINT(readability-identifier-length) // NOLINT(fuchsia-default-arguments-calls) + + namespace blas = multi::blas; + + using blas::filling; + using blas::transposed; + + syrk(filling::lower, 1.0, rotated(a), 0.0, c); // c⸆=c=a⸆a=(aa⸆)⸆, `c` in lower triangular // NOLINT(fuchsia-default-arguments-calls) + + BOOST_REQUIRE( c[2][1] == complex(-3.0, -34.0) ); + BOOST_REQUIRE( c[1][2] == 9999.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_syrk_automatic_operation_real) { + // NOLINTNEXTLINE(readability-identifier-length) + multi::array const a = { + {1.0, 3.0, 4.0}, + {9.0, 7.0, 1.0}, + }; + { + multi::array c({2, 2}, 9999.0); // NOLINT(readability-identifier-length) + + using multi::blas::filling; + + syrk(filling::lower, 1.0, a, 0.0, c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular + + BOOST_REQUIRE( c[1][0] == 34.0 ); + BOOST_REQUIRE( c[0][1] == 9999.0 ); + } + { + multi::array c({2, 2}, 9999.0); // NOLINT(readability-identifier-length) + + using multi::blas::filling; + + syrk(filling::upper, 1.0, a, 0.0, c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in upper triangular + + BOOST_REQUIRE( c[0][1] == 34.0 ); + BOOST_REQUIRE( c[1][0] == 9999.0 ); + } + { + multi::array c({3, 3}, 9999.0); // NOLINT(readability-identifier-length) + + using multi::blas::filling; + + syrk(filling::lower, 1.0, rotated(a), 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular + + BOOST_REQUIRE( c[2][1] == 19.0 ); + BOOST_REQUIRE( c[1][2] == 9999.0 ); + } + { + multi::array c({3, 3}, 9999.0); // NOLINT(readability-identifier-length) + + namespace blas = multi::blas; + + using blas::filling; + using blas::transposed; + + syrk(filling::lower, 1.0, transposed(a), 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular + + BOOST_REQUIRE( c[2][1] == 19.0 ); + BOOST_REQUIRE( c[1][2] == 9999.0 ); + } + { + multi::array c({3, 3}, 9999.0); // NOLINT(readability-identifier-length) + + namespace blas = multi::blas; + + using blas::filling; + using blas::transposed; + + 
syrk(filling::upper, 1.0, transposed(a), 0.0, c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in upper triangular + + BOOST_REQUIRE( c[1][2] == 19.0 ); + BOOST_REQUIRE( c[2][1] == 9999.0 ); + } + { + multi::array c({2, 2}, 9999.0); // NOLINT(readability-identifier-length) + + using multi::blas::filling; + using multi::blas::transposed; + + syrk(filling::upper, 1.0, a, 0.0, transposed(c)); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in upper triangular + + BOOST_REQUIRE( c[0][1] == 9999.0 ); + BOOST_REQUIRE( c[1][0] == 34.0 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_syrk_automatic_implicit_zero) { + // NOLINTNEXTLINE(readability-identifier-length) + multi::array const a = { + {1.0, 3.0, 4.0}, + {9.0, 7.0, 1.0}, + }; + { + multi::array c({2, 2}, 9999.0); // NOLINT(readability-identifier-length) + + using multi::blas::filling; + + syrk(filling::lower, 1.0, a, c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular + + BOOST_REQUIRE( c[1][0] == 34.0 ); + BOOST_REQUIRE( c[0][1] == 9999.0 ); + } +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/traits.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/traits.cpp new file mode 100644 index 0000000000..db419b19ce --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/traits.cpp @@ -0,0 +1,19 @@ +// Copyright 2019-2024 Alfredo A. 
Correa + +// #define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS traits" +#include + +#include "../../blas/traits.hpp" + +#include + +namespace multi = boost::multi; +namespace blas = multi::blas; + +BOOST_AUTO_TEST_CASE(multi_adaptors_blas_traits) { + static_assert( blas::is_d{} ); + static_assert( blas::is_s{} ); + + static_assert( blas::is_c>{} ); + static_assert( blas::is_z>{} ); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/trsm.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/trsm.cpp new file mode 100644 index 0000000000..21d742dc43 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/trsm.cpp @@ -0,0 +1,366 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include + +#include + +#include + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_0x0) { + namespace blas = multi::blas; + multi::array const A; // NOLINT(readability-identifier-length) BLAS naming + + { + multi::array B; // NOLINT(readability-identifier-length) BLAS naming + // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) + blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 1.0, A, B); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_1x1) { + namespace blas = multi::blas; + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + {10.0, }, + }; + { + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {3.0, }, + }; + auto const B_cpy = B; + blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 1.0, A, B); + // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) + BOOST_REQUIRE_CLOSE( B[0][0] , 3.0/10.0 , 0.00001 ); + BOOST_REQUIRE_CLOSE( (+blas::gemm(1.0, A, 
B))[0][0] , B_cpy[0][0] , 0.00001 ); + } + { + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {3.0, }, + }; + auto const B_cpy = B; + // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) + blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 2.0, A, B); + BOOST_REQUIRE_CLOSE( B[0][0] , 2.0*3.0/10.0 , 0.00001 ); + BOOST_REQUIRE_CLOSE( (+blas::gemm(1.0, A, B))[0][0] , 2.*B_cpy[0][0] , 0.00001 ); + } + { + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {3.0, 4.0, 5.0}, + }; + auto const B_cpy = B; + // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) + blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 1.0, A, B); + BOOST_REQUIRE_CLOSE( B[0][0] , 3./10. , 0.00001 ); + BOOST_REQUIRE_CLOSE( B[0][1] , 4./10. , 0.00001 ); + BOOST_REQUIRE_CLOSE( B[0][2] , 5./10. , 0.00001 ); + BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A, B))[0][1] , B_cpy[0][1] , 0.00001 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_trsm_real_square) { + namespace blas = multi::blas; + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + {1.0, 3.0, 4.0}, + {NAN, 7.0, 1.0}, + {NAN, NAN, 8.0} + }; + auto const A_cpy = triangular(blas::filling::upper, A); + { + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0, 3.0, 4.0}, + {2.0, 7.0, 1.0}, + {3.0, 4.0, 2.0} + }; + auto const B_cpy = B; + blas::trsm(blas::side::left, blas::filling::upper, 1.0, A, B); // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) + BOOST_REQUIRE_CLOSE( B[1][2] , 0.107143 , 0.001 ); + BOOST_REQUIRE( (+blas::gemm(1., A_cpy, B))[1][2] == B_cpy[1][2] ); + } + { + auto const AT =+ ~A; + auto const AT_cpy = triangular(blas::filling::lower, AT); + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0, 3.0, 4.0}, + {2.0, 7.0, 1.0}, + {3.0, 4.0, 2.0} + }; + 
auto const B_cpy = B; + blas::trsm(blas::side::left, blas::filling::upper, 1., blas::T(AT), B); + BOOST_REQUIRE_CLOSE( B[1][2] , 0.107143 , 0.001 ); + BOOST_REQUIRE( (+blas::gemm(1., blas::T(AT_cpy), B))[1][2] == B_cpy[1][2] ); + } + { + auto const AT =+ ~A; + auto const AT_cpy = triangular(blas::filling::lower, AT); + multi::array const B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0, 3.0, 4.0}, + {2.0, 7.0, 1.0}, + {3.0, 4.0, 2.0} + }; + auto BT =+ ~B; + blas::trsm(blas::side::left, blas::filling::upper, 1., blas::T(AT), blas::T(BT)); + BOOST_REQUIRE_CLOSE( blas::T(BT)[1][2] , 0.107143 , 0.001 ); + BOOST_REQUIRE( (+blas::gemm(1., blas::T(AT_cpy), blas::T(BT)))[1][2] == B[1][2] ); + } + { + multi::array const B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0, 3.0, 4.0}, + {2.0, 7.0, 1.0}, + {3.0, 4.0, 2.0} + }; + auto BT =+ ~B; + blas::trsm(blas::side::left, blas::filling::upper, 1.0, A, blas::T(BT)); + BOOST_REQUIRE_CLOSE( (~BT)[1][2] , 0.107143 , 0.001 ); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex) { + namespace blas = multi::blas; + using complex = std::complex; auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 + 2.0*I, 3.0 - 1.0*I, 4.0 + 9.0*I}, + {NAN , 7.0 + 4.0*I, 1.0 + 8.0*I}, + {NAN , NAN , 8.0 + 2.0*I} + }; + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 - 9.0*I, 3.0 + 2.0*I, 4.0 + 3.0*I}, + {2.0 - 2.0*I, 7.0 - 2.0*I, 1.0 - 1.0*I}, + {3.0 + 1.0*I, 4.0 + 8.0*I, 2.0 + 7.0*I} + }; + blas::trsm(blas::side::left, blas::filling::lower, 2.+1.*I, blas::H(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) + BOOST_REQUIRE_CLOSE( real(B[1][2]) , 2.33846 , 0.0001 ); + BOOST_REQUIRE_CLOSE( imag(B[1][2]) , -0.0923077 , 0.0001 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_rectangular) 
{ + namespace blas = multi::blas; + using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 2.0*I, 3.0 - 1.0*I, 4.0 + 9.0*I}, + {NAN , 7.0 + 4.0*I, 1.0 + 8.0*I}, + {NAN , NAN , 8.0 + 2.0*I} + }; + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1. - 9.*I, 3. + 2.*I}, + {2. - 2.*I, 7. - 2.*I}, + {3. + 1.*I, 4. + 8.*I} + }; + blas::trsm(blas::side::left, blas::filling::lower, 2.0 + 1.0*I, blas::H(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) + BOOST_REQUIRE_CLOSE( real(B[2][0]) , -4.16471 , 0.0001 ); + BOOST_REQUIRE_CLOSE( imag(B[2][0]) , 8.25882 , 0.0001 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_column) { + namespace blas = multi::blas; + using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 + 2.0*I, 3.0 - 1.0*I, 4.0 + 9.0*I}, + {NAN , 7.0 + 4.0*I, 1.0 + 8.0*I}, + {NAN , NAN , 8.0 + 2.0*I} + }; + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1. - 9.*I}, + {2. - 2.*I}, + {3. 
+ 1.*I} + }; + blas::trsm(blas::side::left, blas::filling::lower, 2.0 + 1.0*I, blas::H(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) + BOOST_REQUIRE_CLOSE( real(B[2][0]) , -4.16471 , 0.0001); + BOOST_REQUIRE_CLOSE( imag(B[2][0]) , 8.25882 , 0.0001); +} + +BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_column_cpu) { + namespace blas = multi::blas; + using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imaginary unit + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 2.0*I, 3.0 - 1.0*I, 4.0 + 9.0*I}, + {NAN , 7.0 + 4.0*I, 1.0 + 8.0*I}, + {NAN , NAN , 8.0 + 2.0*I} + }; + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 - 9.0*I}, + {2.0 - 2.0*I}, + {3.0 + 1.0*I} + }; + blas::trsm(blas::side::left, blas::filling::lower, 2.0 + 1.0*I, blas::H(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) + BOOST_REQUIRE_CLOSE( real(B[2][0]) , -4.16471 , 0.0001 ); + BOOST_REQUIRE_CLOSE( imag(B[2][0]) , 8.25882 , 0.0001 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_trsm_hydrogen_inq_case_real) { + namespace blas = multi::blas; + multi::array const A = {{2.0, }, }; // NOLINT(readability-identifier-length) BLAS naming + { + multi::array B = {{1.0, 2.0, 3.0}, }; // NOLINT(readability-identifier-length) BLAS naming + BOOST_REQUIRE( B.size() == 1 ); + auto const B_cpy = B; + blas::trsm(blas::side::left, blas::filling::lower, 1.0, A, B); + BOOST_REQUIRE( B[0][1] == B_cpy[0][1]/A[0][0] ); + } + { + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0}, + {2.0}, + {3.0}, + }; + auto const B_cpy = B; + blas::trsm(blas::side::left, blas::filling::lower, 1.0, A, blas::T(B)); + BOOST_REQUIRE( blas::T(B)[0][1] == blas::T(B_cpy)[0][1]/A[0][0] ); + } +} + 
+BOOST_AUTO_TEST_CASE(multi_blas_trsm_hydrogen_inq_case_complex) { + namespace blas = multi::blas; + using complex = std::complex; + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { {2.0, 0.0}, }, + }; + + { + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + { {1.0, 0.0}, {2.0, 0.0}, {3.0, 0.0} }, + }; + auto const B_cpy = B; + blas::trsm(blas::side::left, blas::filling::lower, {1.0, 0.0}, A, B); + BOOST_REQUIRE( B[0][1] == B_cpy[0][1]/A[0][0] ); + } + multi::array B1 = { + { {1.0, 0.0} }, + { {2.0, 0.0} }, + { {3.0, 0.0} }, + }; + multi::array B2 = { + { {1.0, 0.0} }, + { {2.0, 0.0} }, + { {3.0, 0.0} }, + }; + + blas::trsm(blas::side::left, blas::filling::lower, {1.0, 0.0}, A, blas::H(B1)); + + { + auto const B_cpy = B2; + blas::trsm(blas::side::right, blas::filling::upper, {1.0, 0.0}, blas::H(A), B2); + // BOOST_REQUIRE( (+blas::gemm(1., A, blas::H(B)))[0][1] == blas::H(B_cpy)[0][1] ); + BOOST_REQUIRE( (+blas::gemm(1., B2, blas::H(A)))[1][0] == B_cpy[1][0] ); + } + BOOST_REQUIRE( B1 == B2 ); +} + +BOOST_AUTO_TEST_CASE(multi_blas_trsm_real_nonsquare) { + namespace blas = multi::blas; + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + {1.0, 3.0, 40.0}, + {NAN, 7.0, 1.0}, + {NAN, NAN, 8.0} + }; + auto const A_cpy = triangular(blas::filling::upper, A); + { + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0, 3.0, 4.0, 8.0}, + {2.0, 7.0, 1.0, 9.0}, + {3.0, 4.0, 2.0, 1.0}, + }; + auto const B_cpy =+ B; + multi::array BT =+ ~B; + BOOST_REQUIRE( BT == ~B ); + blas::trsm(blas::side::left, blas::filling::upper, 1.0, A, B); // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) + BOOST_REQUIRE_CLOSE( B[1][2] , 0.107143 , 0.001); + BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, B))[1][2] , B_cpy[1][2] , 0.001); + + auto const BT_cpy = BT; + blas::trsm(blas::side::left, blas::filling::upper, 1.0, A, blas::T(BT)); + 
BOOST_REQUIRE_CLOSE( blas::T(BT)[1][2], 0.107143, 0.001 ); + + BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, blas::T(BT)))[1][2] , blas::T(BT_cpy)[1][2] , 0.00001 ); + } + { + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0, 3.0, 4.0, 8.0}, + {2.0, 7.0, 1.0, 9.0}, + {3.0, 4.0, 2.0, 1.0}, + }; + multi::array AT = ~A; + multi::array BT = ~B; + blas::trsm(blas::side::left, blas::filling::upper, 1.0, blas::T(AT), B); // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) + BOOST_REQUIRE_CLOSE( B[1][2] , 0.107143 , 0.001 ); + + blas::trsm(blas::side::left, blas::filling::upper, 1.0, blas::T(AT), blas::T(BT)); + BOOST_REQUIRE_CLOSE( (~BT)[1][2] , 0.107143, 0.001 ); + } + { + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0}, + {2.0}, + {3.0}, + }; + auto const B_cpy =+ B; + blas::trsm(blas::side::left, blas::filling::upper, 1.0, A, B); // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) + BOOST_REQUIRE_CLOSE( B[2][0] , 0.375 , 0.00001 ); + BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, B))[1][0] , B_cpy[1][0] , 0.00001 ); + } + { + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0}, + {2.0}, + {3.0}, + }; + auto const B_cpy =+ B; + blas::trsm(blas::side::left, blas::filling::upper, 1.2, A, B); + BOOST_REQUIRE_CLOSE( (+blas::gemm(1.0, A_cpy, B))[1][0] , 1.2*B_cpy[1][0] , 0.00001 ); + BOOST_REQUIRE_CLOSE( (+blas::gemm(1.0/1.2, A_cpy, B))[1][0] , B_cpy[1][0] , 0.00001 ); + } + { + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0}, + {2.0}, + {3.0}, + }; + multi::array BT = rotated(B); + blas::trsm(blas::side::left, blas::filling::upper, 1.0, A, blas::T(BT)); + BOOST_REQUIRE_CLOSE( (~BT)[2][0] , 0.375 , 0.00001); + } +} + +BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const) { + namespace blas = multi::blas; + using complex = 
std::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + using multi::blas::filling; + using multi::blas::hermitized; + blas::trsm(blas::side::left, blas::filling::upper, {1.0, 0.0}, A, blas::H(B)); // B†←A⁻¹.B†, B←B.A⁻¹†, B←(A⁻¹.B†)† + BOOST_REQUIRE_CLOSE( imag(B[1][2]) , -0.147059 , 0.001); +} + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const) { + namespace blas = multi::blas; + using complex = std::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + blas::trsm(blas::side::left, {1.0, 0.0}, blas::U(A), blas::H(B)); // B†←A⁻¹.B†, B←B.A⁻¹†, B←(A⁻¹.B†)† + BOOST_REQUIRE_CLOSE( imag(B[1][2]) , -0.147059 , 0.001); +} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/trsv.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/trsv.cpp similarity index 72% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/test/trsv.cpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/trsv.cpp index 16bfb73e97..c963c4705d 100644 --- 
a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/trsv.cpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/test/trsv.cpp @@ -1,7 +1,7 @@ #ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- $CXX $0 -o $0x -lcudart -lcublas -lboost_unit_test_framework `pkg-config --libs blas`&&$0x&&rm $0x;exit #endif -// © Alfredo A. Correa 2019-2020 +// © Alfredo A. Correa 2019-2024 #define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS trsv" #define BOOST_TEST_DYN_LINK @@ -34,14 +34,13 @@ complex const I{0, 1}; namespace multi = boost::multi; namespace blas = multi::blas; -BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_cpu, *utf::tolerance(0.00001)){ - +BOOST_AUTO_TEST_CASE(const multi_blas_trsm_complex_cpu, *utf::tolerance(0.00001)){ multi::array const A = { - { 1. + 1.*I, 3. - 2.*I, 4. + 1.*I}, - {NAN , 7. - 10.*I, 1. + 2.*I}, - {NAN , NAN , 8. + 1.*I} + { 1.0 + 1.0*I, 3.0 - 2.0*I, 4.0 + 1.0*I}, + {NAN , 7.0 - 10.0*I, 1.0 + 2.0*I}, + {NAN , NAN , 8.0 + 1.0*I}, }; - multi::array b = {1. + 2.*I, 3. + 1.*I, 4. + 5.*I}; + multi::array b = {1.0 + 2.0*I, 3.0 + 1.0*I, 4.0 + 5.0*I}; blas::trsv(blas::filling::upper, blas::diagonal::general, A, b); BOOST_TEST_REQUIRE( real(b[0]) == -1.37259 ); BOOST_TEST_REQUIRE( real(b[1]) == 0.2127 ); @@ -51,11 +50,11 @@ BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_cpu, *utf::tolerance(0.00001)){ BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_cuda, *utf::tolerance(0.0001)){ namespace cuda = multi::cuda; cuda::managed::array const A = { - { 1. + 1.*I, 3. - 2.*I, 4. + 1.*I}, - {NAN , 7. - 10.*I, 1. + 2.*I}, - {NAN , NAN , 8. + 1.*I} + { 1.0 + 1.0*I, 3.0 - 2.0*I, 4.0 + 1.0*I}, + {NAN , 7.0 - 10.0*I, 1.0 + 2.0*I}, + {NAN , NAN , 8.0 + 1.0*I}, }; - cuda::managed::array b = {1. + 2.*I, 3. + 1.*I, 4. 
+ 5.*I}; + cuda::managed::array b = {1.0 + 2.0*I, 3.0 + 1.0*I, 4.0 + 5.0*I}; blas::trsv(blas::filling::upper, blas::diagonal::general, A, b); BOOST_TEST_REQUIRE( real(b[0]) == -1.37259 ); @@ -66,11 +65,11 @@ BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_cuda, *utf::tolerance(0.0001)){ BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_cuda_managed, *utf::tolerance(0.00001)){ namespace cuda = multi::cuda; cuda::managed::array const A = { - { 1. + 1.*I, 3. - 2.*I, 4. + 1.*I}, - {NAN , 7. - 10.*I, 1. + 2.*I}, - {NAN , NAN , 8. + 1.*I} + { 1.0 + 1.0*I, 3.0 - 2.0*I, 4.0 + 1.0*I}, + {NAN , 7.0 - 10.0*I, 1.0 + 2.0*I}, + {NAN , NAN , 8.0 + 1.0*I} }; - cuda::managed::array b = {1. + 2.*I, 3. + 1.*I, 4. + 5.*I}; + cuda::managed::array b = {1.0 + 2.0*I, 3.0 + 1.0*I, 4.0 + 5.0*I}; blas::trsv(blas::filling::upper, A, b); // this operation happens in GPU when #include "adaptors/blas/cuda.hpp" multi::array const b_cpu = b; @@ -82,11 +81,11 @@ BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_cuda_managed, *utf::tolerance(0.000 BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_cuda_managed, *utf::tolerance(0.00001)){ namespace cuda = multi::cuda; cuda::managed::array const A = { - { 1., 3., 4.}, - {NAN, 7., 1.}, - {NAN, NAN, 8.} + { 1.0, 3.0, 4.0}, + {NAN , 7.0, 1.0}, + {NAN , NAN , 8.0}, }; - cuda::managed::array b = {1., 3., 4.}; + cuda::managed::array b = {1.0, 3.0, 4.0}; blas::trsv(blas::filling::upper, A, b); // this operation happens in GPU when #include "adaptors/blas/cuda.hpp" multi::array const b_cpu = b; @@ -98,14 +97,13 @@ BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_cuda_managed, *utf::tolerance(0.0000 BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_cuda2, *utf::tolerance(0.00001)){ namespace blas = multi::blas; multi::cuda::array const A = { - { 1. + 1.*I, 3. - 2.*I, 4. + 1.*I}, - {NAN , 7. - 10.*I, 1. + 2.*I}, - {NAN , NAN , 8. + 1.*I} + { 1.0 + 1.0*I, 3.0 - 2.0*I, 4.0 + 1.0*I}, + {NAN , 7.0 - 10.0*I, 1.0 + 2.0*I}, + {NAN , NAN , 8.0 + 1.0*I}, }; - multi::cuda::array b = {1. + 2.*I, 3. 
+ 1.*I, 4. + 5.*I}; + multi::cuda::array b = {1.0 + 2.0*I, 3.0 + 1.0*I, 4.0 + 5.0*I}; blas::trsv(blas::filling::upper, blas::diagonal::general, A, b); BOOST_TEST_REQUIRE( real(b[0]) == -1.37259 ); BOOST_TEST_REQUIRE( real(b[1]) == 0.2127 ); BOOST_TEST_REQUIRE( real(b[2]) == 0.569231 ); } - diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/traits.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/traits.hpp new file mode 100644 index 0000000000..45acab04ad --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/traits.hpp @@ -0,0 +1,41 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_TRAITS_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_TRAITS_HPP +#pragma once + +#include +#include + +namespace boost::multi::blas { + +// TODO(correaa) : create a BinaryDouble concept? + + template()/std::declval()), float>{} >> + auto is_s_aux(F&&) -> std::true_type ; + auto is_s_aux(...) -> std::false_type; + + template struct is_s : decltype(is_s_aux(std::declval())) {using archetype = float;}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + + template()/std::declval()), double>{}>> + auto is_d_aux(D&&) -> std::true_type ; + auto is_d_aux(...) -> std::false_type; + + template struct is_d : decltype(is_d_aux(std::declval())) {using archetype = double;}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + + template) && is_s().real())>{} && is_s().imag())>{}>> + auto is_c_aux(C&&) -> std::true_type; + auto is_c_aux(...) -> std::false_type; + + template struct is_c : decltype(is_c_aux(std::declval())) {using archetype = std::complex;}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + + template) && is_d().real())>{} && is_d().imag())>{}>> + auto is_z_aux(Z&&) -> std::true_type ; + auto is_z_aux(...) 
-> std::false_type; + + template struct is_z : decltype(is_z_aux(std::declval())) {using archetype = std::complex;}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + +} // end namespace boost::multi::blas +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/trsm.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/trsm.hpp new file mode 100644 index 0000000000..8128bba97d --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/trsm.hpp @@ -0,0 +1,167 @@ +// Copyright 2019-2023 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_BLAS_TRSM_HPP +#define BOOST_MULTI_ADAPTORS_BLAS_TRSM_HPP + +#include "../blas/core.hpp" +#include "../blas/filling.hpp" +#include "../blas/operations.hpp" // uplo +#include "../blas/side.hpp" + +namespace boost::multi::blas { + +enum class diagonal : char { + unit = 'U', + non_unit = 'N', general = non_unit +}; + +template +auto triangular_parted(Array const& arr) { + class triangular_part { + Array const& ref_; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) + + public: + explicit triangular_part(Array const& ref) : ref_{ref} {} + static constexpr auto filling() { return Fill; } + using underlying_type __attribute__((unused)) = Array; + auto underlying() const -> Array const& { return ref_;} + }; + + return triangular_part{arr}; +} + +template +auto lower_parted(Array const& arr) {return triangular_parted(arr);} + +template +auto upper_parted(Array const& arr) {return triangular_parted(arr);} + +template auto L(Array const& arr) { return lower_parted(arr); } // NOLINT(readability-identifier-naming) BLAS naming +template auto U(Array const& arr) { return upper_parted(arr); } // NOLINT(readability-identifier-naming) BLAS naming + +template +auto triangular(multi::blas::filling f, Matrix const& m) { // 
NOLINT(readability-identifier-length) BLAS naming + auto ret =+ m; + switch(f) { + case multi::blas::filling::upper: + { + auto ext = extension(ret); + std::for_each(ext.begin(), ext.end(), [&ret](auto idx) { + std::fill_n(ret[idx].begin(), std::min(idx, size(~ret)), 0.0); + }); + } + break; + case multi::blas::filling::lower: + { + auto extt = extension(~ret); + std::for_each(extt.begin(), extt.end(), [&ret](auto jdx) { + std::fill_n( (~ret)[jdx].begin(), std::min(jdx, size( ret)), 0.0); + }); + } + break; + } + return ret; +} + +using core::trsm; + +template +auto trsm(Context&& ctxt, blas::side a_side, blas::filling a_fill, blas::diagonal a_diag, typename A2D::element_type alpha, A2D const& a, B2D&& b) // NOLINT(readability-function-cognitive-complexity,readability-identifier-length) cognitive load 115, BLAS naming +-> B2D&& { + if(a_side == blas::side::left ) {assert(size(~a) >= size( b));} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + if(a_side == blas::side::right) {assert(size( a) >= size(~b));} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + + assert( stride( a) == 1 || stride(~a) == 1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + assert( stride( b) == 1 || stride(~b) == 1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + + if(size(b) != 0) { + #define CTXT std::forward(ctxt) + if constexpr(! is_conjugated{} && ! 
is_conjugated{}) { + if (stride( a)==1 && stride( b)==1) {CTXT->trsm(static_cast( (a_side)), static_cast(-a_fill), 'N', static_cast(a_diag), size( b), size(~b), alpha , base(a) , stride(~a), base(b) , stride(~b));} + else if(stride(~a)==1 && stride(~b)==1) {CTXT->trsm(static_cast(swap(a_side)), static_cast(+a_fill), 'N', static_cast(a_diag), size(~b), size( b), alpha , base(a) , stride( a), base(b) , stride( b));} + else if(stride( a)==1 && stride(~b)==1) {CTXT->trsm(static_cast(swap(a_side)), static_cast(-a_fill), 'T', static_cast(a_diag), size(~b), size( b), alpha , base(a) , stride(~a), base(b) , stride( b));} + else if(stride(~a)==1 && stride( b)==1) {CTXT->trsm(static_cast( (a_side)), static_cast(+a_fill), 'T', static_cast(a_diag), size( b), size(~b), alpha , base(a) , stride( a), base(b) , stride(~b));} + else {assert(0 && "not implemented in blas");} // LCOV_EXCL_LINE + } else if constexpr( is_conjugated{} && ! is_conjugated{}) { + if (stride( a)==1 && stride(~b)==1) {CTXT->trsm(static_cast(swap(a_side)), static_cast(-a_fill), 'C', static_cast(a_diag), size(~b), size( b), alpha , underlying(base(a)), stride(~a), base(b) , stride( b));} + else if(stride(~a)==1 && stride( b)==1) {CTXT->trsm(static_cast( (a_side)), static_cast(+a_fill), 'C', static_cast(a_diag), size( b), size(~b), alpha , underlying(base(a)), stride( a), base(b) , stride(~b));} + else {assert(0 && "not implemented in blas");} // LCOV_EXCL_LINE + } else if constexpr(! 
is_conjugated{} && is_conjugated{}) { + if (stride(~a)==1 && stride( b)==1) {CTXT->trsm(static_cast( (a_side)), static_cast(+a_fill), 'C', static_cast(a_diag), size( b), size(~b), conj(alpha), base(a) , stride( a), underlying(base(b)), stride(~b));} + // else if(stride( a)==1 && stride(~b)==1) {assert(0 && "not implemented in blas");} // LCOV_EXCL_LINE + else {assert(0 && "not implemented in blas");} // LCOV_EXCL_LINE + } else if constexpr( is_conjugated{} && is_conjugated{}) { + if (stride( a)==1 && stride(~b)==1) {CTXT->trsm(static_cast(swap(a_side)), static_cast(-a_fill), 'T', static_cast(a_diag), size(~b), size( b), conj(alpha), underlying(base(a)), stride(~a), underlying(base(b)), stride( b));} + else if(stride(~a)==1 && stride( b)==1) {CTXT->trsm(static_cast( (a_side)), static_cast(+a_fill), 'T', static_cast(a_diag), size( b), size(~b), conj(alpha), underlying(base(a)), stride( a), underlying(base(b)), stride(~b));} + else {assert(0 && "not implemented in blas");} // LCOV_EXCL_LINE + } + #undef CTXT + } + return std::forward(b); +} + +template +auto trsm(blas::side a_side, blas::filling a_fill, blas::diagonal a_diag, typename A2D::element_type alpha, A2D const& a, B2D&& b) -> decltype(auto) { // NOLINT(readability-identifier-length) BLAS naming + if constexpr(! 
is_conjugated{}) {return trsm(default_context_of( a.base() ), a_side, a_fill, a_diag, alpha, a, std::forward(b));} + else {return trsm(default_context_of(underlying(a.base())), a_side, a_fill, a_diag, alpha, a, std::forward(b));} +} + +template +auto trsm(Context&& ctxt, blas::side a_side, blas::filling a_fill, typename A2D::element_type alpha, A2D const& a, B2D&& b) // NOLINT(readability-identifier-length) BLAS naming +->decltype(trsm(std::forward(ctxt), a_side, a_fill, blas::diagonal::non_unit, alpha, a, std::forward(b))) { + return trsm(std::forward(ctxt), a_side, a_fill, blas::diagonal::non_unit, alpha, a, std::forward(b)); } + +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic push + #pragma nv_diag_suppress = implicit_return_from_non_void_function + #else + #pragma diagnostic push + #pragma diag_suppress = implicit_return_from_non_void_function + #endif +#elif defined __NVCOMPILER + #pragma diagnostic push + #pragma diag_suppress = implicit_return_from_non_void_function +#endif +template +auto trsm(blas::side a_side, blas::filling a_fill, typename A2D::element_type alpha, A2D const& a, B2D&& b) -> decltype(auto) { // NOLINT(readability-identifier-length) BLAS naming + if constexpr(! 
is_conjugated{}) {return trsm(blas::default_context_of( a.base() ), a_side, a_fill, alpha, a, std::forward(b));} + else {return trsm(blas::default_context_of(underlying(a.base())), a_side, a_fill, alpha, a, std::forward(b));} +} +#if defined __NVCC__ + #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ + #pragma nv_diagnostic pop + #else + #pragma diagnostic pop + #endif +#elif defined __NVCOMPILER + #pragma diagnostic pop +#endif + +template +auto trsm(blas::side a_side, typename UTArr::underlying_type::element_type alpha, UTArr const& a, B2D&& b) // NOLINT(readability-identifier-length) BLAS naming +->decltype(trsm(a_side, a.filling(), blas::diagonal::non_unit, alpha, a.underlying(), std::forward(b))) { + return trsm(a_side, a.filling(), blas::diagonal::non_unit, alpha, a.underlying(), std::forward(b)); } + +namespace operators { + + template + auto operator/=(B2D&& b, UL const& a) // NOLINT(readability-identifier-length) BLAS naming + ->decltype(blas::trsm(blas::side::right, 1.0, a, std::forward(b))) { + return blas::trsm(blas::side::right, 1.0, a, std::forward(b)); } + + template + auto operator|=(B2D&& b, UL const& a) // NOLINT(readability-identifier-length) BLAS naming + ->decltype(blas::trsm(blas::side::left, 1.0, a, std::forward(b))) { + return blas::trsm(blas::side::left, 1.0, a, std::forward(b)); } + + using blas::U; + using blas::L; + +} // end namespace operators + +} // end namespace boost::multi::blas + +#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/trsv.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/trsv.hpp similarity index 97% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/trsv.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/trsv.hpp index 154e2cf810..462b434a8e 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/trsv.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/blas/trsv.hpp @@ -1,6 +1,6 @@ 
-#ifndef MULTI_ADAPTORS_BLAS_TRSV_HPP // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- +#ifndef MULTI_ADAPTORS_BLAS_TRSV_HPP #define MULTI_ADAPTORS_BLAS_TRSV_HPP -// Copyright 2019-2021 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. Correa #include "../blas/core.hpp" @@ -12,8 +12,6 @@ namespace boost::multi::blas { -//enum DIAG : char{U='U', N='N'}; - enum class diagonal : char {//typename std::underlying_type::type{ unit = 'U', non_unit = 'N', general = non_unit @@ -35,14 +33,14 @@ auto trsv(filling a_nonzero_side, diagonal a_diag, A2D const& a, X1D&& x) { auto base_a = trsv_base(a); auto base_x = trsv_base(x); - if(not is_conjugated{}) { - if(stride( a )==1) {trsv(static_cast(flip(a_nonzero_side)), 'N', static_cast(a_diag), size(x), base_a, stride(rotated(a)), base_x, stride(x));} + if(! is_conjugated{}) { + if (stride( a )==1) {trsv(static_cast(flip(a_nonzero_side)), 'N', static_cast(a_diag), size(x), base_a, stride(rotated(a)), base_x, stride(x));} else if(stride(rotated(a))==1) {trsv(static_cast( a_nonzero_side ), 'T', static_cast(a_diag), size(x), base_a, stride( a ), base_x, stride(x));} - else {assert(0);} + else {assert(0);} }else{ - if(stride( a )==1) {assert(0);} //TODO fallback to trsm? + if (stride( a )==1) {assert(0);} // TODO(correaa) fallback to trsm? else if(stride(rotated(a))==1) {trsv(static_cast( a_nonzero_side ), 'C', static_cast(a_diag), size(x), base_a, stride( a ), base_x, stride(x));} - else {assert(0);} + else {assert(0);} } } return std::forward(x); diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/complex.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/complex.hpp new file mode 100644 index 0000000000..af5cf121cc --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/complex.hpp @@ -0,0 +1,161 @@ +// Copyright 2023-2024 Alfredo A. 
// (copyright line continues from the previous line) Correa

// Minimal aggregate complex-number vocabulary for boost::multi:
//  - imaginary<T>    : a pure imaginary value (T times the imaginary unit)
//  - imaginary<void> : the imaginary unit itself (see `I`)
//  - complex<T>      : aggregate {real, imag} pair with value-semantic arithmetic
//  - literals        : user-defined literals producing imaginary<double> / imaginary<float>

#ifndef BOOST_MULTI_ADAPTORS_COMPLEX_HPP
#define BOOST_MULTI_ADAPTORS_COMPLEX_HPP
#pragma once

#include <cmath>    // for std::sqrt
#include <complex>  // to define its traits

namespace boost::multi {

template<class T> struct [[nodiscard]] complex;

template<class T> struct [[nodiscard]] imaginary;

// real + imaginary -> complex (defined at the end of this header, once complex<T> is complete).
template<class U>
constexpr auto operator+(U real, imaginary<U> imag) -> complex<U>;

// A pure imaginary number; `_value` is the coefficient of the imaginary unit.
template<class T>
struct [[nodiscard]] imaginary {
    T _value;  // NOLINT(misc-non-private-member-variables-in-classes) I want the class to be an aggregate

    using value_type = T;

    template<class U>
    friend constexpr auto operator+(U real, imaginary<U> imag) -> complex<U>;

    // real * imaginary scales the imaginary coefficient.
    friend constexpr auto operator*(T real, imaginary imag) {
        return imaginary{real * imag._value};
    }

    // (a i)(b i) = -ab and (a i)/(b i) = a/b are plain real values.
    [[nodiscard]] constexpr auto operator*(imaginary other) const { return -_value * other._value; }
    [[nodiscard]] constexpr auto operator/(imaginary other) const { return _value / other._value; }

    [[nodiscard]] constexpr auto operator+(imaginary other) const { return imaginary{_value + other._value}; }
    // Fixed: this used to add the two coefficients instead of subtracting them.
    [[nodiscard]] constexpr auto operator-(imaginary other) const { return imaginary{_value - other._value}; }

    [[nodiscard]] constexpr auto operator==(imaginary const& other) const { return _value == other._value; }
    [[nodiscard]] constexpr auto operator!=(imaginary const& other) const { return _value != other._value; }
};

// The imaginary unit: carries no value, only the "times i" meaning.
template<>
struct [[nodiscard]] imaginary<void> {
    // real * I -> imaginary<T>{real}
    template<class T>
    friend constexpr auto operator*(T real, imaginary /*self*/) { return imaginary<T>{real}; }

    // I * (b i) = -b
    template<class T>
    [[nodiscard]] constexpr auto operator*(imaginary<T> other) const { return -other._value; }

    // I / (b i) = 1/b
    template<class T>
    [[nodiscard]] constexpr auto operator/(imaginary<T> other) const { return T{1} / other._value; }
};

// `inline` so that every translation unit including this header shares one entity.
inline constexpr imaginary<void> I{};  // NOLINT(readability-identifier-length) imaginary unit

namespace literals {

// All literal operators are spelled without a space after `""`: the spaced form
// (`operator"" _I`) names a reserved identifier and is deprecated.

// double-precision imaginary literals: 2.0_i, 2.0_I
constexpr auto operator""_i(long double value) { return imaginary<double>{static_cast<double>(value)}; }
constexpr auto operator""_I(long double value) { return imaginary<double>{static_cast<double>(value)}; }

// single-precision imaginary literals: 2.0_f_i, 2.0_if, 2.0_F_I, 2.0_IF
constexpr auto operator""_f_i(long double value) { return imaginary<float>{static_cast<float>(value)}; }
constexpr auto operator""_if(long double value) { return imaginary<float>{static_cast<float>(value)}; }
constexpr auto operator""_F_I(long double value) { return imaginary<float>{static_cast<float>(value)}; }
constexpr auto operator""_IF(long double value) { return imaginary<float>{static_cast<float>(value)}; }

}  // namespace literals

// Aggregate complex number; brace-initialize as complex<T>{re, im}.
template<class T>
struct [[nodiscard]] complex {
    using real_type = T;

    real_type _real;  // NOLINT(misc-non-private-member-variables-in-classes) complex should be an aggregate class
    real_type _imag;  // NOLINT(misc-non-private-member-variables-in-classes) complex should be an aggregate class

    template<class U>
    friend constexpr auto operator+(U real, imaginary<U> imag) -> complex<U>;

    // Scaling by a real number.
    friend constexpr auto operator*(real_type scale, complex self) { return complex{scale * self._real, scale * self._imag}; }
    friend constexpr auto operator/(complex self, real_type scale) { return complex{self._real / scale, self._imag / scale}; }

    // Mixed real/complex addition and subtraction.
    friend constexpr auto operator+(T real, complex self) { return complex{real + self._real, self._imag}; }
    // Fixed: r - (a + b i) = (r - a) - b i; the imaginary part used to keep its sign.
    friend constexpr auto operator-(T real, complex self) { return complex{real - self._real, -self._imag}; }

    [[nodiscard]] constexpr auto real() const -> real_type { return _real; }
    [[nodiscard]] constexpr auto imag() const -> real_type { return _imag; }

    friend constexpr auto conj(complex self) { return complex{self._real, -self._imag}; }

    constexpr auto operator==(complex const& other) const { return _real == other._real && _imag == other._imag; }
    constexpr auto operator!=(complex const& other) const { return _real != other._real || _imag != other._imag; }

    // Assigning a real number zeroes the imaginary part.
    constexpr auto operator=(real_type re) -> complex& {
        (*this) = complex{re, real_type{0.0}};
        return *this;
    }

    friend constexpr auto operator-(complex self) { return complex{-self._real, -self._imag}; }
    friend constexpr auto operator+(complex self) { return complex{+self._real, +self._imag}; }

    friend constexpr auto operator+(complex z1, complex z2) { return complex{z1._real + z2._real, z1._imag + z2._imag}; }
    friend constexpr auto operator-(complex z1, complex z2) { return complex{z1._real - z2._real, z1._imag - z2._imag}; }

    constexpr auto operator+=(complex other) -> complex& {
        _real += other._real;
        _imag += other._imag;
        return *this;
    }
    constexpr auto operator-=(complex other) -> complex& {
        _real -= other._real;
        _imag -= other._imag;
        return *this;
    }

    friend constexpr auto operator*(complex z1, complex z2) {
        return complex{z1._real * z2._real - z1._imag * z2._imag, z1._real * z2._imag + z1._imag * z2._real};
    }

    // Textbook quotient z1 * conj(z2) / |z2|^2.
    // TODO(correaa) switch to a scaled algorithm to avoid overflow/underflow in norm(z2)
    friend constexpr auto operator/(complex z1, complex z2) {
        auto const nrm = norm(z2);
        return complex{
            (z1._real * z2._real + z1._imag * z2._imag) / nrm,
            (z1._imag * z2._real - z1._real * z2._imag) / nrm
        };
    }

    // Squared magnitude, re^2 + im^2.
    friend constexpr auto norm(complex self) {
        return self._real * self._real + self._imag * self._imag;  // TODO(correaa) revise this, use more exact formula
    }

    // Magnitude, sqrt(re^2 + im^2).
    // Fixed: the imaginary part was ignored (the real part was squared twice).
    // TODO(correaa) the naive formula can overflow for large components; consider a hypot-style scaled form
    friend constexpr auto abs(complex self) {
        using std::sqrt;  // allow ADL-provided sqrt for non-builtin real types
        return sqrt(self._real * self._real + self._imag * self._imag);
    }
};

template<class U>
constexpr auto operator+(U real, imaginary<U> imag) -> complex<U> { return {real, imag._value}; }

}  // end namespace boost::multi
#endif  // BOOST_MULTI_ADAPTORS_COMPLEX_HPP
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/complex/adl.hpp @@ -0,0 +1,171 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +// TODO(correaa) move this header to blas/numeric + +#ifndef BOOST_MULTI_ADAPTORS_COMPLEX_ADL_HPP +#define BOOST_MULTI_ADAPTORS_COMPLEX_ADL_HPP +#pragma once + +// #include + +// #include "detail/fix_complex_traits.hpp" + +#include +#include // for forward + +#define BOOST_MULTI_DECLRETURN(ExpR) -> decltype(ExpR) {return ExpR;} // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing +#define BOOST_MULTI_JUSTRETURN(ExpR) {return ExpR;} // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing + +namespace boost { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat +namespace multi { + +inline constexpr class /*adl_conj_t*/ { + template constexpr auto _(priority<1> /**/, As&&... args) const BOOST_MULTI_JUSTRETURN(std::conj(std::forward(args)...)) template constexpr auto _(priority<2> /**/, As&&... args) const BOOST_MULTI_DECLRETURN(conj(std::forward(args)...)) template constexpr auto _(priority<3> /**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).conj(std::forward(args)...)) + + public : template + constexpr auto + operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<3>{}, std::forward(args)...)) +} adl_conj; + +inline constexpr class /*adl_real_t*/ { + template constexpr auto _(priority<1> /**/, As&&... args) const BOOST_MULTI_DECLRETURN(std::real(std::forward(args)...)) template constexpr auto _(priority<2> /**/, As&&... args) const BOOST_MULTI_DECLRETURN(real(std::forward(args)...)) template constexpr auto _(priority<3> /**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).real(std::forward(args)...)) + + public : template + constexpr auto + operator()(As&&... 
args) const BOOST_MULTI_DECLRETURN(_(priority<3>{}, std::forward(args)...)) +} adl_real; + +inline constexpr class /*adl_imag_t*/ { + template constexpr auto _(priority<1> /**/, As&&... args) const BOOST_MULTI_DECLRETURN(std::imag(std::forward(args)...)) template constexpr auto _(priority<2> /**/, As&&... args) const BOOST_MULTI_DECLRETURN(imag(std::forward(args)...)) template constexpr auto _(priority<3> /**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).imag(std::forward(args)...)) + + public : template + constexpr auto + operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<3>{}, std::forward(args)...)) +} adl_imag; + +struct real_t; +struct imag_t; + +template +struct _complex { // NOLINT(readability-identifier-naming) deprecating this + using value_type = ValueType; + + private: + value_type re_; + value_type im_; + + public: + _complex() = default; + + constexpr explicit _complex(value_type real) : re_{real}, im_{value_type{0}} {} + constexpr _complex(value_type real, value_type imag) // NOLINT(bugprone-easily-swappable-parameters) + : re_{real}, im_{imag} {} + + constexpr explicit _complex(std::complex const& other) : re_{other.real()}, im_{other.imag()} {} + + template< + class T, + std::enable_if_t< + sizeof(T) == 2 * sizeof(value_type) && + std::is_assignable().real())>{} && std::is_assignable().imag())>{}, + int> = 0> + constexpr explicit operator T const&() const& { + return reinterpret_cast(*this); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + } + template< + class T, + std::enable_if_t< + sizeof(T) == 2 * sizeof(value_type) && + std::is_assignable().real())>{} && + std::is_assignable().imag())>{}, + int> = 0> + constexpr explicit operator T&() & { return reinterpret_cast(*this); } // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + + constexpr auto std() const& -> std::complex const& { + return reinterpret_cast const&>(*this); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + } + constexpr 
auto std() & -> std::complex& { + return reinterpret_cast&>(*this); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + } + + friend constexpr auto abs(_complex const& self) { return abs(self.std()); } + friend constexpr auto operator-(_complex const& self, _complex const& other) + -> _complex { return self.std() - other.std(); } + + constexpr auto real() & -> value_type& { return re_; } + constexpr auto real() const& -> value_type const& { return re_; } + + constexpr auto imag() & -> value_type& { return im_; } + constexpr auto imag() const& -> value_type const& { return im_; } + + template constexpr auto operator+=(Real const& other) & -> decltype(re_ += other, *this) { return re_ += other, *this; } + template constexpr auto operator-=(Real const& other) & -> decltype(re_ -= other, *this) { return re_ -= other, *this; } + template constexpr auto operator*=(Real const& other) & -> decltype(re_ *= other, im_ *= other, *this) { return re_ *= other, im_ *= other, *this; } + template constexpr auto operator/=(Real const& other) & -> decltype(re_ /= other, im_ /= other, *this) { return re_ /= other, im_ /= other, *this; } + + template constexpr auto operator+=(Complex const& other) & -> decltype(re_ += other.re, im_ += other.im, *this) { return re_ += other.re, im_ += other.im, *this; } + template constexpr auto operator-=(Complex const& other) & -> decltype(re_ -= other.re, im_ -= other.im, *this) { return re_ -= other.re, im_ -= other.im, *this; } +}; + +struct real_t { + template::element, typename ValueType = typename E::value_type> + constexpr auto operator()(Array&& array) const + -> decltype(std::forward(array).template reinterpret_array_cast<_complex>().template member_cast(&_complex::real)) { + return std::forward(array).template reinterpret_array_cast<_complex>().template member_cast(&_complex::real); + } + template::value_type, + std::enable_if_t< + sizeof(T) == 2 * sizeof(ValueType) && + std::is_assignable()))>{} && + std::is_assignable()))>{}, + int> 
= 0> + constexpr auto operator()(T& value) const -> ValueType& { return reinterpret_cast&>(value).real; } // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : TODO(correaa) : t[0] + template::value_type, + std::enable_if_t< + sizeof(T) == 2 * sizeof(ValueType) && + std::is_assignable()))>{} && + std::is_assignable()))>{}, + int> = 0> + auto operator()(T const& value) const -> ValueType const& { + return reinterpret_cast const&>(value).real; // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : TODO(correaa) : t[0] + } +}; + +struct imag_t { + template::element, typename ValueType = typename E::value_type> + constexpr auto operator()(Array&& array) const + -> decltype(std::forward(array).template reinterpret_array_cast<_complex>().template member_cast(&_complex::imag)) { + return std::forward(array).template reinterpret_array_cast<_complex>().template member_cast(&_complex::imag); + } + template::value_type, + std::enable_if_t< + sizeof(T) == 2 * sizeof(ValueType) && + std::is_assignable()))>{} && + std::is_assignable()))>{}, + int> = 0> + constexpr auto operator()(T& value) const -> ValueType& { + return reinterpret_cast&>(value).imag; // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : TODO(correaa) : t[1] + } + template::value_type, + std::enable_if_t< + sizeof(T) == 2 * sizeof(ValueType) && + std::is_assignable()))>{} && + std::is_assignable()))>{}, + int> = 0> + constexpr auto operator()(T const& value) const -> ValueType const& { + return reinterpret_cast const&>(value).imag; // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : TODO(correaa) : t[1] + } +}; + +//[[maybe_unused]] static constexpr real_t real; +//[[maybe_unused]] static constexpr imag_t imag; + +} // end namespace multi +} // end namespace boost + +#undef BOOST_MULTI_DECLRETURN +#undef BOOST_MULTI_JUSTRETURN + +#endif // BOOST_MULTI_ADAPTORS_COMPLEX_ADL_HPP diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/CMakeLists.txt 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/complex/test/CMakeLists.txt similarity index 54% rename from external_codes/boost_multi/multi/include/multi/adaptors/blas/test/CMakeLists.txt rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/complex/test/CMakeLists.txt index 0da34d9eb1..d17b35fd89 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/CMakeLists.txt +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/complex/test/CMakeLists.txt @@ -1,56 +1,32 @@ -# -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;autowrap:nil;-*- -#[=[Multi Test suite can be run like this: - mkdir -p build - cd build - cmake .. [-DENABLE_CUDA=1] - make -j - ctest -j --output-on-error [-T memcheck] - exit -#]=] -cmake_minimum_required(VERSION 3.11) +cmake_minimum_required(VERSION 3.16) -set(CMAKE_VERBOSE_MAKEFILE ON) - -#project( -# boost-multi-adaptors-blas-test -# VERSION 0.1 -# LANGUAGES CXX -#) - -set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -if((NOT - CMAKE_CXX_COMPILER_ID - STREQUAL - "PGI" - ) - AND (NOT - DART_COMPILER_NAME - STREQUAL - "nvcc" - ) -) - find_package(Boost REQUIRED COMPONENTS unit_test_framework) - link_libraries("-lboost_unit_test_framework") - - find_package(BLAS REQUIRED) - find_path( - BLAS_INCLUDE_DIRS - cblas.h - /usr/include - /usr/local/include - $ENV{BLAS_HOME}/include - ) -# include_directories(${TEST_EXE} PRIVATE ${BLAS_INCLUDE_DIRS}) - link_libraries(${BLAS_LIBRARIES}) -else() - find_package(Boost REQUIRED) # cmake cannot detect this component with pgi compiler - link_libraries("-lboost_unit_test_framework") - - link_libraries("-lblas") # cmake cannot detect BLAS with pgi/nvc++ but it ships with its own version -endif() +# if((NOT +# CMAKE_CXX_COMPILER_ID +# STREQUAL +# "PGI" +# ) +# AND (NOT +# DART_COMPILER_NAME +# STREQUAL +# "nvcc" +# ) +# AND (NOT +# DART_COMPILER_NAME +# STREQUAL +# "icpc" +# ) +# ) +# 
find_package(Boost REQUIRED COMPONENTS unit_test_framework) +# link_libraries("-lboost_unit_test_framework") +# else() +# find_package(Boost REQUIRED) # cmake cannot detect this component with pgi compiler +# link_libraries("-lboost_unit_test_framework") + +# link_libraries("-lblas") # cmake cannot detect BLAS with pgi/nvc++ but it ships with its own version +# endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "PGI") add_definitions(-DRETURN_BY_STACK) @@ -58,54 +34,19 @@ endif() if(ENABLE_CUDA OR DEFINED CXXCUDA) enable_language(CUDA) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") -# find_package(CUDA QUIET) -# if(CUDA_FOUND) -# message("CUDA found") -# include_directories(${CUDA_INCLUDE_DIRS}) -# else() -# message("CUDA not found") -# endif() - -# include(FindCUDA/select_compute_arch) -# cuda_detect_installed_gpus(INSTALLED_GPU_CCS_1) -# string(STRIP "${INSTALLED_GPU_CCS_1}" INSTALLED_GPU_CCS_2) -# string( -# REPLACE " " -# ";" -# INSTALLED_GPU_CCS_3 -# "${INSTALLED_GPU_CCS_2}" -# ) -# string( -# REPLACE "." 
-# "" -# CUDA_ARCH_LIST -# "${INSTALLED_GPU_CCS_3}" -# ) -# set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST}) - + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES native) + endif() endif() enable_testing() -list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") # needs cmake 3.17 include(CTest) -configure_file("config.hpp.in" ${CMAKE_BINARY_DIR}/config.hpp) - include_directories(${CMAKE_BINARY_DIR}) # file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) set(TEST_SRCS - axpy.cpp - copy.cpp - dot.cpp - herk.cpp - gemv.cpp - gemm.cpp - numeric.cpp - scal.cpp - traits.cpp - trsm.cpp + complex.cpp ) foreach(TEST_FILE ${TEST_SRCS}) @@ -116,24 +57,28 @@ foreach(TEST_FILE ${TEST_SRCS}) #set_property(TARGET ${TEST_EXE} PROPERTY "${CUDA_ARCH_LIST}") target_compile_options(${TEST_EXE} PRIVATE -std=c++17) endif() - # target_compile_features (${TEST_EXE} PUBLIC cxx_std_17) target_include_directories(${TEST_EXE} PRIVATE ${PROJECT_SOURCE_DIR}/include) - target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") - target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS}) - target_include_directories(${TEST_EXE} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) + target_link_libraries(${TEST_EXE} PRIVATE multi) - target_link_libraries(${TEST_EXE} PRIVATE multi) target_link_libraries(${TEST_EXE} PRIVATE ${Boost_LIBRARIES}) - target_link_directories(${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS}) + target_include_directories(${TEST_EXE} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) + + # target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") + target_compile_definitions(${TEST_EXE} PRIVATE BOOST_TEST_DYN_LINK=1) + target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS}) + target_compile_definitions(${TEST_EXE} PRIVATE BOOST_TEST_MODULE="C++ Unit Tests for Multi complex") + + if(NOT ENABLE_CUDA AND (NOT DART_COMPILER_NAME STREQUAL "nvcc" ) + AND (NOT DEFINED ENABLE_CIRCLE) ) target_compile_options( ${TEST_EXE} @@ -147,7 +92,9 @@ 
foreach(TEST_FILE ${TEST_SRCS}) $<$,$>: -Wextra -Wpedantic - -Wmove> + -Wmove + -Wno-error=\#warnings + > $<$: -Wextra -wd161 @@ -180,6 +127,9 @@ foreach(TEST_FILE ${TEST_SRCS}) $<$: /W4> ) + else() + target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_TEST_DYN_LINK=1") + # target_compile_options (${TEST_EXE} PRIVATE -Werror -Wall) endif() add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) endforeach() diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/complex/test/complex.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/complex/test/complex.cpp new file mode 100644 index 0000000000..5154a110bd --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/complex/test/complex.cpp @@ -0,0 +1,131 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +#include "../../complex.hpp" +#include + +#include + +namespace multi = boost::multi; + +using float_types = boost::mpl::list; + +BOOST_AUTO_TEST_CASE_TEMPLATE(complex_ctors, T, float_types) { + { + multi::complex const zeta = T{1.0} + multi::imaginary{T{2.0}}; + BOOST_REQUIRE( zeta.real() == T{1.0}); + BOOST_REQUIRE( zeta.imag() == T{2.0}); + } + // { + // multi::complex zeta = T{1.0} + T{2.0} * multi::imaginary::i; + // BOOST_REQUIRE( zeta.real() == T{1.0}); + // BOOST_REQUIRE( zeta.imag() == T{2.0}); + // } + // { + // multi::complex zeta = T{1.0} + multi::imaginary{T{2.0}}; + // BOOST_REQUIRE( zeta.real() == T{1.0}); + // BOOST_REQUIRE( zeta.imag() == T{2.0}); + // } +} + +BOOST_AUTO_TEST_CASE(double_complex_literals) { + using multi::literals::operator""_I; + multi::complex const zeta = 1.0 + 2.0_I; + // multi::complex zeta = 1.0 + 2.0i; // literal i is not standard + + BOOST_REQUIRE( zeta.real() == 1.0 ); + BOOST_REQUIRE( zeta.imag() == 2.0 ); +} + +BOOST_AUTO_TEST_CASE(imaginary_equal) { + using multi::literals::operator""_I; + 
multi::imaginary const zeta = 2.0_I; + + BOOST_REQUIRE( zeta == multi::imaginary{2.0} ); +} + +BOOST_AUTO_TEST_CASE(imaginary_assign) { + using multi::literals::operator""_I; + multi::imaginary zeta; // NOLINT(cppcoreguidelines-pro-type-member-init,hicpp-member-init) + zeta = 2.0_I; + + BOOST_REQUIRE( zeta == multi::imaginary{2.0} ); +} + +BOOST_AUTO_TEST_CASE(float_complex_literals) { + using multi::literals::operator""_IF; + // multi::complex const zeta = 1.0f + 2.0 _i; // may induced an undesired or forbidden conversion + // multi::complex const zeta = 1.0f + 2.0 f_i; // literal f_i is not standard + // multi::complex const zeta = 1.0f + 2.0_f_i; + multi::complex const zeta = 1.0F + 2.0_IF; + + BOOST_REQUIRE( zeta.real() == 1.0F ); + BOOST_REQUIRE( zeta.imag() == 2.0F ); +} + +BOOST_AUTO_TEST_CASE(float_complex_assignment) { + using multi::literals::operator""_IF; + multi::complex zeta; // NOLINT(cppcoreguidelines-pro-type-member-init,hicpp-member-init) + + zeta = 1.0F + 2.0_IF; + BOOST_REQUIRE( zeta.real() == 1.0F ); + BOOST_REQUIRE( zeta.imag() == 2.0F ); + + zeta = 1.0F; + BOOST_REQUIRE( zeta.real() == 1.0F ); + BOOST_REQUIRE( zeta.imag() == 0.0F ); +} + +BOOST_AUTO_TEST_CASE(float_complex_aggregate) { + static_assert(std::is_aggregate_v>); + + // auto const c = multi::complex{._real = 1.0, ._imag = 2.0}; + + // BOOST_REQUIRE( real(zeta) == 1.0F ); + // BOOST_REQUIRE( imag(zeta) == 2.0F ); +} + +BOOST_AUTO_TEST_CASE(double_complex_abs) { + using multi::literals::operator""_I; + multi::complex const zeta = 1.0 + 2.0_I; + + BOOST_REQUIRE( abs(zeta) <= std::max(zeta.real(), zeta.imag()) ); +} + +BOOST_AUTO_TEST_CASE(double_complex_plus_eq) { + using multi::literals::operator""_I; + multi::complex zeta = 1.0 + 2.0_I; + multi::complex const yeta = 1.0 + 2.0_I; + + zeta += yeta; + + BOOST_REQUIRE( zeta == 2.0 * yeta ); + BOOST_REQUIRE( zeta == yeta / 0.5 ); +} + +// BOOST_AUTO_TEST_CASE(complex_member_cast) { +// multi::array, 2> A = { +// { {1., 2.}, {3., 4.}}, 
+// {{22., 33.}, {5., 9.}}, +// }; + +// { +// auto&& Areal = A.member_cast(&multi::complex::re); +// auto&& Aimag = A.member_cast(&multi::complex::im); + +// BOOST_REQUIRE(Areal[1][0] == 22.); +// BOOST_REQUIRE(Aimag[1][0] == 33.); +// } +// { +// auto&& Areal = A.member_cast(&multi::complex::re); +// auto&& Aimag = A.member_cast(&multi::complex::im); + +// BOOST_REQUIRE(Areal[1][0] == 22.); +// BOOST_REQUIRE(Aimag[1][0] == 33.1); +// } +// } diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/.idea/.gitignore b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/.idea/.gitignore similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/cuda/.idea/.gitignore rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/.idea/.gitignore diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/.idea/cuda.iml b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/.idea/cuda.iml similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/cuda/.idea/cuda.iml rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/.idea/cuda.iml diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/.idea/modules.xml b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/.idea/modules.xml similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/cuda/.idea/modules.xml rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/.idea/modules.xml diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/.idea/vcs.xml b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/.idea/vcs.xml similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/cuda/.idea/vcs.xml rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/.idea/vcs.xml diff --git 
a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/CMakeLists.txt similarity index 59% rename from external_codes/boost_multi/multi/include/multi/adaptors/cuda/CMakeLists.txt rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/CMakeLists.txt index 7df8618925..f4d6090a5c 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/CMakeLists.txt +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/CMakeLists.txt @@ -1,8 +1,8 @@ cmake_minimum_required(VERSION 3.11) #project( -# boost-multi-adaptor-cuda -# VERSION 0.1 -# LANGUAGES CXX +# boost-multi-adaptor-cuda +# VERSION 0.1 +# LANGUAGES CXX #) set(CMAKE_VERBOSE_MAKEFILE ON) @@ -13,9 +13,16 @@ set(CMAKE_CXX_EXTENSIONS OFF) find_package(Boost REQUIRED COMPONENTS unit_test_framework) +if(ENABLE_HIP) + enable_language(HIP) # may need `module load rocm` + enable_testing() + + add_subdirectory(cublas/test) +endif() + if(ENABLE_CUDA OR DEFINED CXXCUDA) enable_language(CUDA) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++17 --extended-lambda --expt-relaxed-constexpr") + # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++17 --extended-lambda --expt-relaxed-constexpr") enable_testing() list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") # needs cmake 3.1 diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/algorithms/copy.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/algorithms/copy.hpp similarity index 91% rename from external_codes/boost_multi/multi/include/multi/adaptors/cuda/algorithms/copy.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/algorithms/copy.hpp index e4e3cfff42..75908ab645 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/algorithms/copy.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/algorithms/copy.hpp @@ -4,22 
+4,19 @@ clang++ -D_TEST_MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY -x cuda --cuda-gpu-arch=sm_6 rm $0x; exit #endif -#ifndef MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY_HPP -#define MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY_HPP +#ifndef BOOST_MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY_HPP +#define BOOST_MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY_HPP #include -//#include #include "../../../adaptors/cuda.hpp" //#include "../algorithms/for_each.hpp" -//#include "/home/correaa/prj/alf/boost/iterator/zipper.hpp" - -#ifndef HD +#ifndef BOOST_MULTI_HD #if defined(__CUDACC__) -#define HD __host__ __device__ +#define BOOST_MULTI_HD __host__ __device__ #else -#define HD +#define BOOST_MULTI_HD #endif #endif @@ -67,7 +64,6 @@ array_iterator copy( }} } - #ifdef _TEST_MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY #define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA copy" #define BOOST_TEST_DYN_LINK @@ -102,7 +98,7 @@ BOOST_AUTO_TEST_CASE(copy_by_iterator){ multi::cuda::array B(extensions(A)); B() = A(); -// BOOST_REQUIRE( A[13] == B[13] ); +// BOOST_REQUIRE( A[13] == B[13] ); } BOOST_AUTO_TEST_CASE(copy_by_pointer){ @@ -115,7 +111,7 @@ BOOST_AUTO_TEST_CASE(copy_by_pointer){ multi::cuda::array B(extensions(A)); B = A; -// BOOST_REQUIRE( A[13] == B[13] ); +// BOOST_REQUIRE( A[13] == B[13] ); } @@ -164,7 +160,7 @@ BOOST_AUTO_TEST_CASE(cuda_copy){ BOOST_REQUIRE( CUDA_SLOW( B[10] == 99. ) ); #endif -/* multi::cuda::for_each_n( +/* multi::cuda::for_each_n( boost::iterators::zip(begin(A), begin(B)), size(A), []__device__(auto&& e){ @@ -173,14 +169,14 @@ BOOST_AUTO_TEST_CASE(cuda_copy){ } );*/ -// auto l = -// BOOST_REQUIRE( l == end(B) ); -// std::cout << B[8] << std::endl; -// multi::cuda::array A(10, 99.); -// BOOST_REQUIRE( CUDA_SLOW( A[5] == 99. ) ); -// int uno = 1.; -// for_each(begin(A), end(A), [uno]__device__(auto&& e){e = uno;}); -// BOOST_REQUIRE( CUDA_SLOW( A[5] == 1. 
) ); +// auto l = +// BOOST_REQUIRE( l == end(B) ); +// std::cout << B[8] << std::endl; +// multi::cuda::array A(10, 99.); +// BOOST_REQUIRE( CUDA_SLOW( A[5] == 99. ) ); +// int uno = 1.; +// for_each(begin(A), end(A), [uno]__device__(auto&& e){e = uno;}); +// BOOST_REQUIRE( CUDA_SLOW( A[5] == 1. ) ); } #if 0 @@ -231,5 +227,7 @@ BOOST_AUTO_TEST_CASE(cuda_timing){ #endif #endif -#endif +#undef BOOST_MULTI_HD + +#endif // BOOST_MULTI_ADAPTORS_CUDA_ALGORITHMS_COPY_HPP diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas.hpp similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas.hpp diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/call.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/call.hpp new file mode 100644 index 0000000000..fb50aaa38d --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/call.hpp @@ -0,0 +1,39 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_CUDA_CUBLAS_CALL_HPP +#define BOOST_MULTI_ADAPTORS_CUDA_CUBLAS_CALL_HPP +#pragma once + +#include "../cublas/error.hpp" + +#if not defined(MULTI_USE_HIP) +#include // cudaDeviceSynchronize +#else +#include // cudaDeviceSynchronize +#endif + +#if not defined(MULTI_USE_HIP) +#define hicup(name) cuda##name +#define HICUP(name) CU##name +#else +#define hicup(name) hip##name +#define HICUP(name) HIP##name +#endif + +namespace boost::multi::cuda::cublas{ + +template // needs C++17 +void call(Args... 
args){ + auto e = static_cast(Function(args...)); + if(e != cublas::error::success) { throw std::system_error{e, "cannot call function "+ std::string{__PRETTY_FUNCTION__}}; } +} + +#define CUBLAS_(F) call + +} + +#undef hicup +#undef HICUP +#endif // BOOST_MULTI_ADAPTORS_CUDA_CUBLAS_CALL_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/context.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/context.hpp new file mode 100644 index 0000000000..2ad055f783 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/context.hpp @@ -0,0 +1,380 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#pragma once + +// #include +#include + +#include +#include + +#if not defined(MULTI_USE_HIP) +#include // for thrust::cuda::pointer +#else +#include // for thrust::cuda::pointer +#include +#endif + +#if not defined(MULTI_USE_HIP) +#define hicup(name) cuda##name +#define hicu(name) cu##name +#define HICU(name) CU##name +#else +#define hicup(name) hip##name +#define hicu(name) hip##name +#define HICU(name) HIP##name +#endif + +namespace boost { +namespace multi::cuda::cublas { + +class operation { + hicu(blasOperation_t) impl_; + + public: + explicit operation(char trans) : impl_{[=]{ + switch(trans) { + case 'N': return HICU(BLAS_OP_N); + case 'T': return HICU(BLAS_OP_T); + case 'C': return HICU(BLAS_OP_C); + default : assert(0); + } + return hicu(blasOperation_t){}; + }()} {} + operator hicu(blasOperation_t)() const{return impl_;} +}; + +class side { + hicu(blasSideMode_t) impl_; + + public: + explicit side(char trans) : impl_{[=] { + switch(trans) { + case 'L': return HICU(BLAS_SIDE_LEFT); + case 'R': return HICU(BLAS_SIDE_RIGHT); + default: assert(0); + + } + assert(0); return hicu(blasSideMode_t){}; + }()} {} + operator hicu(blasSideMode_t)() const {return impl_;} +}; + 
+class filling { + hicu(blasFillMode_t) impl_; + + public: + explicit filling(char trans) : impl_{[=] { + switch(trans) { + case 'L': return HICU(BLAS_FILL_MODE_LOWER); + case 'U': return HICU(BLAS_FILL_MODE_UPPER); + } + assert(0); return hicu(blasFillMode_t){}; + }()} {} + operator hicu(blasFillMode_t)() const {return impl_;} +}; + +class diagonal { + hicu(blasDiagType_t) impl_; + + public: + explicit diagonal(char trans) : impl_{[=] { + switch(trans) { + case 'N': return HICU(BLAS_DIAG_NON_UNIT); + case 'U': return HICU(BLAS_DIAG_UNIT); + } + assert(0); return hicu(blasDiagType_t){}; + }()} {} + operator hicu(blasDiagType_t)() const {return impl_;} +}; + +using blas::is_s; +using blas::is_d; +using blas::is_c; +using blas::is_z; + +using std::is_assignable; +using std::is_assignable_v; +using std::is_convertible_v; + +// enum class type {S, D, C, Z}; + +// template +// constexpr auto type_of(T const& = {}) -> cublas::type { +// static_assert(is_s{} || is_d{} || is_c{} || is_z{}); +// if(is_s{}) {return type::S;} +// else if(is_d{}) {return type::D;} +// else if(is_c{}) {return type::C;} +// else if(is_z{}) {return type::Z;} +// } + +#if defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) + using Complex = hipblasComplex; + using DoubleComplex = hipblasDoubleComplex; +#else // __CUDA__ __NVCC__ or clang cuda + using Complex = cuComplex; + using DoubleComplex = cuDoubleComplex; +#endif + +template{}, int> =0> constexpr auto data_cast(T * p) {return reinterpret_cast(p);} +template{}, int> =0> constexpr auto data_cast(T * p) {return reinterpret_cast(p);} +template{}, int> =0> constexpr auto data_cast(T * p) {return reinterpret_cast(p);} +template{}, int> =0> constexpr auto data_cast(T * p) {return reinterpret_cast(p);} + +template{}, int> =0> constexpr auto data_cast(T const* p) {return reinterpret_cast(p);} +template{}, int> =0> constexpr auto data_cast(T const* p) {return reinterpret_cast(p);} +template{}, int> =0> constexpr 
auto data_cast(T const* p) {return reinterpret_cast(p);} +template{}, int> =0> constexpr auto data_cast(T const* p) {return reinterpret_cast(p);} + +class context : private std::unique_ptr::element_type, decltype(&hicu(blasDestroy))> { + using pimpl_t = std::unique_ptr::element_type, decltype(&hicu(blasDestroy))>; + hicup(Stream_t) stream() const {hicup(Stream_t) streamId; cuda::cublas::call(this->get(), &streamId); return streamId;} + template + void sync_call(Args... args) const { + call(const_cast(this)->get(), args...); + this->synchronize(); + } + template + void sync_call(Args... args) { + call(this->get(), args...); + this->synchronize(); + } + + public: + using pimpl_t::get; + static context& get_instance() { + thread_local context ctxt; + return ctxt; + }; + context() : pimpl_t{[] {hicu(blasHandle_t) h; hicu(blasCreate)(&h); return h;}(), &hicu(blasDestroy)} {} + using ssize_t = int; + // static int version() {int ret; cuda::cublas::call(nullptr, &ret); return ret;} // no hipblasGetVersion available + void synchronize() const { + // cudaError_t e = cudaDeviceSynchronize(); + auto s = stream(); + if(s != 0) {throw std::logic_error("CUBLAS stream expected to be zero");} + hicup(Error_t) e = hicup(StreamSynchronize)(s); + if(e != hicup(Success)) {throw std::runtime_error{"cannot synchronize stream in cublas context"};} + } + + template< + class XP, class X = typename std::pointer_traits::element_type, + class YP, class Y = typename std::pointer_traits::element_type, + class = decltype(std::swap(std::declval(), std::declval())), + std::enable_if_t>, int> = 0 + > + void swap(ssize_t n, XP x, ssize_t incx, YP y, ssize_t incy) const { + if(is_s{}) {sync_call(n, (float *)raw_pointer_cast(x), incx, (float *)raw_pointer_cast(y), incy);} + if(is_d{}) {sync_call(n, (double *)raw_pointer_cast(x), incx, (double *)raw_pointer_cast(y), incy);} + if(is_c{}) {sync_call(n, (Complex *)raw_pointer_cast(x), incx, (Complex *)raw_pointer_cast(y), incy);} + if(is_z{}) 
{sync_call(n, (DoubleComplex*)raw_pointer_cast(x), incx, (DoubleComplex*)raw_pointer_cast(y), incy);} + } + + template< + class XP, class X = typename std::pointer_traits::element_type, + class YP, class Y = typename std::pointer_traits::element_type, + class = decltype(std::declval() = std::declval()), + std::enable_if_t>, int> = 0 + > + void copy(ssize_t n, XP x, ssize_t incx, YP y, ssize_t incy) const { + if(is_s{}) {sync_call(n, (float const*)raw_pointer_cast(x), incx, (float *)raw_pointer_cast(y), incy);} + if(is_d{}) {sync_call(n, (double const*)raw_pointer_cast(x), incx, (double *)raw_pointer_cast(y), incy);} + if(is_c{}) {sync_call(n, (Complex const*)raw_pointer_cast(x), incx, (Complex *)raw_pointer_cast(y), incy);} + if(is_z{}) {sync_call(n, (DoubleComplex const*)raw_pointer_cast(x), incx, (DoubleComplex*)raw_pointer_cast(y), incy);} + } + + template::element_type, + class = decltype(std::declval() *= ALPHA{}), + std::enable_if_t>, int> = 0 + > + void scal(ssize_t n, ALPHA const& alpha, XP x, ssize_t incx) const { + if(is_s{}) {sync_call(n, (float const*)alpha, (float *)::thrust::raw_pointer_cast(x), incx);} + if(is_d{}) {sync_call(n, (double const*)alpha, (double *)::thrust::raw_pointer_cast(x), incx);} + if(is_c{}) {sync_call(n, (Complex const*)alpha, (Complex *)::thrust::raw_pointer_cast(x), incx);} + if(is_z{}) {sync_call(n, (DoubleComplex const*)alpha, (DoubleComplex*)::thrust::raw_pointer_cast(x), incx);} + } + + template::element_type, class YP, class Y = typename std::pointer_traits::element_type, + typename = decltype(std::declval() = ALPHA{}*X{} + Y{}), + std::enable_if_t> and std::is_convertible_v>, int> = 0 + > + void axpy(ssize_t n, ALPHA const* alpha, XP x, ssize_t incx, YP y, ssize_t incy) { + if(is_d{}) {sync_call(n, (double const*)alpha, (double const*)raw_pointer_cast(x), incx, (double *)raw_pointer_cast(y), incy);} + if(is_z{}) {sync_call(n, (DoubleComplex const*)alpha, (DoubleComplex const*)raw_pointer_cast(x), incx, 
(DoubleComplex*)raw_pointer_cast(y), incy);} + } + + template::element_type, class XXP, class XX = typename std::pointer_traits::element_type, class BETA, class YYP, class YY = typename std::pointer_traits::element_type, + typename = decltype(std::declval() = ALPHA{}*(AA{}*XX{} + AA{}*XX{})), + std::enable_if_t> and std::is_convertible_v> and std::is_convertible_v>, int> = 0 + > + auto gemv(char transA, ssize_t m, ssize_t n, ALPHA const* alpha, AAP aa, ssize_t lda, XXP xx, ssize_t incx, BETA const* beta, YYP yy, ssize_t incy) { + if(is_d{}) {sync_call(operation{transA}, m, n, (double const*)alpha, (double const*)::thrust::raw_pointer_cast(aa), lda, (double const*)::thrust::raw_pointer_cast(xx), incx, (double const*)beta, (double *)::thrust::raw_pointer_cast(yy), incy);} + if(is_z{}) {sync_call(operation{transA}, m, n, (DoubleComplex const*)alpha, (DoubleComplex const*)::thrust::raw_pointer_cast(aa), lda, (DoubleComplex const*)::thrust::raw_pointer_cast(xx), incx, (DoubleComplex const*)beta, (DoubleComplex*)::thrust::raw_pointer_cast(yy), incy);} + } + + template::element_type, class BBP, class BB = typename std::pointer_traits::element_type, class BETA, class CCP, class CC = typename std::pointer_traits::element_type, + typename = decltype(std::declval() = ALPHA{}*(AA{}*BB{} + AA{}*BB{})), + class = std::enable_if_t> and std::is_convertible_v> and std::is_convertible_v>> + > + void gemm(char transA, char transB, ssize_t m, ssize_t n, ssize_t k, ALPHA const* alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb, BETA const* beta, CCP cc, ssize_t ldc) { + /*MULTI_MARK_SCOPE("cublasXgemm");*/ + if(is_d{}) {sync_call(cuda::cublas::operation{transA}, cuda::cublas::operation{transB}, m, n, k, (double const*)alpha, (double const*)::thrust::raw_pointer_cast(aa), lda, (double const*)::thrust::raw_pointer_cast(bb), ldb, (double const*)beta, (double *)::thrust::raw_pointer_cast(cc), ldc);} + if(is_z{}) {sync_call(cuda::cublas::operation{transA}, cuda::cublas::operation{transB}, m, 
n, k, (DoubleComplex const*)alpha, (DoubleComplex const*)::thrust::raw_pointer_cast(aa), lda, (DoubleComplex const*)::thrust::raw_pointer_cast(bb), ldb, (DoubleComplex const*)beta, (DoubleComplex*)::thrust::raw_pointer_cast(cc), ldc);} + } + + template::element_type, class BBP, class BB = typename std::pointer_traits::element_type, + std::enable_if_t< + is_z{} and is_z{} and is_assignable{} and is_assignable{} and + is_convertible_v> and is_convertible_v> + ,int> =0 + > + void trsm(char side, char ul, char transA, char diag, ssize_t m, ssize_t n, ALPHA alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb) { + sync_call(cuda::cublas::side{side}, cuda::cublas::filling{ul}, cuda::cublas::operation{transA}, cuda::cublas::diagonal{diag}, m, n, (DoubleComplex const*)&alpha, (DoubleComplex*)raw_pointer_cast(aa), lda, (DoubleComplex*)raw_pointer_cast(bb), ldb); + } + + template::element_type, class BBP, class BB = typename std::pointer_traits::element_type, + std::enable_if_t< + is_d{} and is_d{} and is_assignable{} and is_assignable{} and + is_convertible_v> and is_convertible_v> + ,int> =0 + > + void trsm(char side, char ul, char transA, char diag, ssize_t m, ssize_t n, ALPHA alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb) { + sync_call( + cuda::cublas::side{side}, + cuda::cublas::filling{ul}, + cuda::cublas::operation{transA}, + cuda::cublas::diagonal{diag}, + m, n, (double const*)&alpha, (double const*)raw_pointer_cast(aa), lda, (double*)raw_pointer_cast(bb), ldb + ); + } + + template< + class XXP, class XX = typename std::pointer_traits::element_type, + class YYP, class YY = typename std::pointer_traits::element_type, + class RRP, class RR = typename std::pointer_traits::element_type, + std::enable_if_t< + is_d{} and is_d{} and is_d{} and is_assignable{} and + is_convertible_v> and is_convertible_v> + and is_convertible_v + , int> =0 + > + void dot(int n, XXP xx, int incx, YYP yy, int incy, RRP rr) { + hicu(blasPointerMode_t) mode; + auto s = 
hicu(blasGetPointerMode)(get(), &mode); assert( s == HICU(BLAS_STATUS_SUCCESS) ); + assert( mode == HICU(BLAS_POINTER_MODE_HOST) ); + sync_call(n, ::thrust::raw_pointer_cast(xx), incx, ::thrust::raw_pointer_cast(yy), incy, rr); + } + + template< + class XXP, class XX = typename std::pointer_traits::element_type, + class YYP, class YY = typename std::pointer_traits::element_type, + class RRP, class RR = typename std::pointer_traits::element_type, + std::enable_if_t< + is_z{} and is_z{} and is_z{} and is_assignable{} and + is_convertible_v> and is_convertible_v> + and (is_convertible_v> or is_convertible_v) + , int> =0 + > + void dotc(int n, XXP xx, int incx, YYP yy, int incy, RRP rr) { + hicu(blasPointerMode_t) mode; + auto s = hicu(blasGetPointerMode)(get(), &mode); assert( s == HICU(BLAS_STATUS_SUCCESS) ); + assert( mode == HICU(BLAS_POINTER_MODE_HOST) ); + // cublasSetPointerMode(get(), CUBLAS_POINTER_MODE_DEVICE); + if constexpr(is_convertible_v>) { + sync_call(n, (DoubleComplex const*)::thrust::raw_pointer_cast(xx), incx, (DoubleComplex const*)::thrust::raw_pointer_cast(yy), incy, (DoubleComplex*)::thrust::raw_pointer_cast(rr) ); + } else { + sync_call(n, (DoubleComplex const*)::thrust::raw_pointer_cast(xx), incx, (DoubleComplex const*)::thrust::raw_pointer_cast(yy), incy, (DoubleComplex*)rr); + } + } + + template< + class XXP, class XX = typename std::pointer_traits::element_type, + class RRP, class RR = typename std::pointer_traits::element_type, + std::enable_if_t< + is_z{} and is_d{} and is_assignable{} and + is_convertible_v> and (is_convertible_v> or is_convertible_v) + , int> =0 + > + void asum(int n, XXP xx, int incx, RRP rr) { + if(is_convertible_v>) {hicu(blasSetPointerMode)(get(), HICU(BLAS_POINTER_MODE_DEVICE));} + if constexpr(is_convertible_v>) { + sync_call(n, (DoubleComplex const*)::thrust::raw_pointer_cast(xx), incx, (double*)::thrust::raw_pointer_cast(rr) ); + } else { + sync_call(n, (DoubleComplex const*)::thrust::raw_pointer_cast(xx), incx, 
(double*) rr ); + } + if(is_convertible_v>) {hicu(blasSetPointerMode)(get(), HICU(BLAS_POINTER_MODE_HOST));} + } + + template< + class XXP, class XX = typename std::pointer_traits::element_type, + class RRP, class RR = typename std::pointer_traits::element_type, + std::enable_if_t< + is_z{} and is_d{} and is_assignable{} and + is_convertible_v> and (is_convertible_v> or is_convertible_v) + , int> =0 + > + void nrm2(int n, XXP xx, int incx, RRP rr) { + if(is_convertible_v>) {hicu(blasSetPointerMode)(get(), HICU(BLAS_POINTER_MODE_DEVICE));} + if constexpr(is_convertible_v>) { + sync_call(n, (DoubleComplex const*)::thrust::raw_pointer_cast(xx), incx, (double*)::thrust::raw_pointer_cast(rr) ); + } else { + sync_call(n, (DoubleComplex const*)::thrust::raw_pointer_cast(xx), incx, (double*) rr ); + } + if(is_convertible_v>) {hicu(blasSetPointerMode)(get(), HICU(BLAS_POINTER_MODE_HOST));} + } + + template< + class XXP, class XX = typename std::pointer_traits::element_type, + class YYP, class YY = typename std::pointer_traits::element_type, + class RRP, class RR = typename std::pointer_traits::element_type, + std::enable_if_t< + is_z{} and is_z{} and is_z{} and is_assignable{} and + is_convertible_v> and is_convertible_v> + and (is_convertible_v> or is_convertible_v) + , int> =0 + > + void dotu(int n, XXP xx, int incx, YYP yy, int incy, RRP rr) { + hicu(blasPointerMode_t) mode; + auto s = hicu(blasGetPointerMode)(get(), &mode); assert( s == HICU(BLAS_STATUS_SUCCESS) ); + assert( mode == HICU(BLAS_POINTER_MODE_HOST) ); + // cublasSetPointerMode(get(), CUBLAS_POINTER_MODE_DEVICE); + if constexpr(is_convertible_v>) { + sync_call(n, reinterpret_cast(::thrust::raw_pointer_cast(xx)), incx, reinterpret_cast(::thrust::raw_pointer_cast(yy)), incy, reinterpret_cast(::thrust::raw_pointer_cast(rr)) ); + } else { + sync_call(n, reinterpret_cast(::thrust::raw_pointer_cast(xx)), incx, reinterpret_cast(::thrust::raw_pointer_cast(yy)), incy, reinterpret_cast(rr)); + } + // 
cublasSetPointerMode(get(), CUBLAS_POINTER_MODE_HOST); + } +}; + +} // end namespace multi::cuda::cublas +} // end namespace boost + +namespace boost::multi::blas { + + template<> struct is_context : std::true_type {}; + template<> struct is_context : std::true_type {}; + + template::element_type, std::enable_if_t>{}, int> =0> + boost::multi::cuda::cublas::context* default_context_of(Ptr const&) { + namespace multi = boost::multi; + return &multi::cuda::cublas::context::get_instance(); + } + + template + boost::multi::cuda::cublas::context* + #if defined(__HIPCC__) + default_context_of(::thrust::pointer const&) { + #else // __NVCC__ + default_context_of(::thrust::pointer const&) { + #endif + namespace multi = boost::multi; + return &multi::cuda::cublas::context::get_instance(); + } +} + +#undef hicup +#undef hicu +#undef HICU diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/error.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/error.hpp new file mode 100644 index 0000000000..8a97d989fa --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/error.hpp @@ -0,0 +1,82 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_CUDA_CUBLAS_ERROR_HPP +#define BOOST_MULTI_ADAPTORS_CUDA_CUBLAS_ERROR_HPP + +#if !defined(MULTI_USE_HIP) +#include // cublasStatus_t +#else +#include // cublasStatus_t +#endif + +#include +#include // std::error_category +#include // std::underlying_type + +#if !defined(MULTI_USE_HIP) +#define hicup(name) cuda##name +#define hicu(name) cu##name +#define HICU(name) CU##name +#define HICUP(name) CU##name +#else +#define hicup(name) hip##name +#define hicu(name) hip##name +#define HICU(name) HIP##name +#define HICUP(name) HIP##name +#endif + +namespace boost::multi::cuda::cublas{ + +enum class error : typename std::underlying_type::type{ + success = HICUP(BLAS_STATUS_SUCCESS), + not_initialized = HICUP(BLAS_STATUS_NOT_INITIALIZED), + allocation_failed = HICUP(BLAS_STATUS_ALLOC_FAILED), + invalid_value = HICUP(BLAS_STATUS_INVALID_VALUE), + architecture_mismatch = HICUP(BLAS_STATUS_ARCH_MISMATCH), + mapping_error = HICUP(BLAS_STATUS_MAPPING_ERROR), + execution_failed = HICUP(BLAS_STATUS_EXECUTION_FAILED), + internal_error = HICUP(BLAS_STATUS_INTERNAL_ERROR), + not_supported = HICUP(BLAS_STATUS_NOT_SUPPORTED), +// license_error = HICUP(BLAS_STATUS_LICENSE_ERROR), // not supported by hip +}; + +std::string inline error_string(enum cublas::error err){ //https://stackoverflow.com/questions/13041399/equivalent-of-cudageterrorstring-for-cublas + switch(err){ + case cublas::error::success : return "CUBLAS_STATUS_SUCCESS" ; + case cublas::error::not_initialized : return "CUBLAS_STATUS_NOT_INITIALIZED" ; + case cublas::error::allocation_failed : return "CUBLAS_STATUS_ALLOC_FAILED" ; + case cublas::error::invalid_value : return "CUBLAS_STATUS_INVALID_VALUE" ; + case cublas::error::architecture_mismatch: return "CUBLAS_STATUS_ARCH_MISMATCH" ; + case cublas::error::mapping_error : return "CUBLAS_STATUS_MAPPING_ERROR" ; + case cublas::error::execution_failed : return "CUBLAS_STATUS_EXECUTION_FAILED"; + case 
cublas::error::internal_error : return "CUBLAS_STATUS_INTERNAL_ERROR" ; + case cublas::error::not_supported : return "CUBLAS_STATUS_NOT_SUPPORTED" ; +// case cublas::error::license_error : return "CUBLAS_STATUS_LICENSE_ERROR" ; + } + return "cublas status "; +} + +struct error_category : std::error_category{ + char const* name() const noexcept override{return "cublas wrapper";} + std::string message(int err) const override{return error_string(static_cast(err));} + static error_category& instance(){static cublas::error_category instance; return instance;} +}; + +inline std::error_code make_error_code(cublas::error err) noexcept{ + return {int(err), cublas::error_category::instance()}; +} + +} + +namespace std { + template<> struct is_error_code_enum<::boost::multi::cuda::cublas::error> : true_type{}; +} + +#undef hicu +#undef hicup +#undef HICU +#undef HICUP + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/CMakeLists.txt new file mode 100644 index 0000000000..c66046fb99 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/CMakeLists.txt @@ -0,0 +1,125 @@ +cmake_minimum_required(VERSION 3.17) # for CUDAToolkit + +set(CMAKE_VERBOSE_MAKEFILE ON) + +find_package(Boost REQUIRED COMPONENTS unit_test_framework) + +if((NOT + CMAKE_CXX_COMPILER_ID + STREQUAL + "PGI" + ) + AND (NOT + CMAKE_CXX_COMPILER_ID + STREQUAL + "NVHPC" + ) + AND (NOT + DART_COMPILER_NAME + STREQUAL + "nvcc" + ) + AND (NOT + DART_COMPILER_NAME + STREQUAL + "icpc" + ) +) + # find_package(Boost REQUIRED COMPONENTS unit_test_framework) + # link_libraries("-lboost_unit_test_framework") + find_package(BLAS REQUIRED) + find_path( + BLAS_INCLUDE_DIRS + cblas.h + /usr/include + /usr/local/include + $ENV{BLAS_HOME}/include + ) +# include_directories(${TEST_EXE} PRIVATE ${BLAS_INCLUDE_DIRS}) + 
link_libraries(${BLAS_LIBRARIES}) +else() + link_libraries("-lblas") # cmake cannot detect BLAS with pgi/nvc++ but it ships with its own version +endif() + +# include_directories(${TEST_EXE} PRIVATE ${BLAS_INCLUDE_DIRS}) + +if(ENABLE_HIP) + enable_language(HIP) + find_package(hipBLAS REQUIRED) + find_package(rocthrust REQUIRED) +endif() + +if(ENABLE_CUDA OR DEFINED CXXCUDA) + enable_language(CUDA) + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES native) + endif() + find_package(CUDAToolkit REQUIRED COMPONENTS cuBLAS) # requires cmake 3.17 +endif() + +enable_testing() +include(CTest) + +include_directories(${CMAKE_BINARY_DIR}) + +if(ENABLE_HIP) + add_executable(all.hip.x all.hip) + #set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE HIP) + target_link_libraries(all.hip.x PRIVATE multi roc::hipblas Boost::unit_test_framework) + target_compile_definitions(all.hip.x PRIVATE BOOST_TEST_DYN_LINK=1) + + add_test(NAME all.hip.x COMMAND ./all.hip.x) +endif() + +if(ENABLE_CUDA) +# find_package(CUDA REQUIRED) + +find_package(CUDAToolkit REQUIRED COMPONENTS cuBLAS) + +# file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) +set(TEST_SRCS + all.cu + axpy.cu + dot.cu + gemm.cu + gemv.cu + # herk.cu + scal.cu + trsm.cu +) + +foreach(TEST_FILE ${TEST_SRCS}) + if(ENABLE_CUDA OR DEFINED CXXCUDA) + set(TEST_EXE "${TEST_FILE}.x") + add_executable(${TEST_EXE} ${TEST_FILE}) + + # target_compile_features (${TEST_EXE} PUBLIC cxx_std_17) + target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") + target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS}) + + # target_include_directories(${TEST_EXE} PRIVATE ${PROJECT_SOURCE_DIR}/include) + target_include_directories(${TEST_EXE} PRIVATE ${CUDA_INCLUDE_DIRS}) + + #target_compile_options(${TEST_EXE} PRIVATE --expt-relaxed-constexpr) + + target_link_libraries(${TEST_EXE} PRIVATE Boost::unit_test_framework) + + target_link_libraries(${TEST_EXE} PRIVATE ${CUDA_LIBRARIES}) + 
target_link_libraries(${TEST_EXE} PRIVATE CUDA::cublas) + target_link_libraries(${TEST_EXE} PRIVATE multi Boost::unit_test_framework) + + # target_include_directories(${TEST_EXE} PRIVATE /opt/nvidia/hpc_sdk/Linux_x86_64/22.3/math_libs/include) + + # if(NOT ENABLE_CUDA) target_compile_options (${TEST_EXE} PRIVATE $<$: -Werror -Wall -Wextra -fno-common -Wpedantic -Wformat-truncation -fstack-usage>#-Wconversion + # $<$,$>: + # -Werror -Wall -Wextra -fno-common -Wpedantic -Wmove> $<$: -Werror -Wall -Wextra -fno-common -wd161 -diag-disable=remark -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized + # -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wparentheses -Wpointer-sign -Wreorder -Wno-return-type -Wsign-compare -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wunused-variable -Wwrite-strings + # -Werror -diag-error:3846 > $<$: /W4>) endif() + + set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) + # target_compile_options(${TEST_EXE} PRIVATE -extended-lambda) + + add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) + endif() +endforeach() +endif() diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/all.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/all.cu new file mode 100644 index 0000000000..b1ebb0f215 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/all.cu @@ -0,0 +1,2013 @@ +// Copyright 2023 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUBLAS all" +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +namespace multi = boost::multi; + +using complex = thrust::complex; + +template> +auto generate_ABx() { + complex const I{0.0, 1.0}; + multi::array x = { 1.0 + I*0.0, 2.0 + I*0.0, 3.0 + I*0.0, 4.0 + I*0.0}; + + multi::array A = { + { 1.0 + I*0.0, 2.0 + I*0.0, 3.0 + I*0.0, 4.0 + I*0.0}, + { 5.0 + I*0.0, 6.0 + I*0.0, 7.0 + I*0.0, 8.0 + I*0.0}, + { 9.0 + I*0.0, 10.0 + I*0.0, 11.0 + I*0.0, 12.0 + I*0.0}, + {13.0 + I*0.0, 14.0 + I*0.0, 15.0 + I*0.0, 16.0 + I*0.0}, + }; + + multi::array B = { + { 1.0 + I*0.0, 2.0 + I*0.0, 3.0 + I*0.0, 4.0 + I*0.0}, + { 5.0 + I*0.0, 6.0 + I*0.0, 7.0 + I*0.0, 8.0 + I*0.0}, + { 9.0 + I*0.0, 10.0 + I*0.0, 11.0 + I*0.0, 12.0 + I*0.0}, + {13.0 + I*0.0, 14.0 + I*0.0, 15.0 + I*0.0, 16.0 + I*0.0}, + }; + + return std::make_tuple(std::move(x), std::move(A), std::move(B)); +} + +BOOST_AUTO_TEST_CASE(cublas_scal_complex_column) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + { + using T = complex; + auto [x, A, B] = generate_ABx >(); + auto const s = 2.0 + I*3.0; + blas::scal(s, x); // x_i <- s*x_i + + { + auto [x2, A2, B2] = generate_ABx >(); + auto xx = +x2; + blas::scal(s, xx); + BOOST_REQUIRE(xx == x); + } + { + auto [x2, A2, B2] = generate_ABx >(); + using blas::operators::operator*=; + x2 *= s; + BOOST_REQUIRE(x == x2); + } + { + auto [x2, A2, B2] = generate_ABx >(); + thrust::transform(x2.begin(), x2.end(), x2.begin(), [s] __device__ (T& e) {return s*e;}); + + BOOST_REQUIRE(x == x2); + } + { + auto [x2, A2, B2] = generate_ABx >(); + thrust::for_each(x2.begin(), x2.end(), [s] __device__ (T& e) {return e*=s;}); + + BOOST_REQUIRE(x == x2); + } + } +} + +BOOST_AUTO_TEST_CASE(cublas_copy_complex) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + 
using T = complex; + using Alloc = thrust::cuda::allocator; + + multi::array const x = { 1.0 + I*8.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + multi::array y = { 1.0 + I*9.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + blas::copy(x, y); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + { + thrust::copy(begin(x), end(x), begin(y)); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + blas::copy_n(x.begin(), x.size(), y.begin()); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + y() = blas::copy(x); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + multi::array yy = blas::copy(x); + BOOST_REQUIRE( static_cast(yy[0]) == 1.0 + I*8.0 ); + } + { + y = blas::copy(x); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + { + using blas::operators::operator<<; + y << x; + // BOOST_REQUIRE(( static_cast(y[0]) == 1.0 + I*8.0 )); // this can't be used with a free operator<< + } + BOOST_REQUIRE(( static_cast(y[0]) == 1.0 + I*8.0 )); // this can't be used with a free operator<< + } +} + +#if 1 +BOOST_AUTO_TEST_CASE(cublas_swap_complex) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::cuda::allocator; + + multi::array x = { 1.0 + I*8.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + multi::array y = { 1.0 + I*9.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + blas::swap(x, y); + BOOST_REQUIRE( static_cast(x[0]) == 1.0 + I*9.0 ); + { + thrust::swap_ranges(begin(x), end(x), begin(y)); + thrust::swap_ranges(begin(x), end(x), begin(y)); + BOOST_REQUIRE( static_cast(x[0]) == 1.0 + I*9.0 ); + } + { + using blas::operator^; + (x^y); + (x^y); + BOOST_REQUIRE( static_cast(x[0]) == 1.0 + I*9.0 ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_asum_complex_column) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::cuda::allocator; + + multi::array const x = { 1.0 + I*8.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + double res; + 
blas::asum_n(x.begin(), x.size(), &res); + { + double res2; + res2 = blas::asum(x); + BOOST_REQUIRE( res == res2 ); + } + { + double res2 = blas::asum(x); + BOOST_REQUIRE( res == res2 ); + } + { + auto res2 = std::transform_reduce( + x.begin(), x.end(), double{}, std::plus<>{}, [](T const& e) {return std::abs(e.real()) + std::abs(e.imag());} + ); + BOOST_REQUIRE( res == res2 ); + } + { + auto res2 = thrust::transform_reduce( + x.begin(), x.end(), [] __device__ (T const& e) {return std::abs(e.real()) + std::abs(e.imag());}, + double{}, thrust::plus<>{} + ); + BOOST_REQUIRE( res == res2 ); + } + { + multi::static_array> res2({}, 0.0); + res2.assign( &blas::asum(x) ); + res2 = blas::asum(x); + BOOST_REQUIRE(( res == static_cast>::element_ref>(res2) )); + BOOST_REQUIRE(( res == static_cast(res2) )); + // BOOST_REQUIRE( res == res2 ); + } + { + multi::array> res2 = blas::asum(x); + BOOST_REQUIRE(( res == static_cast>::element_ref>(res2) )); + BOOST_REQUIRE(( res == static_cast(res2) )); + // BOOST_REQUIRE( res == res2 ); + } + { + using blas::operators::operator==; + using blas::operators::operator!=; + BOOST_REQUIRE( x != 0 ); + BOOST_REQUIRE( not (x == 0) ); + } + { + using blas::operators::contains_nan; + BOOST_REQUIRE( not contains_nan(x) ); + } + { + using blas::operators::isfinite; + using blas::operators::isinf; + BOOST_REQUIRE( isfinite(x) ); + BOOST_REQUIRE( not isinf(x) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_asum_complex_nans) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::cuda::allocator; + + multi::array const x = { 1.0 + I*8.0, std::numeric_limits::quiet_NaN() + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + { + using blas::operators::contains_nan; + BOOST_REQUIRE( contains_nan(x) ); + } + { + using blas::operators::operator==; + using blas::operators::operator!=; + BOOST_REQUIRE( not (x != 0) ); + BOOST_REQUIRE( not (x == 0) ); + } + { + using blas::operators::isfinite; + using blas::operators::isinf; 
+ BOOST_REQUIRE( not isfinite(x) ); + BOOST_REQUIRE( not isinf(x) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_asum_complex_inf) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::cuda::allocator; + + multi::array const x = { 1.0 + I*8.0, std::numeric_limits::infinity() + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + // double res; + { + using blas::operators::contains_nan; + BOOST_REQUIRE( not contains_nan(x) ); + } + { + using blas::operators::operator==; + using blas::operators::operator!=; + BOOST_REQUIRE( (x != 0) ); + BOOST_REQUIRE( not (x == 0) ); + } + { + using blas::operators::isfinite; + using blas::operators::isinf; + BOOST_REQUIRE( not isfinite(x) ); + BOOST_REQUIRE( isinf(x) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_nrm2_complex_column) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::cuda::allocator; + + multi::array const x = { 1.0 + I*8.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + double res; + blas::nrm2(x, res); + { + double res2; + res2 = blas::nrm2(x); + BOOST_REQUIRE( res == res2 ); + } + { + auto res2 = +blas::nrm2(x); + BOOST_REQUIRE( res == res2 ); + } + { + auto res2 = sqrt(thrust::transform_reduce( + x.begin(), x.end(), [] __device__ (T const& e) {return thrust::norm(e);}, + double{}, thrust::plus<>{} + )); + BOOST_REQUIRE( res == res2 ); + } + { + multi::array> res2 = blas::nrm2(x); + BOOST_REQUIRE(( res == static_cast(res2) )); + } +} + +BOOST_AUTO_TEST_CASE(cublas_dot_complex_column) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::cuda::allocator; + + multi::array const x = { 1.0 + I*8.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + multi::array const y = { 1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*5.0, 4.0 + I*7.0}; + + { + T res; + blas::dot(x, y, res); + { + complex res2; + res2 = blas::dot(x, y); + BOOST_REQUIRE(res == res2); + } + { + multi::array res2(complex{1.0, 0.0}); + 
res2 = blas::dot(x, y); + BOOST_REQUIRE( static_cast(res2) == res ); + } + { + using blas::operators::operator,; + auto res2 = +(x, y); + BOOST_REQUIRE(res == res2); + } + { + auto res2 = +blas::dot(x, y); + BOOST_REQUIRE(res == res2); + } + { + // auto [x2, A2, B2] = generate_ABx >(); + // thrust::for_each(x2.begin(), x2.end(), [s] __device__ (T& e) {return e*=s;}); + auto res2 = thrust::inner_product(x.begin(), x.end(), y.begin(), T{}); + BOOST_REQUIRE(res == res2); + } + } + { + T res; + blas::dot(blas::C(x), y, res); + { + using blas::operators::operator,; + using blas::operators::operator*; + auto res2 = +(*x, y); + BOOST_REQUIRE(res == res2); + } + { + auto res2 = +blas::dot(blas::C(x), y); + BOOST_REQUIRE(res == res2); + } + { + // auto [x2, A2, B2] = generate_ABx >(); + // thrust::for_each(x2.begin(), x2.end(), [s] __device__ (T& e) {return e*=s;}); + auto res2 = thrust::inner_product(x.begin(), x.end(), y.begin(), T{}, thrust::plus<>{}, [] __device__ (T const& t1, T const& t2) {return conj(t1)*t2;}); + BOOST_REQUIRE(res == res2); + } + } + { + T res; + blas::dot(x, blas::C(y), res); + { + using blas::operators::operator,; + auto res2 = +(x, blas::C(y)); + BOOST_REQUIRE(res == res2); + } + { + auto res2 = +blas::dot(x, blas::C(y)); + BOOST_REQUIRE(res == res2); + } + { + // auto [x2, A2, B2] = generate_ABx >(); + // thrust::for_each(x2.begin(), x2.end(), [s] __device__ (T& e) {return e*=s;}); + auto res2 = thrust::inner_product(x.begin(), x.end(), y.begin(), T{}, thrust::plus<>{}, [] __device__ (T const& t1, T const& t2) {return t1*conj(t2);}); + BOOST_REQUIRE(res == res2); + } + { + BOOST_REQUIRE( blas::dot(blas::C(x), x) == pow(blas::nrm2(x), 2.0) ); + BOOST_REQUIRE( blas::dot(x, blas::C(x)) == pow(blas::nrm2(x), 2.0) ); + + using blas::operators::operator,; + using blas::operators::operator*; + using blas::operators::abs; + using blas::operators::norm; + using blas::operators::operator^; + + BOOST_REQUIRE( (*x, x) == pow(abs(x), 2.0) ); + BOOST_REQUIRE( 
(*x, x) == pow(abs(x), 2) ); + BOOST_REQUIRE( (*x, x) == norm(x) ); + + BOOST_REQUIRE( (x, *x) == pow(abs(x), 2.0) ); + BOOST_REQUIRE( (x, *x) == pow(abs(x), 2) ); + BOOST_REQUIRE( (x, *x) == norm(x) ); + + BOOST_REQUIRE( (*x, x) == (x^2) ); + } + } + { + // T res; + // blas::dot(blas::C(x), blas::C(y), res); + multi::array res({1, 1}, 0.0); + auto rr = blas::gemm( 1.0, x.partitioned(1), blas::H(y.partitioned(1)), 0.0, res)[0][0]; + // { + // using blas::operators::operator,; + // auto res2 = +(x, blas::C(y)); + // BOOST_REQUIRE(res == res2); + // } + // { + // auto res2 = +blas::dot(x, blas::C(y)); + // BOOST_REQUIRE(res == res2); + // } + // { + // // auto [x2, A2, B2] = generate_ABx >(); + // // thrust::for_each(x2.begin(), x2.end(), [s] __device__ (T& e) {return e*=s;}); + // auto res2 = thrust::inner_product(x.begin(), x.end(), y.begin(), T{}, thrust::plus<>{}, [] __device__ (T const& t1, T const& t2) {return t1*conj(t2);}); + // BOOST_REQUIRE(res == res2); + // } + // { + // BOOST_REQUIRE( blas::dot(blas::C(x), x) == pow(blas::nrm2(x), 2.0) ); + // BOOST_REQUIRE( blas::dot(x, blas::C(x)) == pow(blas::nrm2(x), 2.0) ); + + // using blas::operators::operator,; + // using blas::operators::operator*; + // using blas::operators::abs; + // using blas::operators::norm; + // using blas::operators::operator^; + + // BOOST_REQUIRE( (*x, x) == pow(abs(x), 2.0) ); + // BOOST_REQUIRE( (*x, x) == pow(abs(x), 2) ); + // BOOST_REQUIRE( (*x, x) == norm(x) ); + + // BOOST_REQUIRE( (x, *x) == pow(abs(x), 2.0) ); + // BOOST_REQUIRE( (x, *x) == pow(abs(x), 2) ); + // BOOST_REQUIRE( (x, *x) == norm(x) ); + + // BOOST_REQUIRE( (*x, x) == (x^2) ); + // } + } +} + +BOOST_AUTO_TEST_CASE(cublas_axpy_complex_one) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::cuda::allocator; + + multi::array const x = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { 
{2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + + blas::axpy(1.0, x, y); + std::cout << y[0] << std::endl; + BOOST_REQUIRE( static_cast(y[0]) == 3.2 + I*0.0 ); + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + thrust::transform(x.begin(), x.end(), yy.begin(), yy.begin(), [] __device__ (auto const& ex, auto const& ey) {return ex + ey;}); + BOOST_TEST( yy == y , boost::test_tools::per_element() ); + } + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; + using blas::operators::operator+=; + yy += x; + BOOST_REQUIRE( yy == y ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_axpy_complex_mone) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::cuda::allocator; + + multi::array const x = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + + blas::axpy(-1.0, x, y); + std::cout << y[0] << std::endl; + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*0.0 ); + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + thrust::transform(x.begin(), x.end(), yy.begin(), yy.begin(), [] __host__ __device__ (T ex, T ey) {return -1.0*ex + ey;}); + BOOST_TEST( yy == y , boost::test_tools::per_element() ); + } + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; + using blas::operators::operator-=; + yy -= x; + BOOST_REQUIRE( yy == y ); + } + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; + using blas::operators::operator-=; + yy -= x; + yy -= y; + using blas::operators::norm; + BOOST_REQUIRE( norm(yy) == 0 ); + using blas::operators::operator==; + BOOST_REQUIRE( operator==(yy, 
0) ); + BOOST_REQUIRE( yy == 0 ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_axpy_complex_alpha) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::cuda::allocator; + + multi::array const x = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + + blas::axpy(3.0, x, y); + std::cout << y[0] << std::endl; + BOOST_REQUIRE( static_cast(y[0]) == 5.4 + I*0.0 ); + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + thrust::transform(x.begin(), x.end(), yy.begin(), yy.begin(), [aa=3.0] __device__ (T ex, T ey) {return aa*ex + ey;}); + BOOST_TEST( yy == y , boost::test_tools::per_element() ); + } + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; + using blas::operators::operator+=; + using blas::operators::operator*; + yy += 3.0*x; + BOOST_REQUIRE( yy == y ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemv_conj_complex_zero) { + namespace blas = multi::blas; + using T = complex; + complex const I{0.0, 1.0}; + using Alloc = thrust::cuda::allocator; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + { { 9.0, 0.0}, {24.0, 0.0}, {30.0, 0.0}, {9.0, 0.0} }, + { { 4.0, 0.0}, {10.0, 0.0}, {12.0, 0.0}, {7.0, 0.0} }, + { {14.0, 0.0}, {16.0, 0.0}, {36.0, 0.0}, {1.0, 0.0} }, + }; + multi::array const x = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + blas::gemv(1.0, A, x, 0.0, y); + { + + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + std::transform(begin(A), end(A), begin(yy), [&x] (auto 
const& Ac) {return blas::dot(Ac, x);}); + + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + yy = blas::gemv(1.0, A, x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = blas::gemv(1.0, A, x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + + } + { + using blas::operators::operator%; + + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + yy = A % x; + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemv_complex_conj_zero) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + { 9.0 + I*0.0, 24.0 + I* 0.0, 30.0 + I* 0.0, 9.0 + I* 0.0 }, + { 4.0 + I*0.0, 10.0 + I* 0.0, 12.0 + I* 0.0, 7.0 + I* 0.0 }, + { 14.0 + I*0.0, 16.0 + I* 0.0, 36.0 + I* 0.0, 1.0 + I* 0.0 }, + }; + multi::array const x = { 1.1 + I* 0.0, 2.1 + I* 0.0, 3.1 + I* 0.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + blas::gemv(1.0, blas::T(A), x, 0.0, y); + { + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // 
NOLINT(readability-identifier-length) BLAS naming + using blas::operators::operator*; + std::transform(begin(transposed(A)), end(transposed(A)), begin(yy), [&x] (auto const& Ac) {return blas::dot(Ac, x);}); + + BOOST_REQUIRE_CLOSE( static_cast(y[0]).real(), static_cast(yy[0]).real(), 1e-7 ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + yy = blas::gemv(1.0, blas::T(A), x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = blas::gemv(1.0, blas::T(A), x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + using blas::operators::operator%; + + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + yy = ~A % x; + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } +} + +template void what(T&&) = delete; + +BOOST_AUTO_TEST_CASE(cublas_one_gemv_complex_zero) { + namespace blas = multi::blas; + using T = complex; + complex const I{0.0, 1.0}; + using Alloc = thrust::cuda::allocator; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + { { 9.0, 0.0}, {24.0, 0.0}, {30.0, 0.0}, {9.0, 0.0} }, + { { 4.0, 0.0}, {10.0, 0.0}, {12.0, 0.0}, {7.0, 0.0} }, + { {14.0, 0.0}, {16.0, 0.0}, {36.0, 0.0}, {1.0, 0.0} }, + }; + multi::array const x = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + 
multi::array y = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + blas::gemv(1.0, blas::J(A), x, 0.0, y); + { + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + std::transform(begin(A), end(A), begin(yy), [&x] (auto const& Ac) { + using blas::operators::operator*; // nvcc 11.8 needs this to be inside lambda + return blas::dot(*Ac, x);} + ); + + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + yy = blas::gemv(1.0, blas::J(A), x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = blas::gemv(1.0, blas::J(A), x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + + } + { + using blas::operators::operator%; + using blas::operators::operator*; + + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + yy = *A % x; + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemv_complex_conjtrans_zero) { + namespace blas = multi::blas; + using T = complex; + using Alloc = std::allocator; // thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + { 9.0 + I*0.0, 24.0 + I* 0.0, 30.0 + I* 0.0, 9.0 + I* 0.0 }, + { 4.0 + I*0.0, 10.0 + I* 
0.0, 12.0 + I* 0.0, 7.0 + I* 0.0 }, + { 14.0 + I*0.0, 16.0 + I* 0.0, 36.0 + I* 0.0, 1.0 + I* 0.0 }, + }; + multi::array const x = { 1.1 + I* 0.0, 2.1 + I* 0.0, 3.1 + I* 0.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + + // blas::gemv(1.0, blas::H(A), x, 0.0, y); + + { + // TODO(correaa) MKL gives an error here + #if 0 + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + std::transform(begin(transposed(A)), end(transposed(A)), begin(yy), [&x] (auto const& Ac) { + using blas::operators::operator*; // nvcc 11.8 needs this to be inside lambda + return blas::dot(*Ac, x);} + ); + + BOOST_REQUIRE_CLOSE( static_cast(yy[0]).real() , 61.7, 1.e-7 ); + BOOST_REQUIRE_CLOSE( static_cast(yy[1]).real() , 97.0, 1.e-7 ); + BOOST_REQUIRE_CLOSE( static_cast(yy[2]).real() , 169.8, 1.e-7 ); + BOOST_REQUIRE_CLOSE( static_cast(yy[3]).real() , 27.7, 1.e-7 ); + + using blas::operators::operator*; + BOOST_REQUIRE_CLOSE( static_cast(yy[0]).real() , (+blas::dot(*(~A)[0], x)).real() , 1.e-7 ); + BOOST_REQUIRE_CLOSE( static_cast(yy[1]).real() , (+blas::dot(*(~A)[1], x)).real() , 1.e-7 ); + #endif + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemv_complex_trans_one) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + { 9.0 + I*0.0, 24.0 + I* 0.0, 30.0 + I* 0.0, 9.0 + I* 0.0 }, + { 4.0 + I*0.0, 10.0 + I* 0.0, 12.0 + I* 0.0, 7.0 + I* 0.0 }, + { 14.0 + I*0.0, 16.0 + I* 0.0, 36.0 + I* 0.0, 1.0 + I* 0.0 }, + }; + multi::array const x = { 1.1 + I* 0.0, 2.1 + I* 0.0, 3.1 + I* 0.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // 
NOLINT(readability-identifier-length) BLAS naming + blas::gemv(3.0 + I*4.0, blas::T(A), x, 1.0, y); + { + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + // using blas::operators::operator*; + std::transform(begin(transposed(A)), end(transposed(A)), begin(yy), begin(yy), [&x,aa=3.0 + I*4.0,bb=1.0] (auto const& Ac, complex e) {return aa*blas::dot(Ac, x) + bb*e;}); + + BOOST_REQUIRE_CLOSE( static_cast(y[0]).real(), static_cast(yy[0]).real(), 1e-7 ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + yy += blas::gemv(3.0 + I*4.0, blas::T(A), x); + + BOOST_REQUIRE_CLOSE( static_cast(y[0]).real(), static_cast(yy[0]).real(), 1e-7 ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + using blas::operators::operator*; + yy += (3.0 + I*4.0)* ~A % x; + + BOOST_REQUIRE_CLOSE( static_cast(y[0]).real(), static_cast(yy[0]).real(), 1e-7 ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_trans_none) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) 
conventional BLAS naming + auto C_copy = C; + blas::gemm({1.0, 0.0}, A, B, {0.0, 0.0}, C); + + // std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + // [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + // ); + std::transform(begin(A), end(A), begin(C_copy), end(C_copy), + [&B, aa=1.0, bb=0] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C = blas::gemm(1.0 + I*0.0, A, B); + + // std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + // [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + // ); + std::transform(begin(A), end(A), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=0.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C += blas::gemm(1.0 + I*0.0, A, B); + + std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=1.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional 
BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += A*B; + + std::transform(begin(A), end(A), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_trans_second) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + blas::gemm({1.0, 0.0}, A, blas::T(B), {0.0, 0.0}, C); + + std::transform(begin(B), end(B), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C = blas::gemm(1.0 + I*0.0, A, blas::T(B)); + + // std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + // [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + // ); + std::transform(begin(A), end(A), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=0.0] (auto const& Ac, auto&& Cr) { + return blas::gemv(aa, B, Ac, bb, std::move(Cr)); + 
}); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C += blas::gemm(1.0 + I*0.0, A, blas::T(B)); + + std::transform(begin(B), end(B), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=1.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += A * ~B; + + std::transform(begin(A), end(A), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, B, Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += 2.0*(A * ~B); + + std::transform(begin(A), end(A), begin(C_copy), begin(C_copy), [&B, aa=2.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, B, Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_trans_first) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + 
multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + blas::gemm({1.0, 0.0}, blas::T(A), B, {0.0, 0.0}, C); + + std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, blas::T(A), Bc, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C = blas::gemm(1.0 + I*0.0, blas::T(A), B); + + // std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + // [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + // ); + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=0.0] (auto const& Ac, auto&& Cr) { + return blas::gemv(aa, blas::T(B), Ac, bb, std::move(Cr)); + }); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C += blas::gemm(1.0 + I*0.0, blas::T(A), B); + + std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=1.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, blas::T(A), Bc, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + 
BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += ~A * B; + + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += 2.0*(~A * B); + + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=2.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_trans_both) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + blas::gemm({1.0, 0.0}, blas::T(A), blas::T(B), {0.0, 0.0}, C); + + std::transform(begin(B), end(B), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=0.0] (auto const& Br, auto&& 
Cc) {return blas::gemv(aa, blas::T(A), Br, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C = blas::gemm(1.0 + I*0.0, blas::T(A), blas::T(B)); + + // std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + // [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + // ); + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=0.0] (auto const& Ac, auto&& Cr) { + return blas::gemv(aa, B, Ac, bb, std::move(Cr)); + }); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C += blas::gemm(1.0 + I*0.0, blas::T(A), blas::T(B)); + + std::transform(begin(B), end(B), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=1.0] (auto const& Br, auto&& Cc) {return blas::gemv(aa, blas::T(A), Br, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += ~A * ~B; + + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, B, Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + 
BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += 2.0*(~A * ~B); + + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=2.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, B, Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_conj_second) { + namespace blas = multi::blas; + using T = complex; + using Alloc = std::allocator; // thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto CC = C; + auto C_copy = CC; + // blas::gemm({1.0, 0.0}, A, blas::J(B), {0.0, 0.0}, C); + blas::gemm({1.0, 0.0}, blas::T(B), blas::H(A), {0.0, 0.0}, C_copy); + { + auto const [is, js] = C.extensions(); + for(auto i : is) { + for(auto j : js) { + C[i][j] *= 0.0; + for(auto k : B.extension()) { + C[i][j] += A[i][k]*conj(B[k][j]); + } + } + } + } + // TODO(correaa) MKL gives an error here + // unknown location(0): fatal error: in "cublas_one_gemv_complex_conjtrans_zero": memory access violation at address: 0x00000007: no mapping at fault address + #if 0 + { + std::transform(begin(A), end(A), begin(CC), begin(CC), [BT = transposed(B)](auto const& Ar, auto&& Cr) { + return std::transform( + begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar](auto const& Bc, auto&& Ce) { + 
return 1.0*blas::dot(Ar, blas::C(Bc)) + 0.0*Ce; + } + ), std::move(Cr); + }); + } + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(C_copy[1][0]).real() == +static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(C_copy[1][0]).imag() == -static_cast(C[0][1]).imag() ); + #endif + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_conj_first) { + namespace blas = multi::blas; + using T = complex; + using Alloc = std::allocator; // thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto CC = C; + auto C_copy = CC; + // blas::gemm({1.0, 0.0}, blas::J(A), B, {0.0, 0.0}, C); + // blas::gemm({1.0, 0.0}, blas::T(B), blas::H(A), {0.0, 0.0}, C_copy); + // { + // auto const [is, js] = C.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // C[i][j] *= 0.0; + // for(auto k : B.extension()) { + // C[i][j] += A[i][k]*conj(B[k][j]); + // } + // } + // } + // } + // { + // std::transform(begin(A), end(A), begin(CC), begin(CC), [BT = transposed(B)](auto const& Ar, auto&& Cr) { + // return std::transform( + // begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar](auto const& BCr, auto&& Ce) { + // return 1.0*blas::dot(Ar, blas::C(BCr)) + 0.0*Ce; + // } + // ), std::move(Cr); + // }); + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + 
// BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(C_copy[1][0]).real() == +static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(C_copy[1][0]).imag() == -static_cast(C[0][1]).imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_conj_both) { + namespace blas = multi::blas; + using T = complex; + using Alloc = std::allocator; // thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto CC = C; + auto C_copy = CC; + // blas::gemm({1.0, 0.0}, blas::J(A), blas::J(B), {0.0, 0.0}, C); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_second) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, A, blas::H(B), {0.0, 0.0}, C); + { + multi::array CC({2, 2}, {3.0, 0.0}); + + std::transform( + begin(A), end(A), begin(CC), begin(CC), + [&B, aa = 1.0, bb = 0.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, blas::J(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_TEST_REQUIRE( 
static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + CC = blas::gemm({1.0, 0.0}, A, blas::H(B)); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + using blas::operators::operator*; + using blas::operators::operator~; + CC = A* ~*B; + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_second_plus) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, A, blas::H(B), {1.0, 0.0}, C); + { + multi::array CC({2, 2}, {3.0, 0.0}); + + std::transform( + begin(A), end(A), begin(CC), begin(CC), + [&B, aa = 1.0, bb = 1.0] (auto const& 
Ar, auto&& Cr) { + return blas::gemv(aa, blas::J(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + CC += blas::gemm({1.0, 0.0}, A, blas::H(B)); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + using blas::operators::operator*; + using blas::operators::operator~; + CC += A* ~*B; + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_first) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, blas::H(A), B, {0.0, 0.0}, C); + // { + // multi::array CC({2, 2}, {3.0, 
0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // CC[i][j] = 0.0; + // for(auto k : A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*B[k][j] ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + { + multi::array CC({2, 2}, {3.0, 0.0}); + + std::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BT=transposed(B)](auto const& Ac, auto&& Cr) { + std::transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ac](auto const& Bc, auto&& c) { + return blas::dot(blas::C(Ac), Bc, std::move(c)); + }); + return std::move(Cr); + }); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + CC = blas::gemm({1.0, 0.0}, blas::H(A), B); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + using blas::operators::operator*; + using blas::operators::operator~; + CC = ~*A *B; + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == 
static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_both) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, blas::H(A), blas::H(B), {0.0, 0.0}, C); + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // CC[i][j] = 0.0; + // for(auto k : A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*conj(B[j][k]) ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // thrust::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BP = &B] __device__ (multi::array::reference const& Ac, multi::array::reference&& Cr) { + // thrust::transform(begin(*BP), end(*BP), begin(Cr), begin(Cr), [APc = &Ac] __device__ (multi::array::reference const& Bc, complex&& c) { + // return conj(thrust::inner_product(begin(*APc), end(*APc), begin(Bc), 0.0*c, std::plus<>{}, [] __device__ (complex const& a, complex const& b) {return a*b;})); + // // return 
conj(+blas::dot(Ac, Bc, std::move(c))); + // }); + // return std::move(Cr); + // }); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + { + multi::array CC({2, 2}, {3.0, 0.0}); + + CC = blas::gemm({1.0, 0.0}, blas::H(A), blas::H(B)); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + using blas::operators::operator*; + using blas::operators::operator~; + CC = ~*A * ~*B; + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_trans_herm) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, blas::T(A), blas::H(B), {0.0, 0.0}, C); + // { + 
// multi::array CC({2, 2}, {3.0, 0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // CC[i][j] = 0.0; + // for(auto k : A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*conj(B[j][k]) ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // thrust::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BP = &B] __device__ (multi::array::reference const& Ac, multi::array::reference&& Cr) { + // thrust::transform(begin(*BP), end(*BP), begin(Cr), begin(Cr), [APc = &Ac] __device__ (multi::array::reference const& Bc, complex&& c) { + // return conj(thrust::inner_product(begin(*APc), end(*APc), begin(Bc), 0.0*c, std::plus<>{}, [] __device__ (complex const& a, complex const& b) {return a*b;})); + // // return conj(+blas::dot(Ac, Bc, std::move(c))); + // }); + // return std::move(Cr); + // }); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + { + multi::array CC({2, 2}, {3.0, 0.0}); + + CC = blas::gemm({1.0, 0.0}, blas::T(A), blas::H(B)); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( 
static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + using blas::operators::operator*; + using blas::operators::operator~; + CC = ~A * ~*B; + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_trans) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + //blas::gemm({1.0, 0.0}, blas::H(A), blas::T(B), {0.0, 0.0}, C); + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // CC[i][j] = 0.0; + // for(auto k : A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*conj(B[j][k]) ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // thrust::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BP = &B] __device__ (multi::array::reference const& Ac, 
multi::array::reference&& Cr) { + // thrust::transform(begin(*BP), end(*BP), begin(Cr), begin(Cr), [APc = &Ac] __device__ (multi::array::reference const& Bc, complex&& c) { + // return conj(thrust::inner_product(begin(*APc), end(*APc), begin(Bc), 0.0*c, std::plus<>{}, [] __device__ (complex const& a, complex const& b) {return a*b;})); + // // return conj(+blas::dot(Ac, Bc, std::move(c))); + // }); + // return std::move(Cr); + // }); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // CC = blas::gemm({1.0, 0.0}, blas::H(A), blas::T(B)); + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // using blas::operators::operator*; + // using blas::operators::operator~; + // CC = ~*A * ~B; + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_conj_herm) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // 
NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + // blas::gemm({1.0, 0.0}, blas::J(A), blas::H(B), {0.0, 0.0}, C); + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // CC[i][j] = 0.0; + // for(auto k : A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*conj(B[j][k]) ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // thrust::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BP = &B] __device__ (multi::array::reference const& Ac, multi::array::reference&& Cr) { + // thrust::transform(begin(*BP), end(*BP), begin(Cr), begin(Cr), [APc = &Ac] __device__ (multi::array::reference const& Bc, complex&& c) { + // return conj(thrust::inner_product(begin(*APc), end(*APc), begin(Bc), 0.0*c, std::plus<>{}, [] __device__ (complex const& a, complex const& b) {return a*b;})); + // // return conj(+blas::dot(Ac, Bc, std::move(c))); + // }); + // return std::move(Cr); + // }); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( 
static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // CC = blas::gemm({1.0, 0.0}, blas::T(A), blas::H(B)); + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // using blas::operators::operator*; + // using blas::operators::operator~; + // CC = ~A * ~*B; + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_conj) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::cuda::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + //blas::gemm({1.0, 0.0}, blas::H(A), blas::J(B), {0.0, 0.0}, C); + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // CC[i][j] = 0.0; + // for(auto k : A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*conj(B[j][k]) ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == 
static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // thrust::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BP = &B] __device__ (multi::array::reference const& Ac, multi::array::reference&& Cr) { + // thrust::transform(begin(*BP), end(*BP), begin(Cr), begin(Cr), [APc = &Ac] __device__ (multi::array::reference const& Bc, complex&& c) { + // return conj(thrust::inner_product(begin(*APc), end(*APc), begin(Bc), 0.0*c, std::plus<>{}, [] __device__ (complex const& a, complex const& b) {return a*b;})); + // // return conj(+blas::dot(Ac, Bc, std::move(c))); + // }); + // return std::move(Cr); + // }); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // CC = blas::gemm({1.0, 0.0}, blas::T(A), blas::H(B)); + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // using blas::operators::operator*; + // using blas::operators::operator~; + // CC = ~A * ~*B; + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == 
static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } +} + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::cuda::allocator; + + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + blas::trsm(blas::side::left, {1.0, 0.0}, blas::U(A), blas::H(B)); // B†←A⁻¹.B†, B←B.A⁻¹†, B←(A⁻¹.B†)† + BOOST_REQUIRE_CLOSE( static_cast(B[1][2]).imag() , -0.147059 , 0.001); +} + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const_UTH) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::cuda::allocator; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + { 1.0 + 4.0*I, 0.0 + 0.0*I, 0.0 - 0.0*I}, + { 3.0 + 0.0*I, 7.0 - 3.0*I, 0.0 + 0.0*I}, + { 4.0 - 10.0*I, 1.0 + 0.0*I, 8.0 - 2.0*I}, + }; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array B = { + {1.0 + 1.0*I, 2.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I}, + {3.0 + 1.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + blas::trsm(blas::side::left, {1.0, 0.0}, blas::U(blas::H(A)), 
B); + BOOST_REQUIRE_CLOSE( static_cast(B[1][1]).imag(), -0.0811359, 0.001); + BOOST_REQUIRE_CLOSE( static_cast(B[2][1]).imag(), -0.147059, 0.001); +} + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::cuda::allocator; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array B = { + {1.0 + 1.0*I, 2.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I}, + {3.0 + 1.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + blas::trsm(blas::side::left, {1.0, 0.0}, blas::U(A), B); // B←A⁻¹.B, B†←A⁻¹.B† + BOOST_REQUIRE_CLOSE( static_cast(B[2][1]).imag() , -0.0882353, 0.001); +} + +// BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_conj_second) { +// namespace blas = multi::blas; +// using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit +// using Alloc = thrust::cuda::allocator; + +// // NOLINTNEXTLINE(readability-identifier-length) BLAS naming +// multi::array const A = { +// { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, +// { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, +// { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, +// }; + +// // NOLINTNEXTLINE(readability-identifier-length) BLAS naming +// multi::array B = { +// {1.0 + 1.0*I, 2.0 + 1.0*I}, +// {5.0 + 3.0*I, 9.0 + 3.0*I}, +// {3.0 + 1.0*I, 1.0 - 1.0*I}, +// }; + +// using multi::blas::trsm; + +// blas::trsm(blas::side::left, {1.0, 0.0}, blas::U(A), blas::J(B)); // B*←A⁻¹.B*, B^T←A⁻¹.B^T +// BOOST_REQUIRE_CLOSE( static_cast(B[2][1]).imag() , -0.0882353, 0.001); +// } + 
+BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_operator) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::cuda::universal_allocator; + + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 + 1.0*I, 2.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I}, + {3.0 + 1.0*I, 1.0 - 1.0*I}, + }; + + using blas::operators::operator|=; + using blas::operators::U; + B |= U(A); // B←A⁻¹.B, B†←A⁻¹.B† + BOOST_REQUIRE_CLOSE( static_cast(B[2][1]).imag() , -0.0882353, 0.001); +} + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_right) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::cuda::allocator; + + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + blas::trsm(blas::side::right, {1.0, 0.0}, blas::U(A), B); // B←B.A⁻¹, B←B/A, B†←A⁻¹†.B† + BOOST_REQUIRE_CLOSE( static_cast(B[1][2]).imag(), 1.60142, 0.001); +} + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_right_LT) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = 
thrust::cuda::allocator; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array B = { + { 1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + { 5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + blas::trsm(blas::side::right, {1.0, 0.0}, blas::L(blas::T(A)), B); // B←B.Aᵀ⁻¹, B←B/Aᵀ, B†←Aᵀ⁻¹†.B†, Bᵀ←A⁻¹.Bᵀ, Bᵀ←Bᵀ\A + BOOST_REQUIRE_CLOSE( static_cast(B[1][2]).imag(), -0.0882353, 0.001); +} + +// BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_right_LH) { +// namespace blas = multi::blas; +// using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit +// using Alloc = thrust::cuda::allocator; + +// // NOLINTNEXTLINE(readability-identifier-length) BLAS naming +// multi::array const A = { +// { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, +// { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, +// { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, +// }; + +// // NOLINTNEXTLINE(readability-identifier-length) BLAS naming +// multi::array B = { +// { 1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, +// { 5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, +// }; + +// using multi::blas::trsm; + +// blas::trsm(blas::side::right, {1.0, 0.0}, blas::U(blas::J(A)), B); // B←B.A*⁻¹, B←B/A*, B*←B*.A⁻¹ +// BOOST_REQUIRE_CLOSE( static_cast(B[1][2]).imag(), -0.0882353, 0.001); +// } + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_right_operator) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::cuda::allocator; + + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 
4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + using blas::operators::operator/=; + B /= blas::U(A); + BOOST_REQUIRE_CLOSE( static_cast(B[1][2]).imag(), 1.60142, 0.001); +} + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/all.hip b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/all.hip new file mode 100644 index 0000000000..f76986a42d --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/all.hip @@ -0,0 +1,2051 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUBLAS all" +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +namespace multi = boost::multi; + +using complex = thrust::complex; + +template> +auto generate_ABx() { + complex const I{0.0, 1.0}; + multi::array x = { 1.0 + I*0.0, 2.0 + I*0.0, 3.0 + I*0.0, 4.0 + I*0.0}; + + multi::array A = { + { 1.0 + I*0.0, 2.0 + I*0.0, 3.0 + I*0.0, 4.0 + I*0.0}, + { 5.0 + I*0.0, 6.0 + I*0.0, 7.0 + I*0.0, 8.0 + I*0.0}, + { 9.0 + I*0.0, 10.0 + I*0.0, 11.0 + I*0.0, 12.0 + I*0.0}, + {13.0 + I*0.0, 14.0 + I*0.0, 15.0 + I*0.0, 16.0 + I*0.0}, + }; + + multi::array B = { + { 1.0 + I*0.0, 2.0 + I*0.0, 3.0 + I*0.0, 4.0 + I*0.0}, + { 5.0 + I*0.0, 6.0 + I*0.0, 7.0 + I*0.0, 8.0 + I*0.0}, + { 9.0 + I*0.0, 10.0 + I*0.0, 11.0 + I*0.0, 12.0 + I*0.0}, + {13.0 + I*0.0, 14.0 + I*0.0, 15.0 + I*0.0, 16.0 + I*0.0}, + }; + + return std::make_tuple(std::move(x), std::move(A), 
std::move(B)); +} + +BOOST_AUTO_TEST_CASE(cublas_scal_complex_column) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + { + using T = complex; + auto [x, A, B] = generate_ABx >(); + auto const s = 2.0 + I*3.0; + blas::scal(s, x); // x_i <- s*x_i + + { + auto [x2, A2, B2] = generate_ABx >(); + auto xx = +x2; + blas::scal(s, xx); + BOOST_REQUIRE(xx == x); + } + { + auto [x2, A2, B2] = generate_ABx >(); + using blas::operators::operator*=; + x2 *= s; + BOOST_REQUIRE(x == x2); + } + { + auto [x2, A2, B2] = generate_ABx >(); + thrust::transform(x2.begin(), x2.end(), x2.begin(), [s] __device__ (T& e) {return s*e;}); + + BOOST_REQUIRE(x == x2); + } + { + auto [x2, A2, B2] = generate_ABx >(); + thrust::for_each(x2.begin(), x2.end(), [s] __device__ (T& e) {return e*=s;}); + + BOOST_REQUIRE(x == x2); + } + } +} + +#if 1 +BOOST_AUTO_TEST_CASE(cublas_copy_complex_device) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::device_allocator; + + multi::array const x = { 1.0 + I*8.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + multi::array y = { 1.0 + I*9.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + blas::copy(x, y); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + { + thrust::copy(begin(x), end(x), begin(y)); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + blas::copy_n(x.begin(), x.size(), y.begin()); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + y() = blas::copy(x); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + multi::array yy = blas::copy(x); + BOOST_REQUIRE( static_cast(yy[0]) == 1.0 + I*8.0 ); + } + { + y = blas::copy(x); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + { + using blas::operators::operator<<; + y << x; + // BOOST_REQUIRE(( static_cast(y[0]) == 1.0 + I*8.0 )); // this can't be used with a free operator<< + } + BOOST_REQUIRE(( static_cast(y[0]) == 1.0 + I*8.0 )); // this can't be used with a free operator<< + } +} + + 
+BOOST_AUTO_TEST_CASE(cublas_copy_complex) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::hip::allocator; + + multi::array const x = { 1.0 + I*8.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + multi::array y = { 1.0 + I*9.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + blas::copy(x, y); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + { + thrust::copy(begin(x), end(x), begin(y)); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + blas::copy_n(x.begin(), x.size(), y.begin()); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + y() = blas::copy(x); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + multi::array yy = blas::copy(x); + BOOST_REQUIRE( static_cast(yy[0]) == 1.0 + I*8.0 ); + } + { + y = blas::copy(x); + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*8.0 ); + } + { + { + using blas::operators::operator<<; + y << x; + // BOOST_REQUIRE(( static_cast(y[0]) == 1.0 + I*8.0 )); // this can't be used with a free operator<< + } + BOOST_REQUIRE(( static_cast(y[0]) == 1.0 + I*8.0 )); // this can't be used with a free operator<< + } +} + +#if 1 +BOOST_AUTO_TEST_CASE(cublas_swap_complex) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::hip::allocator; + + multi::array x = { 1.0 + I*8.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + multi::array y = { 1.0 + I*9.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + blas::swap(x, y); + BOOST_REQUIRE( static_cast(x[0]) == 1.0 + I*9.0 ); + { + thrust::swap_ranges(begin(x), end(x), begin(y)); + thrust::swap_ranges(begin(x), end(x), begin(y)); + BOOST_REQUIRE( static_cast(x[0]) == 1.0 + I*9.0 ); + } + { + using blas::operator^; + (x^y); + (x^y); + BOOST_REQUIRE( static_cast(x[0]) == 1.0 + I*9.0 ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_asum_complex_column) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::hip::allocator; + 
+ multi::array const x = { 1.0 + I*8.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + double res; + blas::asum_n(x.begin(), x.size(), &res); + { + double res2; + res2 = blas::asum(x); + BOOST_REQUIRE( res == res2 ); + } + { + double res2 = blas::asum(x); + BOOST_REQUIRE( res == res2 ); + } + { + auto res2 = std::transform_reduce( + x.begin(), x.end(), double{}, std::plus<>{}, [](T const& e) {return std::abs(e.real()) + std::abs(e.imag());} + ); + BOOST_REQUIRE( res == res2 ); + } + { + auto res2 = thrust::transform_reduce( + x.begin(), x.end(), [] __device__ (T const& e) {return std::abs(e.real()) + std::abs(e.imag());}, + double{}, thrust::plus<>{} + ); + BOOST_REQUIRE( res == res2 ); + } + { + multi::static_array> res2({}, 0.0); + res2.assign( &blas::asum(x) ); + res2 = blas::asum(x); + BOOST_REQUIRE(( res == static_cast>::element_ref>(res2) )); + BOOST_REQUIRE(( res == static_cast(res2) )); + // BOOST_REQUIRE( res == res2 ); + } + { + multi::array> res2 = blas::asum(x); + BOOST_REQUIRE(( res == static_cast>::element_ref>(res2) )); + BOOST_REQUIRE(( res == static_cast(res2) )); + // BOOST_REQUIRE( res == res2 ); + } + { + using blas::operators::operator==; + using blas::operators::operator!=; + BOOST_REQUIRE( x != 0 ); + BOOST_REQUIRE( not (x == 0) ); + } + { + using blas::operators::contains_nan; + BOOST_REQUIRE( not contains_nan(x) ); + } + { + using blas::operators::isfinite; + using blas::operators::isinf; + BOOST_REQUIRE( isfinite(x) ); + BOOST_REQUIRE( not isinf(x) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_asum_complex_nans) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::hip::allocator; + + multi::array const x = { 1.0 + I*8.0, std::numeric_limits::quiet_NaN() + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + { + using blas::operators::contains_nan; + BOOST_REQUIRE( contains_nan(x) ); + } + { + using blas::operators::operator==; + using blas::operators::operator!=; + BOOST_REQUIRE( not (x != 0) ); + 
BOOST_REQUIRE( not (x == 0) ); + } + { + using blas::operators::isfinite; + using blas::operators::isinf; + BOOST_REQUIRE( not isfinite(x) ); + BOOST_REQUIRE( not isinf(x) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_asum_complex_inf) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::hip::allocator; + + multi::array const x = { 1.0 + I*8.0, std::numeric_limits::infinity() + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + // double res; + { + using blas::operators::contains_nan; + BOOST_REQUIRE( not contains_nan(x) ); + } + { + using blas::operators::operator==; + using blas::operators::operator!=; + BOOST_REQUIRE( (x != 0) ); + BOOST_REQUIRE( not (x == 0) ); + } + { + using blas::operators::isfinite; + using blas::operators::isinf; + BOOST_REQUIRE( not isfinite(x) ); + BOOST_REQUIRE( isinf(x) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_nrm2_complex_column) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::hip::allocator; + + multi::array const x = { 1.0 + I*8.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + + double res; + blas::nrm2(x, res); + { + double res2; + res2 = blas::nrm2(x); + BOOST_REQUIRE( res == res2 ); + } + { + auto res2 = +blas::nrm2(x); + BOOST_REQUIRE( res == res2 ); + } + { + auto res2 = sqrt(thrust::transform_reduce( + x.begin(), x.end(), [] __device__ (T const& e) {return thrust::norm(e);}, + double{}, thrust::plus<>{} + )); + BOOST_REQUIRE( res == res2 ); + } + { + multi::array> res2 = blas::nrm2(x); + BOOST_REQUIRE(( res == static_cast(res2) )); + } +} + +BOOST_AUTO_TEST_CASE(cublas_dot_complex_column) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::hip::allocator; + + multi::array const x = { 1.0 + I*8.0, 2.0 + I*6.0, 3.0 + I*5.0, 4.0 + I*3.0}; + multi::array const y = { 1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*5.0, 4.0 + I*7.0}; + + { + T res; + blas::dot(x, y, res); + { + complex res2; + 
res2 = blas::dot(x, y); + BOOST_REQUIRE(res == res2); + } + { + multi::array res2(complex{1.0, 0.0}); + res2 = blas::dot(x, y); + BOOST_REQUIRE( static_cast(res2) == res ); + } + { + using blas::operators::operator,; + auto res2 = +(x, y); + BOOST_REQUIRE(res == res2); + } + { + auto res2 = +blas::dot(x, y); + BOOST_REQUIRE(res == res2); + } + { + // auto [x2, A2, B2] = generate_ABx >(); + // thrust::for_each(x2.begin(), x2.end(), [s] __device__ (T& e) {return e*=s;}); + auto res2 = thrust::inner_product(x.begin(), x.end(), y.begin(), T{}); + BOOST_REQUIRE(res == res2); + } + } + { + T res; + blas::dot(blas::C(x), y, res); + { + using blas::operators::operator,; + using blas::operators::operator*; + auto res2 = +(*x, y); + BOOST_REQUIRE(res == res2); + } + { + auto res2 = +blas::dot(blas::C(x), y); + BOOST_REQUIRE(res == res2); + } + { + // auto [x2, A2, B2] = generate_ABx >(); + // thrust::for_each(x2.begin(), x2.end(), [s] __device__ (T& e) {return e*=s;}); + auto res2 = thrust::inner_product(x.begin(), x.end(), y.begin(), T{}, thrust::plus<>{}, [] __device__ (T const& t1, T const& t2) {return conj(t1)*t2;}); + BOOST_REQUIRE(res == res2); + } + } + { + T res; + blas::dot(x, blas::C(y), res); + { + using blas::operators::operator,; + auto res2 = +(x, blas::C(y)); + BOOST_REQUIRE(res == res2); + } + { + auto res2 = +blas::dot(x, blas::C(y)); + BOOST_REQUIRE(res == res2); + } + { + // auto [x2, A2, B2] = generate_ABx >(); + // thrust::for_each(x2.begin(), x2.end(), [s] __device__ (T& e) {return e*=s;}); + auto res2 = thrust::inner_product(x.begin(), x.end(), y.begin(), T{}, thrust::plus<>{}, [] __device__ (T const& t1, T const& t2) {return t1*conj(t2);}); + BOOST_REQUIRE(res == res2); + } + { + BOOST_REQUIRE( blas::dot(blas::C(x), x) == pow(blas::nrm2(x), 2.0) ); + BOOST_REQUIRE( blas::dot(x, blas::C(x)) == pow(blas::nrm2(x), 2.0) ); + + using blas::operators::operator,; + using blas::operators::operator*; + using blas::operators::abs; + using blas::operators::norm; 
+ using blas::operators::operator^; + + BOOST_REQUIRE( (*x, x) == pow(abs(x), 2.0) ); + BOOST_REQUIRE( (*x, x) == pow(abs(x), 2) ); + BOOST_REQUIRE( (*x, x) == norm(x) ); + + BOOST_REQUIRE( (x, *x) == pow(abs(x), 2.0) ); + BOOST_REQUIRE( (x, *x) == pow(abs(x), 2) ); + BOOST_REQUIRE( (x, *x) == norm(x) ); + + BOOST_REQUIRE( (*x, x) == (x^2) ); + } + } + { + // T res; + // blas::dot(blas::C(x), blas::C(y), res); + multi::array res({1, 1}, 0.0); + auto rr = blas::gemm( 1.0, x.partitioned(1), blas::H(y.partitioned(1)), 0.0, res)[0][0]; + // { + // using blas::operators::operator,; + // auto res2 = +(x, blas::C(y)); + // BOOST_REQUIRE(res == res2); + // } + // { + // auto res2 = +blas::dot(x, blas::C(y)); + // BOOST_REQUIRE(res == res2); + // } + // { + // // auto [x2, A2, B2] = generate_ABx >(); + // // thrust::for_each(x2.begin(), x2.end(), [s] __device__ (T& e) {return e*=s;}); + // auto res2 = thrust::inner_product(x.begin(), x.end(), y.begin(), T{}, thrust::plus<>{}, [] __device__ (T const& t1, T const& t2) {return t1*conj(t2);}); + // BOOST_REQUIRE(res == res2); + // } + // { + // BOOST_REQUIRE( blas::dot(blas::C(x), x) == pow(blas::nrm2(x), 2.0) ); + // BOOST_REQUIRE( blas::dot(x, blas::C(x)) == pow(blas::nrm2(x), 2.0) ); + + // using blas::operators::operator,; + // using blas::operators::operator*; + // using blas::operators::abs; + // using blas::operators::norm; + // using blas::operators::operator^; + + // BOOST_REQUIRE( (*x, x) == pow(abs(x), 2.0) ); + // BOOST_REQUIRE( (*x, x) == pow(abs(x), 2) ); + // BOOST_REQUIRE( (*x, x) == norm(x) ); + + // BOOST_REQUIRE( (x, *x) == pow(abs(x), 2.0) ); + // BOOST_REQUIRE( (x, *x) == pow(abs(x), 2) ); + // BOOST_REQUIRE( (x, *x) == norm(x) ); + + // BOOST_REQUIRE( (*x, x) == (x^2) ); + // } + } +} + +BOOST_AUTO_TEST_CASE(cublas_axpy_complex_one) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::hip::allocator; + + multi::array const x = { {1.1, 0.0}, {2.1, 
0.0}, {3.1, 0.0}, {4.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + + blas::axpy(1.0, x, y); + std::cout << y[0] << std::endl; + BOOST_REQUIRE( static_cast(y[0]) == 3.2 + I*0.0 ); + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + thrust::transform(x.begin(), x.end(), yy.begin(), yy.begin(), [] __device__ (auto const& ex, auto const& ey) {return ex + ey;}); + BOOST_TEST( yy == y , boost::test_tools::per_element() ); + } + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; + using blas::operators::operator+=; + yy += x; + BOOST_REQUIRE( yy == y ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_axpy_complex_mone) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::hip::allocator; + + multi::array const x = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + + blas::axpy(-1.0, x, y); + std::cout << y[0] << std::endl; + BOOST_REQUIRE( static_cast(y[0]) == 1.0 + I*0.0 ); + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + thrust::transform(x.begin(), x.end(), yy.begin(), yy.begin(), [] __host__ __device__ (T ex, T ey) {return -1.0*ex + ey;}); + BOOST_TEST( yy == y , boost::test_tools::per_element() ); + } + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; + using blas::operators::operator-=; + yy -= x; + BOOST_REQUIRE( yy == y ); + } + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; + using blas::operators::operator-=; + yy -= x; + yy -= y; + using blas::operators::norm; 
+ BOOST_REQUIRE( norm(yy) == 0 ); + using blas::operators::operator==; + BOOST_REQUIRE( operator==(yy, 0) ); + BOOST_REQUIRE( yy == 0 ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_axpy_complex_alpha) { + namespace blas = multi::blas; + complex const I{0.0, 1.0}; + + using T = complex; + using Alloc = thrust::hip::allocator; + + multi::array const x = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + + blas::axpy(3.0, x, y); + std::cout << y[0] << std::endl; + BOOST_REQUIRE( static_cast(y[0]) == 5.4 + I*0.0 ); + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + thrust::transform(x.begin(), x.end(), yy.begin(), yy.begin(), [aa=3.0] __device__ (T ex, T ey) {return aa*ex + ey;}); + BOOST_TEST( yy == y , boost::test_tools::per_element() ); + } + { + multi::array yy = { {2.1, 0.0}, {4.1, 0.0}, {6.1, 0.0}, {11.0, 0.0} }; + using blas::operators::operator+=; + using blas::operators::operator*; + yy += 3.0*x; + BOOST_REQUIRE( yy == y ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemv_conj_complex_zero) { + namespace blas = multi::blas; + using T = complex; + complex const I{0.0, 1.0}; + using Alloc = thrust::hip::allocator; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + { { 9.0, 0.0}, {24.0, 0.0}, {30.0, 0.0}, {9.0, 0.0} }, + { { 4.0, 0.0}, {10.0, 0.0}, {12.0, 0.0}, {7.0, 0.0} }, + { {14.0, 0.0}, {16.0, 0.0}, {36.0, 0.0}, {1.0, 0.0} }, + }; + multi::array const x = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + blas::gemv(1.0, A, x, 0.0, y); + { + + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // 
NOLINT(readability-identifier-length) BLAS naming + std::transform(begin(A), end(A), begin(yy), [&x] (auto const& Ac) {return blas::dot(Ac, x);}); + + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + yy = blas::gemv(1.0, A, x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = blas::gemv(1.0, A, x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + + } + { + using blas::operators::operator%; + + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + yy = A % x; + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemv_complex_conj_zero) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + { 9.0 + I*0.0, 24.0 + I* 0.0, 30.0 + I* 0.0, 9.0 + I* 0.0 }, + { 4.0 + I*0.0, 10.0 + I* 0.0, 12.0 + I* 0.0, 7.0 + I* 0.0 }, + { 14.0 + I*0.0, 16.0 + I* 0.0, 36.0 + I* 0.0, 1.0 + I* 0.0 }, + }; + multi::array const x = { 1.1 + I* 0.0, 2.1 + I* 0.0, 3.1 + I* 0.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + blas::gemv(1.0, blas::T(A), 
x, 0.0, y); + { + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + using blas::operators::operator*; + std::transform(begin(transposed(A)), end(transposed(A)), begin(yy), [&x] (auto const& Ac) {return blas::dot(Ac, x);}); + + BOOST_REQUIRE_CLOSE( static_cast(y[0]).real(), static_cast(yy[0]).real(), 1e-7 ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + yy = blas::gemv(1.0, blas::T(A), x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = blas::gemv(1.0, blas::T(A), x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + using blas::operators::operator%; + + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + yy = ~A % x; + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } +} + +template void what(T&&) = delete; + +BOOST_AUTO_TEST_CASE(cublas_one_gemv_complex_zero) { + namespace blas = multi::blas; + using T = complex; + complex const I{0.0, 1.0}; + using Alloc = thrust::hip::allocator; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + { { 9.0, 0.0}, {24.0, 0.0}, {30.0, 0.0}, {9.0, 0.0} }, + { { 4.0, 0.0}, {10.0, 0.0}, {12.0, 0.0}, {7.0, 0.0} }, + { {14.0, 0.0}, {16.0, 0.0}, {36.0, 0.0}, {1.0, 0.0} }, + }; + multi::array const x = { {1.1, 
0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + blas::gemv(1.0, blas::J(A), x, 0.0, y); + { + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + std::transform(begin(A), end(A), begin(yy), [&x] (auto const& Ac) { + using blas::operators::operator*; // nvcc 11.8 needs this to be inside lambda + return blas::dot(*Ac, x);} + ); + + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + yy = blas::gemv(1.0, blas::J(A), x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = blas::gemv(1.0, blas::J(A), x); + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + + } + { + using blas::operators::operator%; + using blas::operators::operator*; + + multi::array yy = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0} }; // NOLINT(readability-identifier-length) BLAS naming + yy = *A % x; + BOOST_REQUIRE( static_cast(y[0]) == static_cast(yy[0]) ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } +} + + +BOOST_AUTO_TEST_CASE(cublas_one_gemv_complex_conjtrans_zero) { + namespace blas = multi::blas; + using T = complex; + using Alloc = std::allocator; // thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const 
A = { + { 9.0 + I*0.0, 24.0 + I* 0.0, 30.0 + I* 0.0, 9.0 + I* 0.0 }, + { 4.0 + I*0.0, 10.0 + I* 0.0, 12.0 + I* 0.0, 7.0 + I* 0.0 }, + { 14.0 + I*0.0, 16.0 + I* 0.0, 36.0 + I* 0.0, 1.0 + I* 0.0 }, + }; + multi::array const x = { 1.1 + I* 0.0, 2.1 + I* 0.0, 3.1 + I* 0.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + + // blas::gemv(1.0, blas::H(A), x, 0.0, y); + + { + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + std::transform(begin(transposed(A)), end(transposed(A)), begin(yy), [&x] (auto const& Ac) { + using blas::operators::operator*; // nvcc 11.8 needs this to be inside lambda + return blas::dot(*Ac, x);} + ); + + BOOST_REQUIRE_CLOSE( static_cast(yy[0]).real() , 61.7, 1.e-7 ); + BOOST_REQUIRE_CLOSE( static_cast(yy[1]).real() , 97.0, 1.e-7 ); + BOOST_REQUIRE_CLOSE( static_cast(yy[2]).real() , 169.8, 1.e-7 ); + BOOST_REQUIRE_CLOSE( static_cast(yy[3]).real() , 27.7, 1.e-7 ); + + using blas::operators::operator*; + BOOST_REQUIRE_CLOSE( static_cast(yy[0]).real() , (+blas::dot(*(~A)[0], x)).real() , 1.e-7 ); + BOOST_REQUIRE_CLOSE( static_cast(yy[1]).real() , (+blas::dot(*(~A)[1], x)).real() , 1.e-7 ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemv_complex_trans_one) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + { 9.0 + I*0.0, 24.0 + I* 0.0, 30.0 + I* 0.0, 9.0 + I* 0.0 }, + { 4.0 + I*0.0, 10.0 + I* 0.0, 12.0 + I* 0.0, 7.0 + I* 0.0 }, + { 14.0 + I*0.0, 16.0 + I* 0.0, 36.0 + I* 0.0, 1.0 + I* 0.0 }, + }; + multi::array const x = { 1.1 + I* 0.0, 2.1 + I* 0.0, 3.1 + I* 0.0}; // NOLINT(readability-identifier-length) BLAS naming + multi::array y = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 
+ I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + blas::gemv(3.0 + I*4.0, blas::T(A), x, 1.0, y); + { + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + // using blas::operators::operator*; + std::transform(begin(transposed(A)), end(transposed(A)), begin(yy), begin(yy), [&x,aa=3.0 + I*4.0,bb=1.0] (auto const& Ac, complex e) {return aa*blas::dot(Ac, x) + bb*e;}); + + BOOST_REQUIRE_CLOSE( static_cast(y[0]).real(), static_cast(yy[0]).real(), 1e-7 ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + yy += blas::gemv(3.0 + I*4.0, blas::T(A), x); + + BOOST_REQUIRE_CLOSE( static_cast(y[0]).real(), static_cast(yy[0]).real(), 1e-7 ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } + { + multi::array yy = { 1.1 + I* 0.0, 2.1 +I* 0.0, 3.1 + I* 0.0, 6.7 + I*0.0 }; // NOLINT(readability-identifier-length) BLAS naming + using blas::operators::operator*; + yy += (3.0 + I*4.0)* ~A % x; + + BOOST_REQUIRE_CLOSE( static_cast(y[0]).real(), static_cast(yy[0]).real(), 1e-7 ); + BOOST_REQUIRE( static_cast(y[1]) == static_cast(yy[1]) ); + BOOST_REQUIRE( static_cast(y[2]) == static_cast(yy[2]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_trans_none) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // 
NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + blas::gemm({1.0, 0.0}, A, B, {0.0, 0.0}, C); + + // std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + // [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + // ); + std::transform(begin(A), end(A), begin(C_copy), end(C_copy), + [&B, aa=1.0, bb=0] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C = blas::gemm(1.0 + I*0.0, A, B); + + // std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + // [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + // ); + std::transform(begin(A), end(A), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=0.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C += blas::gemm(1.0 + I*0.0, A, B); + + std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=1.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // 
NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += A*B; + + std::transform(begin(A), end(A), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_trans_second) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + blas::gemm({1.0, 0.0}, A, blas::T(B), {0.0, 0.0}, C); + + std::transform(begin(B), end(B), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C = blas::gemm(1.0 + I*0.0, A, blas::T(B)); + + // std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + // [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + // ); + std::transform(begin(A), end(A), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=0.0] (auto const& Ac, auto&& Cr) { + 
return blas::gemv(aa, B, Ac, bb, std::move(Cr)); + }); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C += blas::gemm(1.0 + I*0.0, A, blas::T(B)); + + std::transform(begin(B), end(B), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=1.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += A * ~B; + + std::transform(begin(A), end(A), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, B, Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += 2.0*(A * ~B); + + std::transform(begin(A), end(A), begin(C_copy), begin(C_copy), [&B, aa=2.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, B, Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_trans_first) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // 
NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + blas::gemm({1.0, 0.0}, blas::T(A), B, {0.0, 0.0}, C); + + std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, blas::T(A), Bc, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C = blas::gemm(1.0 + I*0.0, blas::T(A), B); + + // std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + // [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + // ); + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=0.0] (auto const& Ac, auto&& Cr) { + return blas::gemv(aa, blas::T(B), Ac, bb, std::move(Cr)); + }); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C += blas::gemm(1.0 + I*0.0, blas::T(A), B); + + std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=1.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, blas::T(A), Bc, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( 
static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += ~A * B; + + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += 2.0*(~A * B); + + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=2.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_trans_both) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + blas::gemm({1.0, 0.0}, blas::T(A), blas::T(B), {0.0, 0.0}, C); + + std::transform(begin(B), end(B), begin(transposed(C_copy)), 
begin(transposed(C_copy)), + [&A, aa=1.0, bb=0.0] (auto const& Br, auto&& Cc) {return blas::gemv(aa, blas::T(A), Br, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C = blas::gemm(1.0 + I*0.0, blas::T(A), blas::T(B)); + + // std::transform(begin(transposed(B)), end(transposed(B)), begin(transposed(C_copy)), begin(transposed(C_copy)), + // [&A, aa=1.0, bb=0.0] (auto const& Bc, auto&& Cc) {return blas::gemv(aa, A, Bc, bb, std::move(Cc));} + // ); + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=0.0] (auto const& Ac, auto&& Cr) { + return blas::gemv(aa, B, Ac, bb, std::move(Cr)); + }); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + C += blas::gemm(1.0 + I*0.0, blas::T(A), blas::T(B)); + + std::transform(begin(B), end(B), begin(transposed(C_copy)), begin(transposed(C_copy)), + [&A, aa=1.0, bb=1.0] (auto const& Br, auto&& Cc) {return blas::gemv(aa, blas::T(A), Br, bb, std::move(Cc));} + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += ~A * ~B; + + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=1.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, B, Ar, bb, std::move(Cr)); + } + ); 
+ + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto C_copy = C; + using blas::operators::operator*; + using blas::operators::operator+=; + C += 2.0*(~A * ~B); + + std::transform(begin(transposed(A)), end(transposed(A)), begin(C_copy), begin(C_copy), [&B, aa=2.0, bb=1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, B, Ar, bb, std::move(Cr)); + } + ); + + BOOST_REQUIRE( static_cast(C_copy[1][0]) == static_cast(C[1][0]) ); + BOOST_REQUIRE( static_cast(C_copy[0][1]) == static_cast(C[0][1]) ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_conj_second) { + namespace blas = multi::blas; + using T = complex; + using Alloc = std::allocator; // thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto CC = C; + auto C_copy = CC; + // blas::gemm({1.0, 0.0}, A, blas::J(B), {0.0, 0.0}, C); + blas::gemm({1.0, 0.0}, blas::T(B), blas::H(A), {0.0, 0.0}, C_copy); + { + auto const [is, js] = C.extensions(); + for(auto i : is) { + for(auto j : js) { + C[i][j] *= 0.0; + for(auto k : B.extension()) { + C[i][j] += A[i][k]*conj(B[k][j]); + } + } + } + } + { + std::transform(begin(A), end(A), begin(CC), begin(CC), [BT = transposed(B)](auto const& Ar, auto&& Cr) { + return std::transform( + begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar](auto const& Bc, auto&& Ce) { + return 1.0*blas::dot(Ar, blas::C(Bc)) + 0.0*Ce; + } + ), std::move(Cr); + }); + } + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == 
static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(C_copy[1][0]).real() == +static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(C_copy[1][0]).imag() == -static_cast(C[0][1]).imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_conj_first) { + namespace blas = multi::blas; + using T = complex; + using Alloc = std::allocator; // thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto CC = C; + auto C_copy = CC; + // blas::gemm({1.0, 0.0}, blas::J(A), B, {0.0, 0.0}, C); + // blas::gemm({1.0, 0.0}, blas::T(B), blas::H(A), {0.0, 0.0}, C_copy); + // { + // auto const [is, js] = C.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // C[i][j] *= 0.0; + // for(auto k : B.extension()) { + // C[i][j] += A[i][k]*conj(B[k][j]); + // } + // } + // } + // } + // { + // std::transform(begin(A), end(A), begin(CC), begin(CC), [BT = transposed(B)](auto const& Ar, auto&& Cr) { + // return std::transform( + // begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar](auto const& BCr, auto&& Ce) { + // return 1.0*blas::dot(Ar, blas::C(BCr)) + 0.0*Ce; + // } + // ), std::move(Cr); + // }); + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == 
static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(C_copy[1][0]).real() == +static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(C_copy[1][0]).imag() == -static_cast(C[0][1]).imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_conj_both) { + namespace blas = multi::blas; + using T = complex; + using Alloc = std::allocator; // thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + { + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + auto CC = C; + auto C_copy = CC; + // blas::gemm({1.0, 0.0}, blas::J(A), blas::J(B), {0.0, 0.0}, C); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_second) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, A, blas::H(B), {0.0, 0.0}, C); + { + multi::array CC({2, 2}, {3.0, 0.0}); + + std::transform( + begin(A), end(A), begin(CC), begin(CC), + [&B, aa = 1.0, bb = 0.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, blas::J(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + 
+ BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + CC = blas::gemm({1.0, 0.0}, A, blas::H(B)); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + using blas::operators::operator*; + using blas::operators::operator~; + CC = A* ~*B; + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_second_plus) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, A, blas::H(B), {1.0, 0.0}, C); + { + multi::array CC({2, 2}, {3.0, 0.0}); + + std::transform( + begin(A), end(A), begin(CC), begin(CC), + [&B, aa = 1.0, bb = 1.0] (auto const& Ar, auto&& Cr) { + return blas::gemv(aa, blas::J(B), Ar, bb, std::move(Cr)); + } + ); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == 
static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + CC += blas::gemm({1.0, 0.0}, A, blas::H(B)); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + using blas::operators::operator*; + using blas::operators::operator~; + CC += A* ~*B; + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_first) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, blas::H(A), B, {0.0, 0.0}, C); + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // CC[i][j] = 0.0; + // for(auto k : 
A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*B[k][j] ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + { + multi::array CC({2, 2}, {3.0, 0.0}); + + std::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BT=transposed(B)](auto const& Ac, auto&& Cr) { + std::transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ac](auto const& Bc, auto&& c) { + return blas::dot(blas::C(Ac), Bc, std::move(c)); + }); + return std::move(Cr); + }); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + CC = blas::gemm({1.0, 0.0}, blas::H(A), B); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + using blas::operators::operator*; + using blas::operators::operator~; + CC = ~*A *B; + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( 
static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_both) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, blas::H(A), blas::H(B), {0.0, 0.0}, C); + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // CC[i][j] = 0.0; + // for(auto k : A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*conj(B[j][k]) ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // thrust::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BP = &B] __device__ (multi::array::reference const& Ac, multi::array::reference&& Cr) { + // thrust::transform(begin(*BP), end(*BP), begin(Cr), begin(Cr), [APc = &Ac] __device__ (multi::array::reference const& Bc, complex&& c) { + // return conj(thrust::inner_product(begin(*APc), end(*APc), begin(Bc), 0.0*c, std::plus<>{}, [] __device__ (complex const& a, complex const& b) {return a*b;})); + // // return conj(+blas::dot(Ac, Bc, std::move(c))); + // }); + // return std::move(Cr); + // }); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == 
static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + { + multi::array CC({2, 2}, {3.0, 0.0}); + + CC = blas::gemm({1.0, 0.0}, blas::H(A), blas::H(B)); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + using blas::operators::operator*; + using blas::operators::operator~; + CC = ~*A * ~*B; + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_trans_herm) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, blas::T(A), blas::H(B), {0.0, 0.0}, C); + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // 
CC[i][j] = 0.0; + // for(auto k : A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*conj(B[j][k]) ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // thrust::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BP = &B] __device__ (multi::array::reference const& Ac, multi::array::reference&& Cr) { + // thrust::transform(begin(*BP), end(*BP), begin(Cr), begin(Cr), [APc = &Ac] __device__ (multi::array::reference const& Bc, complex&& c) { + // return conj(thrust::inner_product(begin(*APc), end(*APc), begin(Bc), 0.0*c, std::plus<>{}, [] __device__ (complex const& a, complex const& b) {return a*b;})); + // // return conj(+blas::dot(Ac, Bc, std::move(c))); + // }); + // return std::move(Cr); + // }); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + { + multi::array CC({2, 2}, {3.0, 0.0}); + + CC = blas::gemm({1.0, 0.0}, blas::T(A), blas::H(B)); + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } + { + multi::array CC({2, 2}, {3.0, 0.0}); + using 
blas::operators::operator*; + using blas::operators::operator~; + CC = ~A * ~*B; + + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_trans) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + //blas::gemm({1.0, 0.0}, blas::H(A), blas::T(B), {0.0, 0.0}, C); + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // CC[i][j] = 0.0; + // for(auto k : A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*conj(B[j][k]) ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // thrust::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BP = &B] __device__ (multi::array::reference const& Ac, multi::array::reference&& Cr) { + // thrust::transform(begin(*BP), end(*BP), begin(Cr), begin(Cr), [APc = &Ac] __device__ 
(multi::array::reference const& Bc, complex&& c) { + // return conj(thrust::inner_product(begin(*APc), end(*APc), begin(Bc), 0.0*c, std::plus<>{}, [] __device__ (complex const& a, complex const& b) {return a*b;})); + // // return conj(+blas::dot(Ac, Bc, std::move(c))); + // }); + // return std::move(Cr); + // }); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // CC = blas::gemm({1.0, 0.0}, blas::H(A), blas::T(B)); + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // using blas::operators::operator*; + // using blas::operators::operator~; + // CC = ~*A * ~B; + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_conj_herm) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + 
}; + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + // blas::gemm({1.0, 0.0}, blas::J(A), blas::H(B), {0.0, 0.0}, C); + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // CC[i][j] = 0.0; + // for(auto k : A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*conj(B[j][k]) ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // thrust::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BP = &B] __device__ (multi::array::reference const& Ac, multi::array::reference&& Cr) { + // thrust::transform(begin(*BP), end(*BP), begin(Cr), begin(Cr), [APc = &Ac] __device__ (multi::array::reference const& Bc, complex&& c) { + // return conj(thrust::inner_product(begin(*APc), end(*APc), begin(Bc), 0.0*c, std::plus<>{}, [] __device__ (complex const& a, complex const& b) {return a*b;})); + // // return conj(+blas::dot(Ac, Bc, std::move(c))); + // }); + // return std::move(Cr); + // }); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // CC = blas::gemm({1.0, 0.0}, blas::T(A), 
blas::H(B)); + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // using blas::operators::operator*; + // using blas::operators::operator~; + // CC = ~A * ~*B; + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } +} + +BOOST_AUTO_TEST_CASE(cublas_one_gemm_complex_herm_conj) { + namespace blas = multi::blas; + using T = complex; + using Alloc = thrust::hip::allocator; + complex const I{0.0, 1.0}; + + // NOLINT(readability-identifier-length) BLAS naming + multi::array const A = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + + multi::array const B = { + {3.0 - 4.0 * I, 19.0 - 1.0 * I}, + {1.0 + 5.0 * I, 8.0 - 8.0 * I}, + }; + multi::array C({2, 2}, {3.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + //blas::gemm({1.0, 0.0}, blas::H(A), blas::J(B), {0.0, 0.0}, C); + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // auto const [is, js] = CC.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // CC[i][j] = 0.0; + // for(auto k : A.extension()) { + // CC[i][j] += 1.0*conj(A[k][i])*conj(B[j][k]) ; + // } + // } + // } + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( 
static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // thrust::transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BP = &B] __device__ (multi::array::reference const& Ac, multi::array::reference&& Cr) { + // thrust::transform(begin(*BP), end(*BP), begin(Cr), begin(Cr), [APc = &Ac] __device__ (multi::array::reference const& Bc, complex&& c) { + // return conj(thrust::inner_product(begin(*APc), end(*APc), begin(Bc), 0.0*c, std::plus<>{}, [] __device__ (complex const& a, complex const& b) {return a*b;})); + // // return conj(+blas::dot(Ac, Bc, std::move(c))); + // }); + // return std::move(Cr); + // }); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + + // CC = blas::gemm({1.0, 0.0}, blas::T(A), blas::H(B)); + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } + // { + // multi::array CC({2, 2}, {3.0, 0.0}); + // using blas::operators::operator*; + // using blas::operators::operator~; + // CC = ~A * ~*B; + + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).real() == static_cast(C[1][0]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[1][0]).imag() == static_cast(C[1][0]).imag() ); + + // BOOST_TEST_REQUIRE( 
static_cast(CC[0][1]).real() == static_cast(C[0][1]).real() ); + // BOOST_TEST_REQUIRE( static_cast(CC[0][1]).imag() == static_cast(C[0][1]).imag() ); + // } +} + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::hip::allocator; + + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + blas::trsm(blas::side::left, {1.0, 0.0}, blas::U(A), blas::H(B)); // B†←A⁻¹.B†, B←B.A⁻¹†, B←(A⁻¹.B†)† + BOOST_REQUIRE_CLOSE( static_cast(B[1][2]).imag() , -0.147059 , 0.001); +} + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const_UTH) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::hip::allocator; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + { 1.0 + 4.0*I, 0.0 + 0.0*I, 0.0 - 0.0*I}, + { 3.0 + 0.0*I, 7.0 - 3.0*I, 0.0 + 0.0*I}, + { 4.0 - 10.0*I, 1.0 + 0.0*I, 8.0 - 2.0*I}, + }; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array B = { + {1.0 + 1.0*I, 2.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I}, + {3.0 + 1.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + blas::trsm(blas::side::left, {1.0, 0.0}, blas::U(blas::H(A)), B); + BOOST_REQUIRE_CLOSE( static_cast(B[1][1]).imag(), -0.0811359, 0.001); + BOOST_REQUIRE_CLOSE( static_cast(B[2][1]).imag(), -0.147059, 0.001); 
+} + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::hip::allocator; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array B = { + {1.0 + 1.0*I, 2.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I}, + {3.0 + 1.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + blas::trsm(blas::side::left, {1.0, 0.0}, blas::U(A), B); // B←A⁻¹.B, B†←A⁻¹.B† + BOOST_REQUIRE_CLOSE( static_cast(B[2][1]).imag() , -0.0882353, 0.001); +} + +// BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_conj_second) { +// namespace blas = multi::blas; +// using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit +// using Alloc = thrust::hip::allocator; + +// // NOLINTNEXTLINE(readability-identifier-length) BLAS naming +// multi::array const A = { +// { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, +// { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, +// { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, +// }; + +// // NOLINTNEXTLINE(readability-identifier-length) BLAS naming +// multi::array B = { +// {1.0 + 1.0*I, 2.0 + 1.0*I}, +// {5.0 + 3.0*I, 9.0 + 3.0*I}, +// {3.0 + 1.0*I, 1.0 - 1.0*I}, +// }; + +// using multi::blas::trsm; + +// blas::trsm(blas::side::left, {1.0, 0.0}, blas::U(A), blas::J(B)); // B*←A⁻¹.B*, B^T←A⁻¹.B^T +// BOOST_REQUIRE_CLOSE( static_cast(B[2][1]).imag() , -0.0882353, 0.001); +// } + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_operator) { + namespace blas = multi::blas; + using complex = 
thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::hip::universal_allocator; + + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 + 1.0*I, 2.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I}, + {3.0 + 1.0*I, 1.0 - 1.0*I}, + }; + + using blas::operators::operator|=; + using blas::operators::U; + B |= U(A); // B←A⁻¹.B, B†←A⁻¹.B† + BOOST_REQUIRE_CLOSE( static_cast(B[2][1]).imag() , -0.0882353, 0.001); +} + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_right) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::hip::allocator; + + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + + multi::array B = { // NOLINT(readability-identifier-length) BLAS naming + {1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + blas::trsm(blas::side::right, {1.0, 0.0}, blas::U(A), B); // B←B.A⁻¹, B←B/A, B†←A⁻¹†.B† + BOOST_REQUIRE_CLOSE( static_cast(B[1][2]).imag(), 1.60142, 0.001); +} + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_right_LT) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::hip::allocator; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 
10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array B = { + { 1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + { 5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + blas::trsm(blas::side::right, {1.0, 0.0}, blas::L(blas::T(A)), B); // B←B.Aᵀ⁻¹, B←B/Aᵀ, B†←Aᵀ⁻¹†.B†, Bᵀ←A⁻¹.Bᵀ, Bᵀ←Bᵀ\A + BOOST_REQUIRE_CLOSE( static_cast(B[1][2]).imag(), -0.0882353, 0.001); +} + +// BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_right_LH) { +// namespace blas = multi::blas; +// using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit +// using Alloc = thrust::hip::allocator; + +// // NOLINTNEXTLINE(readability-identifier-length) BLAS naming +// multi::array const A = { +// { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, +// { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, +// { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, +// }; + +// // NOLINTNEXTLINE(readability-identifier-length) BLAS naming +// multi::array B = { +// { 1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, +// { 5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, +// }; + +// using multi::blas::trsm; + +// blas::trsm(blas::side::right, {1.0, 0.0}, blas::U(blas::J(A)), B); // B←B.A*⁻¹, B←B/A*, B*←B*.A⁻¹ +// BOOST_REQUIRE_CLOSE( static_cast(B[1][2]).imag(), -0.0882353, 0.001); +// } + +BOOST_AUTO_TEST_CASE(UTA_blas_trsm_complex_nonsquare_default_diagonal_gemm_check_no_const_right_operator) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using Alloc = thrust::hip::allocator; + + multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + multi::array B = { // 
NOLINT(readability-identifier-length) BLAS naming + {1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + using multi::blas::trsm; + + using blas::operators::operator/=; + B /= blas::U(A); + BOOST_REQUIRE_CLOSE( static_cast(B[1][2]).imag(), 1.60142, 0.001); +} +#endif +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/axpy.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/axpy.cu new file mode 100644 index 0000000000..e8b5b71b3c --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/axpy.cu @@ -0,0 +1,66 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUBLAS gemv" +#include + +#include + +#include +#include +#include +#include + +#include + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(blas_axpy_complex) { + namespace blas = multi::blas; + using complex = thrust::complex; + + { + multi::thrust::cuda::array arr = { + {{1.0, 0.0}, {2.0, 0.0}, {3.0, 0.0}, {4.0, 0.0}}, + {{5.0, 0.0}, {6.0, 0.0}, {7.0, 0.0}, {8.0, 0.0}}, + {{9.0, 0.0}, {10.0, 0.0}, {11.0, 0.0}, {12.0, 0.0}}, + }; + auto const const_arr = arr; + + multi::thrust::cuda::array const x = arr[2]; // NOLINT(readability-identifier-length) BLAS naming + blas::axpy(complex{2.0, 0.0}, x, arr[1]); // arr can't be const + + multi::array arr_copy = arr; + BOOST_REQUIRE(( arr_copy[1][0] == complex{23.0, 0.0} )); + } + { + multi::thrust::cuda::array arr = { + {{1.0, 0.0}, {2.0, 0.0}, {3.0, 0.0}, {4.0, 0.0}}, + {{5.0, 0.0}, {6.0, 0.0}, {7.0, 0.0}, {8.0, 0.0}}, + {{9.0, 0.0}, {10.0, 0.0}, {11.0, 0.0}, {12.0, 0.0}}, + }; + auto const const_arr = arr; + + multi::thrust::cuda::array const x = arr[2]; // NOLINT(readability-identifier-length) BLAS naming + arr[1] += blas::axpy(complex{2.0, 0.0}, 
x); + + multi::array arr_copy = arr; + BOOST_REQUIRE(( arr_copy[1][0] == complex{23.0, 0.0} )); + } + { + multi::thrust::cuda::array arr = { + {{1.0, 0.0}, {2.0, 0.0}, {3.0, 0.0}, {4.0, 0.0}}, + {{5.0, 0.0}, {6.0, 0.0}, {7.0, 0.0}, {8.0, 0.0}}, + {{9.0, 0.0}, {10.0, 0.0}, {11.0, 0.0}, {12.0, 0.0}}, + }; + auto const const_arr = arr; + + multi::thrust::cuda::array const x = arr[2]; // NOLINT(readability-identifier-length) BLAS naming + arr[1] = blas::axpy(2.0, x); // blas::axpy(complex{2.0, 0.0}, x); + + multi::array arr_copy = arr; + std::cout << arr_copy[1][0] << std::endl; + BOOST_REQUIRE(( arr_copy[1][0] == complex{23.0, 0.0} )); + } +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/dot.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/dot.cu new file mode 100644 index 0000000000..1d6a4bd68d --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/dot.cu @@ -0,0 +1,82 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUBLAS dot" +#include + +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#include + +namespace multi = boost::multi; + +// BOOST_AUTO_TEST_CASE(cublas_dot_out_param_complex_C) { +// namespace blas = multi::blas; +// using complex = thrust::complex; +// complex const I{0.0, 1.0}; + +// multi::thrust::cuda::array const x = {1.0 + 0.0*I, 2.0 + 0.0*I, 3.0 + 0.0*I}; // NOLINT(readability-identifier-length) BLAS naming +// multi::thrust::cuda::array const y = {1.0 + 0.0*I, 2.0 + 2.0*I, 3.0 + 0.0*I}; // NOLINT(readability-identifier-length) BLAS naming + +// complex res{0.0, 0.0}; +// blas::dot(blas::C(x), y, res); +// // BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), complex{0.0, 0.0}, std::plus<>{}, [](auto const& alpha, auto const& omega) {return conj(alpha)*omega;}) ); +// } + +BOOST_AUTO_TEST_CASE(cublas_dot_out_array0D_complex_C) { + namespace blas = multi::blas; + using complex = thrust::complex; + complex const I{0.0, 1.0}; + + multi::thrust::cuda::array const x = {1.0 + 0.0*I, 2.0 + 0.0*I, 3.0 + 0.0*I}; // NOLINT(readability-identifier-length) BLAS naming + multi::thrust::cuda::array const y = {1.0 + 0.0*I, 2.0 + 2.0*I, 3.0 + 0.0*I}; // NOLINT(readability-identifier-length) BLAS naming + + multi::thrust::cuda::array res{complex{0.0, 0.0}}; + blas::dot(blas::C(x), y, res); + + { + multi::array res_copy{complex{0.0, 0.0}}; + res_copy = res; + BOOST_REQUIRE(( *res_copy.base() == complex{14.0, 4.0} )); + } + { + multi::array res_copy{res}; + BOOST_REQUIRE(( *res_copy.base() == complex{14.0, 4.0} )); + } +} + +// BOOST_AUTO_TEST_CASE(blas_dot_functional_complex_C) { +// namespace blas = multi::blas; +// using complex = thrust::complex; +// complex const I{0.0, 1.0}; + +// multi::thrust::cuda::array const x = {1.0 + 0.0*I, 2.0 + 0.0*I, 3.0 + 0.0*I}; // NOLINT(readability-identifier-length) BLAS naming +// 
multi::thrust::cuda::array const y = {1.0 + 0.0*I, 2.0 + 2.0*I, 3.0 + 0.0*I}; // NOLINT(readability-identifier-length) BLAS naming + +// complex res = blas::dot(blas::C(x), y); +// BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), complex{0.0, 0.0}, std::plus<>{}, [](auto const& alpha, auto const& omega) {return conj(alpha)*omega;}) ); +// } + +// BOOST_AUTO_TEST_CASE(blas_dot_functional_mutate_complex_C) { +// namespace blas = multi::blas; +// using complex = thrust::complex; +// complex const I{0.0, 1.0}; + +// multi::thrust::cuda::array const x = {1.0 + 0.0*I, 2.0 + 0.0*I, 3.0 + 0.0*I}; // NOLINT(readability-identifier-length) BLAS naming +// multi::thrust::cuda::array const y = {1.0 + 0.0*I, 2.0 + 2.0*I, 3.0 + 0.0*I}; // NOLINT(readability-identifier-length) BLAS naming + +// complex res; +// res = blas::dot(blas::C(x), y); +// BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), complex{0.0, 0.0}, std::plus<>{}, [](auto const& alpha, auto const& omega) {return conj(alpha)*omega;}) ); +// } diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/gemm.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/gemm.cu new file mode 100644 index 0000000000..70f0216da0 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/gemm.cu @@ -0,0 +1,97 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUBLAS gemm" +#include + +#include + +#include +#include +#include +#include + +#include + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(multi_blas_gemv_complex) { + namespace blas = multi::blas; + using complex = thrust::complex; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::thrust::cuda::array const M_gpu = { + { { 9.0, 0.0}, {24.0, 0.0}, {30.0, 0.0}, {9.0, 0.0} }, + { { 4.0, 0.0}, {10.0, 0.0}, {12.0, 0.0}, {7.0, 0.0} }, + { {14.0, 0.0}, {16.0, 0.0}, {36.0, 0.0}, {1.0, 0.0} }, + }; + + multi::thrust::cuda::array const X_gpu = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; + + multi::thrust::cuda::array Y_gpu = { {4.0, 0.0}, {5.0, 0.0}, {6.0, 0.0} }; + + blas::gemv(/*alpha*/ 1.1, M_gpu, X_gpu, /*beta*/ 1.2, Y_gpu); // y = a*M*x + b*y + + multi::array const Y_copy = Y_gpu; + + using blas::operators::operator-; + BOOST_REQUIRE_SMALL( +blas::nrm2(Y_copy - multi::array{ {214.02, 0.0}, {106.43, 0.0}, {188.37, 0.0} }) , 1e-13); +} + +BOOST_AUTO_TEST_CASE(cublas_gemv_real) { + namespace blas = multi::blas; + using T = double; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::thrust::cuda::array const M_gpu = { + { 9.0, 24.0, 30.0, 9.0 }, + { 4.0, 10.0, 12.0, 7.0 }, + { 14.0, 16.0, 36.0, 1.0 }, + }; + + multi::thrust::cuda::array const X_gpu = { 1.1, 2.1, 3.1, 4.1 }; + + multi::thrust::cuda::array Y_gpu = { 4.0, 5.0, 6.0 }; + + blas::gemv(/*alpha*/ 1.1, M_gpu, X_gpu, /*beta*/ 1.2, Y_gpu); // y = a*M*x + b*y + + multi::array const Y_copy = Y_gpu; + + using blas::operators::operator-; + BOOST_REQUIRE_SMALL( +blas::nrm2(Y_copy - multi::array{ 214.02, 106.43, 188.37 }) , 1e-13); +} + +BOOST_AUTO_TEST_CASE(cublas_gemm_nh) { + namespace blas = multi::blas; + + using complex = thrust::complex; + complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imaginary unit + + // 
NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::thrust::cuda::array const a = { + {1.0 - 2.0 * I, 9.0 - 1.0 * I}, + {2.0 + 3.0 * I, 1.0 - 2.0 * I}, + }; + { + multi::thrust::cuda::array c({2, 2}, {9999.0, 0.0}); // NOLINT(readability-identifier-length) conventional BLAS naming + blas::gemm({1.0, 0.0}, a, a, {0.0, 0.0}, c); // c=aa†, c†=aa† + + multi::array const c_copy = c; + BOOST_REQUIRE( c_copy[1][0] == 16.0 - 2.0*I ); + BOOST_REQUIRE( c_copy[0][1] == 14.0 - 38.0*I ); + } + { + auto const c = +blas::gemm(complex{1.0, 0.0}, a, a); // c=aa†, c†=aa† + + multi::array const c_copy = c; + BOOST_REQUIRE( c_copy[1][0] == 16.0 - 2.0*I ); + BOOST_REQUIRE( c_copy[0][1] == 14.0 - 38.0*I ); + } + { + multi::thrust::cuda::array c({2, 2}, {0.0, 0.0}); + c += blas::gemm(complex{1.0, 0.0}, a, a); // c=aa†, c†=aa† + + multi::array const c_copy = c; + BOOST_REQUIRE( c_copy[1][0] == 16.0 - 2.0*I ); + BOOST_REQUIRE( c_copy[0][1] == 14.0 - 38.0*I ); + } +} \ No newline at end of file diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/gemv.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/gemv.cu new file mode 100644 index 0000000000..aef3158bc4 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/gemv.cu @@ -0,0 +1,80 @@ +// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- +// Copyright 2023 Alfredo A. 
Correa + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUBLAS gemv" +#include + +#include + +#include +#include +#include +#include + +#include + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(multi_blas_gemv_complex) { + namespace blas = multi::blas; + using complex = thrust::complex; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::thrust::cuda::array const M_gpu = { + { { 9.0, 0.0}, {24.0, 0.0}, {30.0, 0.0}, {9.0, 0.0} }, + { { 4.0, 0.0}, {10.0, 0.0}, {12.0, 0.0}, {7.0, 0.0} }, + { {14.0, 0.0}, {16.0, 0.0}, {36.0, 0.0}, {1.0, 0.0} }, + }; + + multi::thrust::cuda::array const X_gpu = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; + + multi::thrust::cuda::array Y_gpu = { {4.0, 0.0}, {5.0, 0.0}, {6.0, 0.0} }; + + blas::gemv(/*alpha*/ 1.1, M_gpu, X_gpu, /*beta*/ 1.2, Y_gpu); // y = a*M*x + b*y + + multi::array const Y_copy = Y_gpu; + + using blas::operators::operator-; + BOOST_REQUIRE_SMALL( +blas::nrm2(Y_copy - multi::array{ {214.02, 0.0}, {106.43, 0.0}, {188.37, 0.0} }) , 1e-13); +} + +BOOST_AUTO_TEST_CASE(multi_blas_gemv_complex_value) { + namespace blas = multi::blas; + using complex = thrust::complex; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::thrust::cuda::array const M_gpu = { + { { 9.0, 0.0}, {24.0, 0.0}, {30.0, 0.0}, {9.0, 0.0} }, + { { 4.0, 0.0}, {10.0, 0.0}, {12.0, 0.0}, {7.0, 0.0} }, + { {14.0, 0.0}, {16.0, 0.0}, {36.0, 0.0}, {1.0, 0.0} }, + }; + + multi::thrust::cuda::array const X_gpu = { {1.1, 0.0}, {2.1, 0.0}, {3.1, 0.0}, {4.1, 0.0} }; + + auto const Y_gpu = +blas::gemv(/*alpha*/ 1.1, M_gpu, X_gpu); // y = a*M*x + + multi::array const Y_copy = Y_gpu; + + using blas::operators::operator-; + BOOST_REQUIRE_SMALL( +blas::nrm2(Y_copy - multi::array{ {209.22, 0.0}, {100.43, 0.0}, {181.17, 0.0} }) , 1e-13); +} + +BOOST_AUTO_TEST_CASE(cublas_gemv_real) { + namespace blas = multi::blas; + using T = double; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + 
multi::thrust::cuda::array const M_gpu = { + { 9.0, 24.0, 30.0, 9.0 }, + { 4.0, 10.0, 12.0, 7.0 }, + { 14.0, 16.0, 36.0, 1.0 }, + }; + + multi::thrust::cuda::array const X_gpu = { 1.1, 2.1, 3.1, 4.1 }; + + multi::thrust::cuda::array Y_gpu = { 4.0, 5.0, 6.0 }; + + blas::gemv(/*alpha*/ 1.1, M_gpu, X_gpu, /*beta*/ 1.2, Y_gpu); // y = a*M*x + b*y + + multi::array const Y_copy = Y_gpu; + + using blas::operators::operator-; + BOOST_REQUIRE_SMALL( +blas::nrm2(Y_copy - multi::array{ 214.02, 106.43, 188.37 }) , 1e-13); +} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/test/herk.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/herk.cu similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/test/herk.cu rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/herk.cu diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/scal.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/scal.cu new file mode 100644 index 0000000000..a35c9dc0a2 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/scal.cu @@ -0,0 +1,51 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUBLAS gemv" +#include + +#include + +#include +#include +#include +#include + +#include + +#include + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(cublas_scal_complex_column) { + namespace blas = multi::blas; + + using complex = thrust::complex; + complex const I{0.0, 1.0}; + multi::thrust::cuda::array arr = { + {1.0 + I*0.0, 2.0 + I*0.0, 3.0 + I*0.0, 4.0 + I*0.0}, + {5.0 + I*0.0, 6.0 + I*0.0, 7.0 + I*0.0, 8.0 + I*0.0}, + {9.0 + I*0.0, 10.0 + I*0.0, 11.0 + I*0.0, 12.0 + I*0.0}, + }; + + blas::scal(2.0, (~arr)[1]); + + multi::array arr_copy = arr; + + BOOST_REQUIRE(( (~arr_copy)[1][2] == complex{20.0, 0.0} )); +} + +BOOST_AUTO_TEST_CASE(cublas_scal_complex) { + namespace blas = multi::blas; + + using complex = thrust::complex; + complex const I{0.0, 1.0}; + multi::array const x_copy = { {1.0 + I*1.0}, {2.0 + I*2.0}, {3.0 + I*3.0} }; + auto x = x_copy; + + auto const alpha = complex{2.0, 3.0}; + blas::scal(alpha, x); // x <- alpha*x + + BOOST_REQUIRE(( x[1] == alpha*x_copy[1] )); +} \ No newline at end of file diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/trsm.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/trsm.cu new file mode 100644 index 0000000000..3b0259e61c --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/cublas/test/trsm.cu @@ -0,0 +1,126 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUBLAS trsm" +#include + +#include +#include + +#include + +#include + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(unit_trsm_multi_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + { 1.0 + 0.0*I, 3.0 + 1.2*I, 5.0 - 12.0*I}, + { 0.0 + 0.0*I, 1.0 + 0.0*I, 2.1 + 1.1*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 1.0 + 0.0*I}, + }; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array B = { + {1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + multi::thrust::cuda::array const A_gpu = A; + multi::thrust::cuda::array B_gpu = B; + + using multi::blas::trsm; + using multi::blas::filling; + using multi::blas::hermitized; + + blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::unit, complex{1.0, 0.0}, A , blas::H(B)); // B†←A⁻¹.B†, B←B.A⁻¹†, B←(A⁻¹.B†)† + BOOST_REQUIRE_CLOSE( B[1][0].real() , -43.439999999999998 , 0.001); + BOOST_REQUIRE_CLOSE( B[1][0].imag() , -13.000000000000002 , 0.001); + + blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::unit, complex{1.0, 0.0}, A_gpu, blas::H(B_gpu)); + multi::array B_cpy = B_gpu; + + BOOST_REQUIRE_CLOSE( B_cpy[1][0].real() , -43.439999999999998 , 0.001); + BOOST_REQUIRE_CLOSE( B_cpy[1][0].imag() , -13.000000000000002 , 0.001); +} + +BOOST_AUTO_TEST_CASE(trsm_multi_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + { 1.0 + 
4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array B = { + {1.0 + 1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + multi::thrust::cuda::array const A_gpu = A; + multi::thrust::cuda::array B_gpu = B; + + using multi::blas::trsm; + using multi::blas::filling; + using multi::blas::hermitized; + + // B = ConjugateTranspose[Inverse[A] . ConjugateTranspose[B]] + // ConjugateTranspose[B] = Inverse[A] . ConjugateTranspose[B] + blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::non_unit, complex{1.0, 0.0}, A , blas::H(B)); // B†←A⁻¹.B†, B←B.A⁻¹†, B←(A⁻¹.B†)† + BOOST_REQUIRE_CLOSE( B[1][0].real(), -0.72562939983295538 , 0.001); + BOOST_REQUIRE_CLOSE( B[1][0].imag(), 0.046772461520104877, 0.001); + + BOOST_REQUIRE_CLOSE( real(blas::H(B)[0][1]), -0.72562939983295538 , 0.001); + BOOST_REQUIRE_CLOSE( imag(blas::H(B)[0][1]), -0.046772461520104877, 0.001); + + blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::non_unit, complex{1.0, 0.0}, A_gpu, blas::H(B_gpu)); + cudaDeviceSynchronize(); + + multi::array B_cpy = B_gpu; + BOOST_REQUIRE_CLOSE( B_cpy[1][0].real() , -0.72562939983295538 , 0.001); + BOOST_REQUIRE_CLOSE( B_cpy[1][0].imag() , 0.046772461520104877, 0.001); +} + +BOOST_AUTO_TEST_CASE(default_param_unit_trsm_multi_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const) { + namespace blas = multi::blas; + using complex = thrust::complex; complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array const A = { + { 1.0 + 4.0*I, 3.0 + 0.0*I, 4.0 - 10.0*I}, + { 0.0 + 0.0*I, 7.0 - 3.0*I, 1.0 + 0.0*I}, + { 0.0 + 0.0*I, 0.0 + 0.0*I, 8.0 - 2.0*I}, + }; + // NOLINTNEXTLINE(readability-identifier-length) BLAS naming + multi::array B = { + {1.0 + 
1.0*I, 2.0 + 1.0*I, 3.0 + 1.0*I}, + {5.0 + 3.0*I, 9.0 + 3.0*I, 1.0 - 1.0*I}, + }; + + multi::thrust::cuda::array const A_gpu = A; + multi::thrust::cuda::array B_gpu = B; + + using multi::blas::trsm; + using multi::blas::filling; + using multi::blas::hermitized; + + // B = ConjugateTranspose[Inverse[A] . ConjugateTranspose[B]] + // ConjugateTranspose[B] = Inverse[A] . ConjugateTranspose[B] + blas::trsm(blas::side::left, blas::filling::upper, complex{1.0, 0.0}, A , blas::H(B)); // B†←A⁻¹.B†, B←B.A⁻¹†, B←(A⁻¹.B†)† + BOOST_REQUIRE_CLOSE( B[1][0].real(), -0.72562939983295538 , 0.001); + BOOST_REQUIRE_CLOSE( B[1][0].imag(), 0.046772461520104877, 0.001); + + BOOST_REQUIRE_CLOSE( real(blas::H(B)[0][1]), -0.72562939983295538 , 0.001); + BOOST_REQUIRE_CLOSE( imag(blas::H(B)[0][1]), -0.046772461520104877, 0.001); + + blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::non_unit, complex{1.0, 0.0}, A_gpu, blas::H(B_gpu)); + cudaDeviceSynchronize(); + + multi::array B_cpy = B_gpu; + BOOST_REQUIRE_CLOSE( B_cpy[1][0].real() , -0.72562939983295538 , 0.001); + BOOST_REQUIRE_CLOSE( B_cpy[1][0].imag() , 0.046772461520104877, 0.001); +} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/runtime/error.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/runtime/error.hpp similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/cuda/runtime/error.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/cuda/runtime/error.hpp diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft.hpp new file mode 100644 index 0000000000..21b2508f0f --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft.hpp @@ -0,0 +1,481 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_CUFFTW_HPP +#define BOOST_MULTI_ADAPTORS_CUFFTW_HPP + +#include "../adaptors/../utility.hpp" +#include "../adaptors/../array.hpp" + +#include + +#include +#include + +#include // for raw_pointer_cast + +#if not defined(__HIP_ROCclr__) +#include +#include +#endif + +namespace boost{ +namespace multi{ +namespace cufft{ + +// cuFFT API errors +static char const* _cudaGetErrorEnum(cufftResult error) { + switch (error) { + case CUFFT_SUCCESS: return "CUFFT_SUCCESS"; + + case CUFFT_ALLOC_FAILED: return "CUFFT_ALLOC_FAILED"; + case CUFFT_EXEC_FAILED: return "CUFFT_EXEC_FAILED"; + case CUFFT_INCOMPLETE_PARAMETER_LIST: return "CUFFT_INCOMPLETE_PARAMETER_LIST"; + case CUFFT_INTERNAL_ERROR: return "CUFFT_INTERNAL_ERROR"; + case CUFFT_INVALID_DEVICE: return "CUFFT_INVALID_DEVICE"; + case CUFFT_INVALID_PLAN: return "CUFFT_INVALID_PLAN"; + case CUFFT_INVALID_SIZE: return "CUFFT_INVALID_SIZE"; + case CUFFT_INVALID_TYPE: return "CUFFT_INVALID_TYPE"; + case CUFFT_INVALID_VALUE: return "CUFFT_INVALID_VALUE"; + case CUFFT_NO_WORKSPACE: return "CUFFT_NO_WORKSPACE"; + case CUFFT_NOT_IMPLEMENTED:return "CUFFT_NOT_IMPLEMENTED"; + case CUFFT_NOT_SUPPORTED : return "CUFFT_NOT_SUPPORTED"; + // #if !defined(__HIP_PLATFORM_NVIDIA__) + // case CUFFT_PARSE_ERROR: return "CUFFT_PARSE_ERROR"; + // #endif + case CUFFT_SETUP_FAILED: return "CUFFT_SETUP_FAILED"; + case CUFFT_UNALIGNED_DATA: return "CUFFT_UNALIGNED_DATA"; + // #if !defined(__HIP_PLATFORM_NVIDIA__) + // case CUFFT_LICENSE_ERROR: return "CUFFT_LICENSE_ERROR"; + // #endif + default: assert(0); + } + return ""; +} + +#define cufftSafeCall(err) implcufftSafeCall(err, __FILE__, __LINE__) +inline void implcufftSafeCall(cufftResult err, const char *file, const int line) { + if( CUFFT_SUCCESS != err) { + std::cerr <<"CUFFT error in file "<< __FILE__ <<", line "<< __LINE__ <<"\nerror "<< err <<": "<<_cudaGetErrorEnum(err)<<"\n"; + //fprintf(stderr, "CUFFT error in file 
'%s', line %d\n %s\nerror %d: %s\nterminating!\n", __FILE__, __LINE__, err, + // _cudaGetErrorEnum(err)); + cudaDeviceReset()==cudaSuccess?void():assert(0); + assert(0); + } +} + +class sign { + int impl_ = 0; + + public: + sign() = default; + constexpr sign(int i) : impl_{i} {} + constexpr operator int() const {return impl_;} +}; + +constexpr sign forward{CUFFT_FORWARD}; +constexpr sign none{0}; +constexpr sign backward{CUFFT_INVERSE}; +// constexpr sign backward{CUFFT_BACKWARD}; + +static_assert(forward != none and none != backward and backward != forward, "!"); + +template +struct plan { + Alloc alloc_; + ::size_t workSize_ = 0; + void* workArea_; + + using complex_type = cufftDoubleComplex; + cufftHandle h_; // TODO(correaa) put this in a unique_ptr + std::array, DD + 1> which_iodims_{}; + int first_howmany_; + +public: + using allocator_type = Alloc; + + plan(plan&& other) noexcept : + h_{std::exchange(other.h_, {})}, + which_iodims_{std::exchange(other.which_iodims_, {})}, + first_howmany_{std::exchange(other.first_howmany_, {})}, + workSize_{std::exchange(other.workSize_, {})}, + workArea_{std::exchange(other.workArea_, {})}, + alloc_{std::move(other.alloc_)} + {} + + template< + class ILayout, class OLayout, dimensionality_type D = std::decay_t::rank::value, + class=std::enable_if_t::rank::value> + > + plan(std::array which, ILayout const& in, OLayout const& out, allocator_type const& alloc = {}) : alloc_{alloc} { + + assert(in.sizes() == out.sizes()); + + auto const sizes_tuple = in.sizes(); + auto const istride_tuple = in.strides(); + auto const ostride_tuple = out.strides(); + + using boost::multi::detail::get; + auto which_iodims = std::apply([](auto... 
elems) { + return std::array, sizeof...(elems) + 1>{ // TODO(correaa) added one element to avoid problem with gcc 13 static analysis (out-of-bounds) + std::pair{ + get<0>(elems), + fftw_iodim64{get<1>(elems), get<2>(elems), get<3>(elems)} + }..., + std::pair{} + }; + }, boost::multi::detail::tuple_zip(which, sizes_tuple, istride_tuple, ostride_tuple)); + + std::stable_sort(which_iodims.begin(), which_iodims.end() - 1, [](auto const& a, auto const& b){return get<1>(a).is > get<1>(b).is;}); + + auto const part = std::stable_partition(which_iodims.begin(), which_iodims.end() - 1, [](auto elem) {return std::get<0>(elem);}); + + std::array dims{}; + auto const dims_end = std::transform(which_iodims.begin(), part, dims.begin(), [](auto elem) {return elem.second;}); + + std::array howmany_dims{}; + auto const howmany_dims_end = std::transform(part, which_iodims.end() -1, howmany_dims.begin(), [](auto elem) {return elem.second;}); + + which_iodims_ = which_iodims; + first_howmany_ = part - which_iodims.begin(); + + //////////////////////////////////////////////////////////////////////// + + std::array istrides{}; + std::array ostrides{}; + std::array ion{}; + + auto const istrides_end = std::transform(dims.begin(), dims_end, istrides.begin(), [](auto elem) {return elem.is;}); + auto const ostrides_end = std::transform(dims.begin(), dims_end, ostrides.begin(), [](auto elem) {return elem.os;}); + auto const ion_end = std::transform(dims.begin(), dims_end, ion.begin(), [](auto elem) {return elem.n;}); + + int istride = *(istrides_end -1); + auto inembed = istrides; inembed.fill(0); + int ostride = *(ostrides_end -1); + auto onembed = ostrides; onembed.fill(0); + + for(std::size_t i = 1; i != ion_end - ion.begin(); ++i) { + assert(ostrides[i-1] >= ostrides[i]); + assert(ostrides[i-1]%ostrides[i]==0); + onembed[i]=ostrides[i-1]/ostrides[i]; + assert(istrides[i-1]%istrides[i]==0); + inembed[i]=istrides[i-1]/istrides[i]; + } + + if(dims_end == dims.begin()) {throw 
std::runtime_error{"no ffts in any dimension is not supported"};} + + while(first_howmany_ < D - 1) { + int nelems = 1; + // for(int i = D - 1; i != first_howmany_ + 1; --i) { + // nelems *= which_iodims_[i].second.n; + // if( + // which_iodims_[i - 1].second.is == nelems and + // which_iodims_[i - 1].second.os == nelems + // ) { + // for(int j = i - 1; j != first_howmany_; --j) { + // which_iodims_[j].second.n *= which_iodims_[j + 1].second.n; + // } + // } + + // } + for(int i = first_howmany_ + 1; i != D; ++i) {nelems *= which_iodims_[i].second.n;} + if( + which_iodims_[first_howmany_].second.is == nelems and + which_iodims_[first_howmany_].second.os == nelems + ) { + which_iodims_[first_howmany_ + 1].second.n *= which_iodims_[first_howmany_].second.n; + ++first_howmany_; + } else { + break; + } + } + + if(first_howmany_ == D) { + if constexpr(std::is_same_v) { + cufftSafeCall(::cufftPlanMany( + /*cufftHandle *plan*/ &h_, + /*int rank*/ dims_end - dims.begin(), + /*int *n*/ ion.data(), + /*int *inembed*/ inembed.data(), + /*int istride*/ istride, + /*int idist*/ 1, //stride(first), + /*int *onembed*/ onembed.data(), + /*int ostride*/ ostride, + /*int odist*/ 1, //stride(d_first), + /*cufftType type*/ CUFFT_Z2Z, + /*int batch*/ 1 //BATCH + )); + } else { + cufftSafeCall(cufftCreate(&h_)); + cufftSafeCall(cufftSetAutoAllocation(h_, false)); + cufftSafeCall(cufftMakePlanMany( + /*cufftHandle *plan*/ h_, + /*int rank*/ dims_end - dims.begin(), + /*int *n*/ ion.data(), + /*int *inembed*/ inembed.data(), + /*int istride*/ istride, + /*int idist*/ 1, //stride(first), + /*int *onembed*/ onembed.data(), + /*int ostride*/ ostride, + /*int odist*/ 1, //stride(d_first), + /*cufftType type*/ CUFFT_Z2Z, + /*int batch*/ 1, //BATCH + /*size_t **/ &workSize_ + )); + cufftSafeCall(cufftGetSize(h_, &workSize_)); + workArea_ = ::thrust::raw_pointer_cast(alloc_.allocate(workSize_)); static_assert(sizeof(Alloc) == 1000); + // auto s = cudaMalloc(&workArea_, workSize_); + // if(s != 
cudaSuccess) {throw std::runtime_error{"L212"};} + cufftSafeCall(cufftSetWorkArea(h_, workArea_)); + } + if(not h_) {throw std::runtime_error{"cufftPlanMany null"};} + return; + } + + std::sort(which_iodims_.begin() + first_howmany_, which_iodims_.begin() + D, [](auto const& a, auto const& b){return get<1>(a).n > get<1>(b).n;}); + + if(first_howmany_ <= D - 1) { + if constexpr(std::is_same_v) { // NOLINT(bugprone-branch-clone) workaround bug in DeepSource + cufftSafeCall(::cufftPlanMany( + /*cufftHandle *plan*/ &h_, + /*int rank*/ dims_end - dims.begin(), + /*int *n*/ ion.data(), + /*int *inembed*/ inembed.data(), + /*int istride*/ istride, + /*int idist*/ which_iodims_[first_howmany_].second.is, + /*int *onembed*/ onembed.data(), + /*int ostride*/ ostride, + /*int odist*/ which_iodims_[first_howmany_].second.os, + /*cufftType type*/ CUFFT_Z2Z, + /*int batch*/ which_iodims_[first_howmany_].second.n + )); + } else { + cufftSafeCall(cufftCreate(&h_)); + cufftSafeCall(cufftSetAutoAllocation(h_, false)); + cufftSafeCall(cufftMakePlanMany( + /*cufftHandle *plan*/ h_, + /*int rank*/ dims_end - dims.begin(), + /*int *n*/ ion.data(), + /*int *inembed*/ inembed.data(), + /*int istride*/ istride, + /*int idist*/ which_iodims_[first_howmany_].second.is, + /*int *onembed*/ onembed.data(), + /*int ostride*/ ostride, + /*int odist*/ which_iodims_[first_howmany_].second.os, + /*cufftType type*/ CUFFT_Z2Z, + /*int batch*/ which_iodims_[first_howmany_].second.n, + /*size_t **/ &workSize_ + )); + cufftSafeCall(cufftGetSize(h_, &workSize_)); + workArea_ = ::thrust::raw_pointer_cast(alloc_.allocate(workSize_)); + cufftSafeCall(cufftSetWorkArea(h_, workArea_)); + } + if(not h_) {throw std::runtime_error{"cufftPlanMany null"};} + ++first_howmany_; + return; + } + // throw std::runtime_error{"cufft not implemented yet"}; + } + + private: + plan() = default; + plan(plan const&) = delete; + void ExecZ2Z(complex_type const* idata, complex_type* odata, int direction) const{ + 
cufftSafeCall(::cufftExecZ2Z(h_, const_cast(idata), odata, direction)); + // cudaDeviceSynchronize(); + } + + public: + template + void execute(IPtr idata, OPtr odata, int direction) { // TODO(correaa) make const + if(first_howmany_ == DD) { + ExecZ2Z((complex_type const*)::thrust::raw_pointer_cast(idata), (complex_type*)::thrust::raw_pointer_cast(odata), direction); + return; + } + if(first_howmany_ == DD - 1) { + if( which_iodims_[first_howmany_].first) {throw std::runtime_error{"logic error"};} + for(int i = 0; i != which_iodims_[first_howmany_].second.n; ++i) { + ::cufftExecZ2Z( + h_, + const_cast((complex_type const*)::thrust::raw_pointer_cast(idata + i*which_iodims_[first_howmany_].second.is)), + (complex_type *)::thrust::raw_pointer_cast(odata + i*which_iodims_[first_howmany_].second.os) , + direction + ); + } + return; + } + if(first_howmany_ == DD - 2) { + if( which_iodims_[first_howmany_ + 0].first) {throw std::runtime_error{"logic error0"};} + if( which_iodims_[first_howmany_ + 1].first) {throw std::runtime_error{"logic error1"};} + if(idata == odata) {throw std::runtime_error{"complicated inplace 2"};} + for(int i = 0; i != which_iodims_[first_howmany_].second.n; ++i) { + for(int j = 0; j != which_iodims_[first_howmany_ + 1].second.n; ++j) { + ::cufftExecZ2Z( + h_, + const_cast((complex_type const*)::thrust::raw_pointer_cast(idata + i*which_iodims_[first_howmany_].second.is + j*which_iodims_[first_howmany_ + 1].second.is)), + (complex_type *)::thrust::raw_pointer_cast(odata + i*which_iodims_[first_howmany_].second.os + j*which_iodims_[first_howmany_ + 1].second.os) , + direction + ); + } + } + return; + } + throw std::runtime_error{"error2"}; + } + template + void execute_forward(IPtr idata, OPtr odata) { // TODO(correaa) make const + execute(idata, odata, cufft::forward); + } + template + void execute_backward(IPtr idata, OPtr odata) { // TODO(correaa) make const + execute(idata, odata, cufft::backward); + } + + template + void operator()(IPtr idata, 
OPtr odata, int direction) const { + ExecZ2Z((complex_type const*)::thrust::raw_pointer_cast(idata), (complex_type*)::thrust::raw_pointer_cast(odata), direction); + } + template + O&& execute_dft(I&& i, O&& o, int direction) const { + ExecZ2Z( + const_cast(reinterpret_cast(base(i))), + const_cast(reinterpret_cast(base(o))), + direction + ); + return std::forward(o); + } + + ~plan() { + if constexpr(not std::is_same_v) { + if(workSize_ > 0) {alloc_.deallocate(typename std::allocator_traits::pointer((char*)workArea_), workSize_);} + } + if(h_) {cufftSafeCall(cufftDestroy(h_));} + } + using size_type = int; + using ssize_type = int; +}; + +template +struct cached_plan { + typename std::map, multi::layout_t, multi::layout_t>, plan >::iterator it; + + cached_plan(cached_plan const&) = delete; + cached_plan(cached_plan&&) = delete; + + cached_plan(std::array which, boost::multi::layout_t in, boost::multi::layout_t out, Alloc const& alloc = {}) { + static thread_local std::map, multi::layout_t, multi::layout_t>, plan >& LEAKY_cache = *new std::map, multi::layout_t, multi::layout_t>, plan >; + it = LEAKY_cache.find(std::tuple, multi::layout_t, multi::layout_t>{which, in, out}); + if(it == LEAKY_cache.end()) {it = LEAKY_cache.insert(std::make_pair(std::make_tuple(which, in, out), plan(which, in, out, alloc))).first;} + } + template + void execute(IPtr idata, OPtr odata, int direction) { + // assert(it != LEAKY_cache.end()); + it->second.execute(idata, odata, direction); + } +}; + +template::value, int> =0> +auto dft(std::array which, In const& i, Out&& o, int s) +->decltype(cufft::cached_plan{which, i.layout(), o.layout()}.execute(i.base(), o.base(), s), std::forward(o)) { + return cufft::cached_plan{which, i.layout(), o.layout()}.execute(i.base(), o.base(), s), std::forward(o); } + +template::value, int> =0> +auto dft(std::array which, In const& i, Out&& o, int s) +->decltype(cufft::cached_plan::rebind_alloc*/ >{which, i.layout(), o.layout()/*, 
i.get_allocator()*/}.execute(i.base(), o.base(), s), std::forward(o)) { + return cufft::cached_plan::rebind_alloc*/ >{which, i.layout(), o.layout()/*, i.get_allocator()*/}.execute(i.base(), o.base(), s), std::forward(o); } + +template//, std::enable_if_t::value, int> =0> +auto dft_forward(std::array which, In const& i, Out&& o) -> Out&& { +//->decltype(cufft::plan{which, i.layout(), o.layout()}.execute(i.base(), o.base(), cufft::forward), std::forward(o)) { + return cufft::cached_plan{which, i.layout(), o.layout()}.execute(i.base(), o.base(), cufft::forward), std::forward(o); } + +// template::value, int> =0> +// auto dft_forward(std::array which, In const& i, Out&& o) -> Out&& { +// //->decltype(cufft::plan::rebind_alloc >{which, i.layout(), o.layout(), i.get_allocator()}.execute(i.base(), o.base(), cufft::backward), std::forward(o)) { +// return cufft::cached_plan::rebind_alloc*/>{which, i.layout(), o.layout()/*, i.get_allocator()*/}.execute(i.base(), o.base(), cufft::forward), std::forward(o); } + +template//, std::enable_if_t::value, int> =0> +auto dft_backward(std::array which, In const& i, Out&& o) -> Out&& { +//->decltype(cufft::plan{which, i.layout(), o.layout()}.execute(i.base(), o.base(), cufft::backward), std::forward(o)) { + return cufft::cached_plan{which, i.layout(), o.layout()}.execute(i.base(), o.base(), cufft::backward), std::forward(o); } + +// template::value, int> =0> +// auto dft_backward(std::array which, In const& i, Out&& o) -> Out&& { +// //->decltype(cufft::plan::rebind_alloc >{which, i.layout(), o.layout(), i.get_allocator()}.execute(i.base(), o.base(), cufft::backward), std::forward(o)) { +// return cufft::cached_plan::rebind_alloc*/>{which, i.layout(), o.layout()/*, i.get_allocator()*/}.execute(i.base(), o.base(), cufft::backward), std::forward(o); } + +template()))>> +BOOST_MULTI_NODISCARD("when first argument is const") +R dft(In const& i, int s) { + static_assert(std::is_trivially_default_constructible{}); + R ret(extensions(i), 
get_allocator(i)); + cufft::dft(i, ret, s); + // if(cudaDeviceSynchronize() != cudaSuccess) throw std::runtime_error{"Cuda error: Failed to synchronize"}; + return ret; +} + +template +constexpr auto array_tail_impl(Array const& t, std::index_sequence) { + return std::array{} - 1>{std::get(t)...}; +} + +template +constexpr auto array_tail(Array const& t) +->decltype(array_tail_impl(t, std::make_index_sequence{} - 1>())) { + return array_tail_impl(t, std::make_index_sequence{} - 1>()); } + +// template1), int> = 0> +// auto dft_forward(std::array which, In const& i, Out&& o) +// ->decltype(dft(which, i, std::forward(o), cufft::forward)) { +// return dft(which, i, std::forward(o), cufft::forward); } + +// template1), int> = 0> +// auto dft_backward(std::array which, In const& i, Out&& o) +// ->decltype(dft(which, i, std::forward(o), cufft::backward)) { +// return dft(which, i, std::forward(o), cufft::backward); } + +template +BOOST_MULTI_NODISCARD("when passing a const argument") +auto dft(std::array which, In const& i, int sign)->std::decay_t{return +dft(which, i, typename In::decay_type(extensions(i), get_allocator(i)), sign);} + +template +auto dft(std::array which, In&& i, int sign) +->decltype(dft(which, i, i, sign), std::forward(i)){ + return dft(which, i, i, sign), std::forward(i);} + +template BOOST_MULTI_NODISCARD("when passing a const argument") +auto dft_forward(Array arr, A const& a) +->decltype(cufft::dft(arr, a, cufft::forward)){ + return cufft::dft(arr, a, cufft::forward);} + +// template NODISCARD("when passing a const argument") +// auto dft_forward(Array arr, multi::cuda::array, D>&& a) +// ->decltype(cufft::dft(arr, a, cufft::forward), multi::cuda::array, D>{}){//assert(0); +// return cufft::dft(arr, a, cufft::forward), std::move(a);} + +template BOOST_MULTI_NODISCARD("when passing a const argument") +auto dft_forward(A const& a) +->decltype(cufft::dft(a, cufft::forward)){ + return cufft::dft(a, cufft::forward);} + +template auto 
dft_backward(A&&... a) +->decltype(cufft::dft(std::forward(a)..., cufft::backward)){ + return cufft::dft(std::forward(a)..., cufft::backward);} + +template BOOST_MULTI_NODISCARD("when passing a const argument") +auto dft_backward(Array arr, A const& a) +->decltype(cufft::dft(arr, a, cufft::backward)){ + return cufft::dft(arr, a, cufft::backward);} + +template BOOST_MULTI_NODISCARD("when passing a const argument") +auto dft_backward(A const& a) +->decltype(cufft::dft(a, cufft::backward)){ + return cufft::dft(a, cufft::backward);} + +} + +}} +#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cufft/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft/CMakeLists.txt similarity index 67% rename from external_codes/boost_multi/multi/include/multi/adaptors/cufft/CMakeLists.txt rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft/CMakeLists.txt index 6d6b1819a0..e68065b6fb 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cufft/CMakeLists.txt +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft/CMakeLists.txt @@ -9,16 +9,12 @@ project( LANGUAGES CXX ) -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - if(ENABLE_CUDA OR DEFINED CXXCUDA) enable_language(CUDA) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda") + # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda") endif() -find_package(CUDA QUIET) +# find_package(CUDA QUIET) include_directories(${CUDA_INCLUDE_DIRS}) link_libraries(${CUDA_CUFFT_LIBRARIES}) diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cufft/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft/test/CMakeLists.txt similarity index 81% rename from external_codes/boost_multi/multi/include/multi/adaptors/cufft/test/CMakeLists.txt rename to 
external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft/test/CMakeLists.txt index 8fa75e5481..00a3842f34 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cufft/test/CMakeLists.txt +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft/test/CMakeLists.txt @@ -34,16 +34,21 @@ link_libraries("-lboost_unit_test_framework") if(ENABLE_CUDA OR DEFINED CXXCUDA) enable_language(CUDA) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda") - find_package(CUDA QUIET) - if(CUDA_FOUND) - message("CUDA found") - include_directories(${CUDA_INCLUDE_DIRS}) - set(INQ_CUDA_LIBRARIES ${CUDA_CUFFT_LIBRARIES} ${CUDA_cusolver_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) - link_libraries(${INQ_CUDA_LIBRARIES}) - else() - message("CUDA not found") + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES native) endif() + + # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda") + # find_package(CUDA QUIET) + find_package(CUDAToolkit REQUIRED COMPONENTS cuFFT) + # if(CUDA_FOUND) + # message("CUDA found") + # include_directories(${CUDA_INCLUDE_DIRS}) + # # set(INQ_CUDA_LIBRARIES ${CUDA_CUFFT_LIBRARIES} ${CUDA_cusolver_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) + # # link_libraries(${INQ_CUDA_LIBRARIES}) + # else() + # message("CUDA not found") + # endif() endif() # FFTW has to go before blas to avoid unscrupulous (i.e. 
MKL) blas implementations that include FFTW and don't implement it properly @@ -79,6 +84,9 @@ foreach(TEST_FILE ${TEST_SRCS}) target_include_directories(${TEST_EXE} PRIVATE ${Boost_INCLUDE_DIRS}) target_link_libraries(${TEST_EXE} PRIVATE ${Boost_LIBRARIES}) target_link_directories(${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS}) + target_link_libraries(${TEST_EXE} PRIVATE multi) + target_link_libraries(${TEST_EXE} PRIVATE CUDA::cufft) + if(NOT ENABLE_CUDA) target_compile_options( ${TEST_EXE} @@ -124,5 +132,5 @@ foreach(TEST_FILE ${TEST_SRCS}) /W4> ) endif() - add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) + add_test(NAME ${TEST_EXE} COMMAND $) endforeach() diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft/test/cufft.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft/test/cufft.cpp new file mode 100644 index 0000000000..04c641b1fa --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/cufft/test/cufft.cpp @@ -0,0 +1,698 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuFFT adaptor" +#include + +#include + +// #include "../../../adaptors/cuda.hpp" +#include "../../../adaptors/fft.hpp" +#include "../../../adaptors/fftw.hpp" + +#if (! (defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_NVIDIA__))) && (! defined(__HIPCC__)) +#include "../../../adaptors/cufft.hpp" +#else +#include "../../../adaptors/hipfft.hpp" +#endif + +#include "../../../adaptors/thrust.hpp" + +#include + +#include + +#if ! defined(__HIP_PLATFORM_AMD__) && ! 
defined(__HIPCC__) +#include // cudaDeviceSynchronize +#else +#endif + +#include +#include +#include +#include + +namespace multi = boost::multi; +using complex = thrust::complex; +namespace utf = boost::unit_test; +complex const I{0.0, 1.0}; + +template<> +constexpr bool multi::force_element_trivial_default_construction> = true; + +template +__attribute__((always_inline)) inline void DoNotOptimize(const T &value) { + asm volatile("" : "+m"(const_cast(value))); +} + +struct watch : private std::chrono::high_resolution_clock{ + std::string label_; time_point start_; + watch(std::string label ="") : label_{label}, start_{}{ + cudaDeviceSynchronize()==cudaSuccess?void():assert(0); + start_ = now(); + } + ~watch() { + cudaDeviceSynchronize()==cudaSuccess?void():assert(0); + auto const count = std::chrono::duration(now() - start_).count(); + std::cerr<< label_<<": "<< count <<" sec"<; // this can't be std::complex in the gpu + + auto const in_cpu = multi::array{ + { 1.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} + }; + + { + auto fw_cpu = multi::array(extensions(in_cpu)); + multi::fftw::dft_forward({true, true}, in_cpu, fw_cpu); + + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + multi::cufft::plan<2>({true, true}, in_gpu.layout(), fw_gpu.layout()) + .execute(in_gpu.base(), fw_gpu.base(), multi::cufft::forward); + + BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); + } + { + auto fw_cpu = multi::array(extensions(in_cpu)); + multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); + + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; 
+ auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + multi::cufft::plan<2>({false, true}, in_gpu.layout(), fw_gpu.layout()) + .execute(in_gpu.base(), fw_gpu.base(), multi::cufft::forward); + + BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); + } + { + auto fw_cpu = multi::array(extensions(in_cpu)); + multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); + + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + for(int i = 0; i != in_gpu.size(); ++i) { + multi::cufft::plan<1>({true}, in_gpu[i].layout(), fw_gpu[i].layout()) + .execute(in_gpu[i].base(), fw_gpu[i].base(), multi::cufft::forward); + } + + BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); + } + { + auto fw_cpu = multi::array(extensions(in_cpu)); + multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); + + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + auto fw_gpu2 = multi::thrust::cuda::array(extensions(in_gpu)); + auto fw_gpu3 = multi::thrust::cuda::array(extensions(in_gpu)); + + BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + for(int i = 0; i != in_gpu.size(); ++i) { + multi::cufft::plan<1>({true}, in_gpu[i].layout(), fw_gpu[i].layout()) + .execute(in_gpu[i].base(), fw_gpu[i].base(), multi::cufft::forward); + } + + multi::cufft::plan<2>({false, true}, in_gpu.layout(), fw_gpu2.layout()) + .execute(in_gpu.base(), fw_gpu2.base(), multi::cufft::forward); + + BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); + 
BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); + + BOOST_TEST( (complex(fw_gpu[3][2]) - complex(fw_gpu2[3][2])).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[3][2]) - complex(fw_gpu2[3][2])).imag() == 0.0 ); + } + { + auto fw_cpu = multi::array(extensions(in_cpu)); + multi::fftw::dft({false, true}, in_cpu, fw_cpu, multi::fftw::forward); + + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + auto const fw_gpu = multi::cufft::dft({false, true}, in_gpu, multi::cufft::forward); + + BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); + + BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).imag() == 0.0 ); + } + { + auto fw_cpu = multi::array(extensions(in_cpu)); + multi::fftw::dft({true, false}, in_cpu, fw_cpu, multi::fftw::forward); + + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + auto const fw_gpu = multi::cufft::dft({true, false}, in_gpu, multi::cufft::forward); + + BOOST_TEST( fw_cpu[3][2].real() != 0.0 ); + BOOST_TEST( fw_cpu[3][2].imag() != 0.0 ); + + BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[3][2]) - fw_cpu[3][2]).imag() == 0.0 ); + + BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[2][3]) - fw_cpu[2][3]).imag() == 0.0 ); + } +} + +BOOST_AUTO_TEST_CASE(check_thrust_complex_vs_std_complex, *boost::unit_test::tolerance(0.0001)){ + + multi::array, 1> const s_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; + multi::array, 1> const t_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; + + multi::array, 1> s_out(s_in.extensions()); + multi::array, 1> t_out(t_in.extensions()); + + multi::fftw::plan::forward({true}, s_in.base(), s_in.layout(), s_out.base(), s_out.layout()).execute(s_in.base(), 
s_out.base()); + multi::fftw::plan::forward({true}, t_in.base(), t_in.layout(), t_out.base(), t_out.layout()).execute(t_in.base(), t_out.base()); + + BOOST_REQUIRE( std::equal(s_out.begin(), s_out.end(), t_out.begin()) ); +} + +BOOST_AUTO_TEST_CASE(small_1D_cpu_vs_cpu, *boost::unit_test::tolerance(0.0001)){ + + multi::array, 1> const cpu_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; + multi::thrust::cuda::array, 1> const gpu_in = {1.0 + I*2.0, 2.0 + I*3.0, 3.0 + I*4.0}; + + multi::array, 1> cpu_out(cpu_in.extensions()); + multi::thrust::cuda::array, 1> gpu_out(gpu_in.extensions()); + + multi::fftw::plan::forward({true}, cpu_in.base(), cpu_in.layout(), cpu_out.base(), cpu_out.layout()).execute (cpu_in.base(), cpu_out.base()); + multi::cufft::plan<1> ({true}, gpu_in.layout(), gpu_out.layout()).execute_forward(gpu_in.base(), gpu_out.base()); +} + +BOOST_AUTO_TEST_CASE(cufft_1D_combinations, *boost::unit_test::tolerance(0.0001)){ + + using complex = thrust::complex; // this can't be std::complex in the gpu + + auto const in_cpu = std::invoke([]{ + multi::array ret({128}, complex{}); + std::default_random_engine generator; + std::uniform_real_distribution distribution(1.0, 88.0); + + std::generate( + reinterpret_cast(ret.data_elements()), + reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + ); + return ret; + }); + + for(auto c : std::vector>{ + {true} //, + // {false}, + }){ + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + + for(auto const idx : extension(in_cpu)) { + std::cout << "A: " << idx << ": " << in_cpu[idx] << ", " << in_gpu[idx] << std::endl; + } + + BOOST_TEST( complex(in_gpu[31]).real() == in_cpu[31].real() ); + BOOST_TEST( complex(in_gpu[31]).imag() == in_cpu[31].imag() ); + + auto fw_cpu = multi::array(extensions(in_cpu)); + auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + auto p_cpu = multi::fftw::plan::forward(c, in_cpu.base(), in_cpu.layout(), fw_cpu.base(), fw_cpu.layout()); + 
auto p_gpu = multi::cufft::plan<1> (c, in_gpu.layout(), fw_gpu.layout()); + + for(auto const idx : extension(in_cpu)) { + std::cout << "B: " << idx << ": " << in_cpu[idx] << ", " << in_gpu[idx] << std::endl; + } + + BOOST_TEST( complex(in_gpu[31]).real() == in_cpu[31].real() ); + BOOST_TEST( complex(in_gpu[31]).imag() == in_cpu[31].imag() ); + + p_cpu.execute (in_cpu.base(), fw_cpu.base()); + p_gpu.execute_forward(in_gpu.base(), fw_gpu.base()); + + BOOST_TEST( fw_cpu[31].real() != 0.0 ); + BOOST_TEST( fw_cpu[31].imag() != 0.0 ); + + for(auto const idx : extension(in_cpu)) { + std::cout << "C: " << idx << ": " << in_cpu[idx] << ", " << in_gpu[idx] << std::endl; + } + + BOOST_TEST( complex(in_gpu[31]).real() == in_cpu[31].real() ); + BOOST_TEST( complex(in_gpu[31]).imag() == in_cpu[31].imag() ); + + for(auto const idx : extension(in_cpu)) { + std::cout << idx << ": " << fw_cpu[idx] << ", " << fw_gpu[idx] << std::endl; + } + + BOOST_TEST( complex(fw_gpu[31]).real() == fw_cpu[31].real() ); + BOOST_TEST( complex(fw_gpu[31]).imag() == fw_cpu[31].imag() ); + } +} + +BOOST_AUTO_TEST_CASE(cufft_2D_combinations, *boost::unit_test::tolerance(0.0001)){ + + using complex = thrust::complex; // this can't be std::complex in the gpu + + auto const in_cpu = std::invoke([]{ + multi::array ret({10, 20}); + std::default_random_engine generator; + std::uniform_real_distribution distribution(-1.0, 1.0); + + std::generate( + reinterpret_cast(ret.data_elements()), + reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + ); + return ret; + }); + + for(auto c : std::vector>{ + {true , true }, + {true , false}, + {false, true }//, + // {false, false} + }){ + auto fw_cpu = multi::array(extensions(in_cpu)); + multi::fftw::dft(c, in_cpu, fw_cpu, multi::fftw::forward); + + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + BOOST_TEST( fw_cpu[2][1].real() != 0.0 ); + BOOST_TEST( 
fw_cpu[2][1].imag() != 0.0 ); + + multi::cufft::plan<2>(c, in_gpu.layout(), fw_gpu.layout()) + .execute(in_gpu.base(), fw_gpu.base(), multi::cufft::forward); + + BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).imag() == 0.0 ); + } +} + +BOOST_AUTO_TEST_CASE(cufft_2D_combinations_inplace, *boost::unit_test::tolerance(0.0001)){ + + using complex = thrust::complex; // this can't be std::complex in the gpu + + auto const in_cpu = std::invoke([]{ + multi::array ret({10, 20}); + std::default_random_engine generator; + std::uniform_real_distribution distribution(-1.0, 1.0); + + std::generate( + reinterpret_cast(ret.data_elements()), + reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + ); + return ret; + }); + + for(auto c : std::vector>{ + {true , true }, + {true , false}, + {false, true }//, + // {false, false} + }){ + auto fw_cpu = in_cpu; + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + + multi::fftw::dft(c, fw_cpu, multi::fftw::forward); + + auto fw_gpu = in_gpu; + + BOOST_TEST( fw_cpu[2][1].real() != 0.0 ); + BOOST_TEST( fw_cpu[2][1].imag() != 0.0 ); + + multi::cufft::plan<2>(c, fw_gpu.layout(), fw_gpu.layout()) + .execute(fw_gpu.base(), fw_gpu.base(), multi::cufft::forward); + + BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[2][1]) - fw_cpu[2][1]).imag() == 0.0 ); + } +} + +BOOST_AUTO_TEST_CASE(cufft_3D, *boost::unit_test::tolerance(0.0001)){ + + using complex = thrust::complex; // this can't be std::complex in the gpu + + auto const in_cpu = std::invoke([]{ + multi::array ret({10, 20, 30}); + std::default_random_engine generator; + std::uniform_real_distribution distribution(-1.0, 1.0); + + std::generate( + reinterpret_cast(ret.data_elements()), + reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + ); + return ret; + }); + + for(auto c : 
std::vector>{ + {true , true , true }, + {true , true , false}, + {true , false, true }, + {true , false, false}, + {false, true , true }, + {false, true , false}, + {false, false, true }//, + // {false, false, false} + }){ + auto fw_cpu = multi::array(extensions(in_cpu)); + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + + multi::fftw::dft(c, in_cpu, fw_cpu, multi::fftw::forward); + auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + multi::cufft::dft(c, in_gpu, fw_gpu, multi::cufft::forward); + + BOOST_TEST( fw_cpu[3][2][1].real() != 0.0 ); + BOOST_TEST( fw_cpu[3][2][1].imag() != 0.0 ); + + BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).imag() == 0.0 ); + } +} + +BOOST_AUTO_TEST_CASE(cufft_3D_inplace, *boost::unit_test::tolerance(0.0001)){ + + using complex = thrust::complex; // this can't be std::complex in the gpu + + auto const in_cpu = std::invoke([]{ + multi::array ret({10, 20, 30}); + std::default_random_engine generator; + std::uniform_real_distribution distribution(-1.0, 1.0); + + std::generate( + reinterpret_cast(ret.data_elements()), + reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + ); + return ret; + }); + + for(auto c : std::vector>{ + {true , true , true }, + {true , true , false}, + {true , false, true }, + {true , false, false}, + {false, true , true }, + {false, true , false}, + {false, false, true }//, + // {false, false, false} + }){ + auto fw_cpu = in_cpu; + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + + multi::fftw::dft(c, fw_cpu, multi::fftw::forward); + auto fw_gpu = in_gpu; + + multi::cufft::plan<3>(c, fw_gpu.layout(), fw_gpu.layout()) + .execute(fw_gpu.base(), fw_gpu.base(), multi::cufft::forward); + + BOOST_TEST( fw_cpu[3][2][1].real() != 0.0 ); + BOOST_TEST( fw_cpu[3][2][1].imag() != 0.0 ); + + std::cerr << "case " << c[0] << " " << c[1] << " " << c[2] << std::endl; 
+ + BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[3][2][1]) - fw_cpu[3][2][1]).imag() == 0.0 ); + } +} + +BOOST_AUTO_TEST_CASE(cufft_4D, *boost::unit_test::tolerance(0.0001)){ + + using complex = thrust::complex; // this can't be std::complex in the gpu + + auto const in_cpu = std::invoke([]{ + multi::array ret({10, 20, 30, 40}); + std::default_random_engine generator; + std::uniform_real_distribution distribution(-1.0, 1.0); + + std::generate( + reinterpret_cast(ret.data_elements()), + reinterpret_cast(ret.data_elements() + ret.num_elements()), [&]{return distribution(generator);} + ); + return ret; + }); + + for(auto c : std::vector>{ + // {true , true , true , true }, + {true , true , true , false}, + {true , true , false, true }, + {true , true , false, false}, + {true , false, true , true }, + {true , false, true , false}, + {true , false, false, true }, + {true , false, false, false}, + {false, true , true , true }, + {false, true , true , false}, + {false, true , false, true }, + {false, true , false, false}, + {false, false, true , true }, + {false, false, true , false}, + {false, false, false, true }//, + // {false, false, false, false} + }){ + auto fw_cpu = multi::array(extensions(in_cpu)); + multi::fftw::dft(c, in_cpu, fw_cpu, multi::fftw::forward); + + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; + auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu)); + + BOOST_TEST( fw_cpu[4][3][2][1].real() != 0.0 ); + BOOST_TEST( fw_cpu[4][3][2][1].imag() != 0.0 ); + + multi::cufft::plan<4>(c, in_gpu.layout(), fw_gpu.layout()) + .execute(in_gpu.base(), fw_gpu.base(), multi::cufft::forward); + + BOOST_TEST( (complex(fw_gpu[4][3][2][1]) - fw_cpu[4][3][2][1]).real() == 0.0 ); + BOOST_TEST( (complex(fw_gpu[4][3][2][1]) - fw_cpu[4][3][2][1]).imag() == 0.0 ); + } +} + +BOOST_AUTO_TEST_CASE(cufft_3D_timing, *boost::unit_test::tolerance(0.0001)){ + + auto x = multi::extensions_t<3>{300, 300, 300}; + 
{ + auto const in_cpu = multi::array(x, 10.0); + BOOST_ASSERT( in_cpu.num_elements()*sizeof(complex) < 2e9 ); + auto fw_cpu = multi::array(extensions(in_cpu), 99.0); + { + // boost::timer::auto_cpu_timer t; // 1.041691s wall, 1.030000s user + 0.000000s system = 1.030000s CPU (98.9%) + multi::fftw::dft_forward({true, true}, in_cpu, fw_cpu); + BOOST_TEST( fw_cpu[8][9][10] != 99.0 ); + } + + auto const in_gpu = multi::thrust::cuda::array{in_cpu}; // (x, 10.0); + cudaDeviceSynchronize()==cudaSuccess?void():assert(0); + { + auto fw_gpu = multi::thrust::cuda::array(extensions(in_gpu), 99.0); + cudaDeviceSynchronize()==cudaSuccess?void():assert(0); + // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) + boost::multi::cufft::dft({true, true}, in_gpu, fw_gpu, multi::cufft::forward); + cudaDeviceSynchronize()==cudaSuccess?void():assert(0); + BOOST_TEST( (static_cast(fw_gpu[8][9][10]) - fw_cpu[8][9][10]).real() == 0.0 ); + BOOST_TEST( (static_cast(fw_gpu[8][9][10]) - fw_cpu[8][9][10]).imag() == 0.0 ); + } + { + // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) + auto const fw_gpu2 = boost::multi::cufft::dft({true, true}, in_gpu, multi::cufft::forward); + cudaDeviceSynchronize()==cudaSuccess?void():assert(0); + BOOST_TEST( (static_cast(fw_gpu2[8][9][10]) - fw_cpu[8][9][10]).real() == 0.0 ); + BOOST_TEST( (static_cast(fw_gpu2[8][9][10]) - fw_cpu[8][9][10]).imag() == 0.0 ); + } + } + +#if 1 + { + multi::thrust::cuda::universal_array const in_gpu(x, 10.); + multi::thrust::cuda::universal_array fw_gpu(extensions(in_gpu), 99.); + + // multi::cuda::managed::array const in_gpu(x, 10.); + // multi::cuda::managed::array fw_gpu(extensions(in_gpu), 99.); + { + // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) + multi::cufft::dft({true, true}, in_gpu, fw_gpu, multi::cufft::forward); + // BOOST_TEST( 
fw_gpu[8][9][10].operator complex() != 99. ); + } + { + // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) + multi::cufft::dft({true, true}, in_gpu, fw_gpu, multi::cufft::forward); + // BOOST_TEST( fw_gpu[8][9][10].operator complex() != 99. ); + } + } +#endif +} + +#if 0 + +BOOST_AUTO_TEST_CASE(cufft_combinations, *utf::tolerance(0.00001)){ + + auto const in = []{ + multi::array ret({32, 90, 98, 96}); + std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), + [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} + ); + return ret; + }(); + std::clog<<"memory size "<< in.num_elements()*sizeof(complex)/1e6 <<" MB\n"; + + multi::thrust::cuda::universal_array const in_gpu = in; + multi::thrust::cuda::universal_array const in_mng = in; + + using std::clog; + for(auto c : std::vector>{ + {false, true , true , true }, + {false, true , true , false}, + {true , false, false, false}, + {true , true , false, false}, + {false, false, true , false}, + {false, false, false, false}, + }){ + std::clog<<"case "; copy(begin(c), end(c), std::ostream_iterator{std::clog,", "}); std::clog< out = in; + multi::array in_rw = in; + [&, _ = watch{"cpu_opl "}]{ + multi::fftw::dft_forward(c, in, out); + }(); + [&, _ = watch{"cpu_ipl "}]{ + multi::fftw::dft(c, in_rw, multi::fftw::forward); + // BOOST_TEST( abs( static_cast>(in_rw[5][4][3][1]) - multi::complex(out[5][4][3][1]) ) == 0. ); + }(); + { + multi::array in_rw2 = in; + [&, _ = watch{"cpu_mov "}]{ + multi::array const out_mov = multi::fftw::dft_forward(c, std::move(in_rw2)); + // what(out_mov); + // BOOST_TEST( abs( static_cast>(out_mov[5][4][3][1]) - multi::complex(out[5][4][3][1]) ) == 0. 
); + BOOST_REQUIRE( is_empty(in_rw2) ); + BOOST_REQUIRE( extensions(out_mov) == extensions(in) ); + }(); + } + + + [&, _ = watch{"cpu_new "}]{ + auto const out_cpy = multi::fftw::dft_forward(c, in); + BOOST_TEST( abs( static_cast>(out_cpy[5][4][3][1]) - std::complex(out[5][4][3][1]) ) == 0. ); + }(); + multi::thrust::cuda::array out_gpu(extensions(in_gpu)); + [&, _ = watch{"gpu_opl "}]{ + multi::cufft::dft(c, in_gpu , out_gpu, multi::cufft::forward); + BOOST_TEST( abs( static_cast(out_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. ); + }(); + { + multi::thrust::cuda::array in_rw_gpu = in_gpu; + [&, _ = watch{"gpu_ipl "}]{ + multi::cufft::dft(c, in_rw_gpu, multi::cufft::forward); + BOOST_TEST( abs( static_cast(in_rw_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. ); + }(); + } + { + multi::thrust::cuda::array in_rw_gpu = in_gpu; + [&, _ = watch{"gpu_mov "}]{ + multi::thrust::cuda::array const out_mov = multi::cufft::dft_forward(c, std::move(in_rw_gpu)); + // BOOST_REQUIRE( in_rw_gpu.empty() ); + // BOOST_TEST( abs( static_cast(out_mov[5][4][3][1]) - out[5][4][3][1] ) == 0. ); + }(); + } + { + multi::thrust::cuda::array in_rw_gpu = in_gpu; + [&, _ = watch{"gpu_mov "}]{ + multi::thrust::cuda::array out_mov = std::move(in_rw_gpu); + multi::cufft::dft(c, out_mov, multi::cufft::forward); + // BOOST_REQUIRE( in_rw_gpu.empty() ); + // BOOST_TEST( abs( static_cast(out_mov[5][4][3][1]) - out[5][4][3][1] ) == 0. ); + }(); + } + cudaDeviceSynchronize(); + [&, _ = watch{"gpu_new "}]{ + multi::thrust::cuda::array const out_cpy = multi::cufft::dft(c, in_gpu, multi::cufft::forward); + }(); + multi::thrust::cuda::universal_array out_mng(extensions(in_mng)); + [&, _ = watch{"mng_cld "}]{ + multi::cufft::dft(c, in_mng, out_mng, multi::cufft::forward); + BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. ); + }(); + [&, _ = watch{"mng_hot "}]{ + multi::cufft::dft(c, in_mng , out_mng, multi::cufft::forward); + BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. 
); + }(); + [&, _ = watch{"mng_new "}]{ + auto const out_mng = multi::cufft::dft(c, in_mng, multi::cufft::forward); + BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. ); + }(); + } + // std::clog<<"cache size " + // << multi::cufft::plan::cache<1>().size() <<' ' + // << multi::cufft::plan::cache<2>().size() <<' ' + // << multi::cufft::plan::cache<3>().size() <<' ' + // << multi::cufft::plan::cache<4>().size() <<' ' + // < ret({45, 18, 32, 16}); + std::generate( + ret.data_elements(), ret.data_elements() + ret.num_elements(), + [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} + ); + return ret; + }(); + + multi::thrust::cuda::array const in = in_cpu; + multi::thrust::cuda::array out(extensions(in)); + +#if 0 + multi::cufft::many_dft(begin(unrotated(in)), end(unrotated(in)), begin(unrotated(out)), +1); + + multi::array out_cpu(extensions(in)); + multi::fft::many_dft(begin(unrotated(in_cpu)), end(unrotated(in_cpu)), begin(unrotated(out_cpu)), +1); + + BOOST_TEST( imag( static_cast(out[5][4][3][2]) - out_cpu[5][4][3][2]) == 0. 
); +#endif +} + +#if 0 +BOOST_AUTO_TEST_CASE(cufft_4D, *utf::tolerance(0.00001) ){ + auto const in = []{ + multi::array ret({10, 10, 10}); + std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), + [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} + ); + return ret; + }(); + + multi::array out(extensions(in)); +// multi::fftw::dft({true, false, true}, in, out, multi::fftw::forward); + multi::fftw::many_dft(begin(in.rotated()), end(in.rotated()), begin(out.rotated()), multi::fftw::forward); + + multi::thrust::cuda::array in_gpu = in; + multi::thrust::cuda::array out_gpu(extensions(in)); + +// multi::cufft::dft({true, false, true}, in_gpu, out_gpu, multi::fft::forward);//multi::cufft::forward); + // multi::cufft::many_dft(begin(in_gpu.rotated()), end(in_gpu.rotated()), begin( out_gpu.rotated() ), multi::fftw::forward); + // BOOST_TEST( ( static_cast(out_gpu[5][4][3]) - out[5][4][3]).imag() == 0. ); +} +#endif + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fft.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fft.hpp new file mode 100644 index 0000000000..8b8b33032c --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fft.hpp @@ -0,0 +1,45 @@ +// Copyright 2020-2024 Alfredo A. 
Correa + +#ifndef BOOST_MULTI_ADAPTORS_FFT_HPP +#define BOOST_MULTI_ADAPTORS_FFT_HPP + +#include "../adaptors/fftw.hpp" + +#if defined(__CUDA__) || defined(__NVCC__) +#include "../adaptors/cufft.hpp" +#elif defined(__HIPCC__) +#include "../adaptors/hipfft.hpp" +#endif + +#define BOOST_MULTI_DECLRETURN_(ExpR) -> decltype(ExpR) {return ExpR;} // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing + +namespace boost { +namespace multi { +namespace fft { + + static inline constexpr int forward = static_cast(fftw::forward); + static inline constexpr int none = static_cast(fftw::none); + static inline constexpr int backward = static_cast(fftw::backward); + + static_assert( forward != none and none != backward and backward != forward ); + + template struct priority : std::conditional_t>{}; + + template auto dft_aux_(priority<0>, Args&&... args) BOOST_MULTI_DECLRETURN_( fftw::dft_backward(std::forward(args)...)) + template auto dft_aux_(priority<1>, Args&&... args) BOOST_MULTI_DECLRETURN_(cufft ::dft_backward(std::forward(args)...)) + template auto dft(Args&&... args) BOOST_MULTI_DECLRETURN_(dft_backward_aux_(priority<1>{}, std::forward(args)...)) + template auto dft(std::array::dimensionality> which, In const& in, Args&&... args) -> decltype(auto) {return dft_aux_(priority<1>{}, which, in, std::forward(args)...);} + + template auto dft_forward_aux_(priority<0>, Args&&... args) BOOST_MULTI_DECLRETURN_( fftw::dft_forward(std::forward(args)...)) + template auto dft_forward_aux_(priority<1>, Args&&... args) BOOST_MULTI_DECLRETURN_(cufft ::dft_forward(std::forward(args)...)) + template auto dft_forward(std::array which, In const& in, Args&&... args) -> decltype(auto) {return dft_forward_aux_(priority<1>{}, which, in, std::forward(args)...);} + + template auto dft_backward_aux_(priority<0>, Args&&... args) BOOST_MULTI_DECLRETURN_( fftw::dft_backward(std::forward(args)...)) + template auto dft_backward_aux_(priority<1>, Args&&... 
args) BOOST_MULTI_DECLRETURN_(cufft ::dft_backward(std::forward(args)...)) + template auto dft_backward(std::array which, In const& in, Args&&... args) -> decltype(auto) {return dft_backward_aux_(priority<1>{}, which, in, std::forward(args)...);} + +}}} + +#undef BOOST_MULTI_DECLRETURN_ + +#endif // BOOST_MULTI_ADAPTORS_FFT_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw.hpp new file mode 100644 index 0000000000..210e01eec5 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw.hpp @@ -0,0 +1,604 @@ +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_FFTW_HPP +#define BOOST_MULTI_ADAPTORS_FFTW_HPP +#pragma once + +#include + +#include + +#include // sort +#include +#include +#include // accumulate + +#if HAVE_FFTW3_THREADS +#include +#endif + +#include // external fftw3 library + +namespace boost::multi { +namespace fftw { + +using std::as_const; + +struct flags { + using underlying_type = decltype(FFTW_PRESERVE_INPUT); // NOLINT(hicpp-signed-bitwise) : macro definition in external library + + private: + underlying_type underlying_; + + public: + constexpr explicit flags(underlying_type underlying) : underlying_{underlying} {} + constexpr explicit operator underlying_type() const { return underlying_; } + friend constexpr auto operator|(flags f1, flags f2) { return flags{f1.underlying_ | f2.underlying_}; } +}; + +constexpr flags estimate{FFTW_ESTIMATE}; // NOLINT(hicpp-signed-bitwise) : defined in an external lib 1U << 6 +constexpr flags measure{FFTW_MEASURE}; + +constexpr flags preserve_input{FFTW_PRESERVE_INPUT}; // NOLINT(hicpp-signed-bitwise) : defined in an external lib 1U << 4 +// // NOLINT(): this is a defect in FFTW https://github.com/FFTW/fftw3/issues/246 + +} // end namespace fftw + +// 
template +// auto fftw_plan_dft_1d( +// Size N, +// std::complex const* in, std::complex* out, int sign, +// unsigned flags = FFTW_ESTIMATE +// ){ +// #ifndef NDEBUG +// auto check = in[N/3]; // check that const data will not been overwritten +// #endif +// assert( fftw::alignment_of(in) == fftw::alignment_of(out) ); +// auto ret=::fftw_plan_dft_1d(N, (fftw_complex*)in, (fftw_complex*)out, sign, flags | FFTW_PRESERVE_INPUT ); +// assert(check == in[N/3]); // check that const data has not been overwritten +// return ret; +// } + +// template +// auto fftw_plan_dft_1d( +// Size N, +// std::complex* in, std::complex* out, int sign, +// unsigned flags = FFTW_ESTIMATE +// ){ +// assert( fftw::alignment_of(in) == fftw::alignment_of(out) ); +// return ::fftw_plan_dft_1d(N, (fftw_complex*)in, (fftw_complex*)out, sign, flags); +// } + +// template +// auto fftw_plan_dft_2d( +// Size N1, Size N2, +// std::complex const* in, std::complex* out, int sign, +// unsigned flags = FFTW_ESTIMATE +// ){ +// assert( fftw::alignment_of(in) == fftw::alignment_of(out) ); +// #ifndef NDEBUG +// auto check = in[N1*N2/3]; // check that const data will not been overwritten +// #endif +// auto ret = ::fftw_plan_dft_2d(N1, N2, (fftw_complex*)in, (fftw_complex*)out, sign, flags | FFTW_PRESERVE_INPUT); +// assert( check == in[N1*N2/3] ); // check that const data has not been overwritten +// return ret; +// } + +// template +// auto fftw_plan_dft_2d( +// Size N1, Size N2, +// std::complex* in, std::complex* out, int sign, +// unsigned flags = FFTW_ESTIMATE +// ){ +// assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out)); +// return ::fftw_plan_dft_2d(N1, N2, (fftw_complex*)in, (fftw_complex*)out, sign, flags); +// } + +// template +// auto fftw_plan_dft_3d( +// Size N1, Size N2, Size N3, +// std::complex* in, std::complex* out, int sign, +// unsigned flags = FFTW_ESTIMATE +// ){ +// assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out)); +// return 
::fftw_plan_dft_3d(N1, N2, N3, (fftw_complex*)in, (fftw_complex*)out, sign, flags); +// } +// template +// auto fftw_plan_dft_3d( +// Size N1, Size N2, Size N3, +// std::complex const* in, std::complex* out, int sign, +// unsigned flags = FFTW_ESTIMATE +// ){ +// assert( flags & FFTW_PRESERVE_INPUT ); +// assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out)); +// return ::fftw_plan_dft_3d(N1, N2, N3, (fftw_complex*)in, (fftw_complex*)out, sign, flags | FFTW_PRESERVE_INPUT); +// } + +// template +// auto fftw_plan_dft( +// Rank r, int* ns, +// std::complex* in, std::complex* out, +// int sign, unsigned flags = FFTW_ESTIMATE +// ){ +// assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out)); +// return ::fftw_plan_dft(r, ns, (fftw_complex*)in, (fftw_complex*)out, sign, flags); +// } +// template +// auto fftw_plan_dft( +// RankType r, int* ns, +// std::complex const* in, std::complex* out, +// int sign, unsigned flags = FFTW_ESTIMATE | FFTW_PRESERVE_INPUT +// ){ +// assert( flags & FFTW_PRESERVE_INPUT ); +// assert(fftw::alignment_of(in) == fftw::alignment_of(out)); +// #ifndef NDEBUG +// size_t ne = 1; for(RankType i = 0; i != r; ++i) ne*=ns[i]; +// auto check = in[ne/3]; // check that const data will not been overwritten +// #endif +// auto ret=::fftw_plan_dft(r, ns, (fftw_complex*)in, (fftw_complex*)out, sign, flags); +// assert(check == in[ne/3]); // check that const data has not been overwritten +// return ret; +// } + +// template +// auto fftw_plan_dft_1d( +// In&& in, Out&& out, int sign, unsigned flags = FFTW_ESTIMATE +// ){ +// static_assert(in.dimensionality == 1, "!"); assert(size(in) == size(out)); +// assert( in.is_compact() ); assert( out.is_compact() ); +// return multi::fftw_plan_dft_1d(size(in), data_elements(in), data_elements(out), sign, flags); +// } + +// template +// auto fftw_plan_dft_2d( +// In&& in, Out&& out, int sign, unsigned flags = FFTW_ESTIMATE +// ){ +// static_assert(in.dimensionality == 2, "!"); 
assert(in.sizes() == out.sizes()); +// assert( in.is_compact() ); assert( out.is_compact() ); +// return multi::fftw_plan_dft_2d( +// sizes(in)[0], sizes(in)[1], +// data_elements(in), data_elements(out), sign, flags +// ); +// } + +// template +// auto fftw_plan_dft_3d( +// In&& in, Out&& out, int sign, unsigned flags = FFTW_ESTIMATE +// ){ +// static_assert(in.dimensionality == 3, "!"); assert(in.sizes() == out.sizes()); +// assert( in.is_compact() ); assert( out.is_compact() ); +// return multi::fftw_plan_dft_3d( +// sizes(in)[0], sizes(in)[1], sizes(in)[2], +// data(in), data(out), +// sign, flags +// ); +// } + +// template +// constexpr auto to_array(Tpl const& tpl) { +// return std::apply( +// [](auto const&... elems) { return std::array::value>{static_cast(elems)...}; }, +// tpl +// ); +// } + +// template< +// typename It1, class It2, +// std::enable_if_t{} || std::is_convertible*>{}, int> = 0> +// auto fftw_plan_many_dft(It1 first, It1 last, It2 d_first, int sign, fftw::flags flags) +// -> fftw_plan { + +// static_assert(sizeof(*base(first)) == sizeof((*base(first)).real()) + sizeof((*base(first)).imag()), "input must have complex pod layout"); +// static_assert(sizeof(*base(first)) == sizeof(fftw_complex), "input must have complex pod layout"); +// static_assert(sizeof(*base(d_first)) == sizeof((*base(d_first)).real()) + sizeof((*base(d_first)).imag()), "output must have complex pod layout"); +// static_assert(sizeof(*base(d_first)) == sizeof(fftw_complex), "output must have complex pod layout"); + +// assert(strides(*first) == strides(*last)); +// assert(sizes(*first) == sizes(*d_first)); + +// auto const ssn_tuple = multi::detail::tuple_zip(strides(*first), strides(*d_first), sizes(*first)); +// auto ssn = std::apply([](auto... 
ssn) { +// using boost::multi::detail::get; +// return std::array, sizeof...(ssn)>{ +// boost::multi::detail::mk_tuple(static_cast(get<0>(ssn)), static_cast(get<1>(ssn)), static_cast(get<2>(ssn)))...}; +// }, +// ssn_tuple); +// std::sort(ssn.begin(), ssn.end(), std::greater<>{}); + +// auto const istrides = [&]() { +// std::array::rank::value> istrides{}; +// using boost::multi::detail::get; +// std::transform(ssn.begin(), ssn.end(), istrides.begin(), [](auto elem) { return get<0>(elem); }); +// return istrides; +// }(); + +// auto const ostrides = [&]() { +// std::array::rank::value> ostrides{}; +// using boost::multi::detail::get; +// std::transform(ssn.begin(), ssn.end(), ostrides.begin(), [](auto elem) { return get<1>(elem); }); +// return ostrides; +// }(); +// assert(std::is_sorted(ostrides.begin(), ostrides.end(), std::greater<>{})); // otherwise ordering is incompatible + +// auto const ion = [&]() { +// std::array::rank::value> ion{}; +// using boost::multi::detail::get; +// std::transform(ssn.begin(), ssn.end(), ion.begin(), [](auto elem) { return get<2>(elem); }); +// return ion; +// }(); + +// auto const inembed = [&]() { +// std::array::rank::value + 1> inembed{}; +// std::adjacent_difference( +// istrides.rbegin(), istrides.rend(), inembed.rbegin(), [](auto alpha, auto omega) {assert(omega != 0 && alpha%omega == 0); return alpha/omega; } +// ); +// return inembed; +// }(); + +// auto const onembed = [&]() { +// std::array::rank::value + 1> onembed{}; +// std::adjacent_difference( +// ostrides.rbegin(), ostrides.rend(), onembed.rbegin(), [](auto alpha, auto omega) {assert(omega != 0 && alpha%omega == 0); return alpha/omega; } +// ); +// return onembed; +// }(); + +// auto ret = ::fftw_plan_many_dft( +// /*int rank */ ion.size(), +// /*const int* n */ ion.data(), +// /*int howmany */ last - first, +// /*fftw_complex* in */ reinterpret_cast(const_cast*>(static_cast const*>(base(first)))), // 
NOLINT(cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-pro-type-reinterpret-cast) input data +// /*const int* inembed */ inembed.data(), +// /*int istride */ istrides.back(), +// /*int idist */ stride(first), +// /*fftw_complex* out */ reinterpret_cast(static_cast*>(base(d_first))), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) adapt types +// /*const int* onembed */ onembed.data(), +// /*int ostride */ ostrides.back(), +// /*int odist */ stride(d_first), +// /*int */ sign, +// /*unsigned */ static_cast(flags) +// ); +// assert(ret); // if you get null here it could be because your library doesn't support this fftw call mode +// return ret; +// } + +// template< +// typename It1, class It2, +// std::enable_if_t{} || std::is_convertible*>{}, int> = 0> +// auto fftw_plan_many_dft(It1 first, It1 last, It2 d_first, int sign) +// -> fftw_plan { +// return fftw_plan_many_dft(first, last, d_first, sign, fftw::estimate); +// } + +template +auto fftw_plan_dft(std::array which, InPtr in_base, In const& in_layout, OutPtr out_base, Out const& out_layout, int sign, fftw::flags /*flags*/) -> fftw_plan { + assert(in_layout.extensions() == out_layout.extensions()); + + auto const sizes_tuple = in_layout.sizes(); + + auto const istride_tuple = in_layout.strides(); + auto const ostride_tuple = out_layout.strides(); + + using boost::multi::detail::get; + auto which_iodims = std::apply( + [](auto... 
elems) { + // clang-format off + return std::array, sizeof...(elems) + 1>{ // added one element to avoid problem with gcc 13 static analysis (out-of-bounds) + std::pair{get<0>(elems), fftw_iodim64{get<1>(elems), get<2>(elems), get<3>(elems)}}..., {}, // added one element to avoid problem with gcc 13 static analysis (out-of-bounds) + }; + // clang-format on + }, + boost::multi::detail::tuple_zip(which, sizes_tuple, istride_tuple, ostride_tuple) + ); + auto const part = std::stable_partition(which_iodims.begin(), which_iodims.end() - 1, [](auto elem) { return std::get<0>(elem); }); + + std::array dims{}; + std::array howmany_dims{}; + + auto const dims_end = std::transform(which_iodims.begin(), part, dims.begin(), [](auto elem) { return elem.second; }); + auto const howmany_dims_end = std::transform(part, which_iodims.end() - 1, howmany_dims.begin(), [](auto elem) { return elem.second; }); + + assert(in_base); + assert(out_base); + + assert((sign == -1) || (sign == +1)); + + fftw_plan ret = fftw_plan_guru64_dft( + /*int rank */ dims_end - dims.begin(), + /*const fftw_iodim64 *dims */ dims.data(), + /*int howmany_rank */ howmany_dims_end - howmany_dims.begin(), + /*const fftw_iodim *howmany_dims */ howmany_dims.data(), + /*fftw_complex *in */ const_cast(reinterpret_cast(/*static_cast const *>*/ (in_base))), // NOLINT(cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-pro-type-reinterpret-cast) //NOSONAR FFTW is taken as non-const while it is really not touched + /*fftw_complex *out */ (reinterpret_cast(/*static_cast *>*/ (out_base))), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + sign, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT + ); + + assert(ret && "fftw lib returned a null plan, if you are using MKL check the limitations of their fftw interface"); + // 
https://software.intel.com/content/www/us/en/develop/documentation/mkl-developer-reference-c/top/appendix-d-fftw-interface-to-intel-math-kernel-library/fftw3-interface-to-intel-math-kernel-library/using-fftw3-wrappers.html + return ret; +} + +template(detail::implicit_cast*>(base(std::declval()))))> // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : interact with legacy code +auto fftw_plan_dft(In const& in, Out&& out, int dir) { + return fftw_plan_dft(in, std::forward(out), dir, fftw::estimate); +} + +namespace fftw { + +inline auto initialize_threads() -> bool { +#if HAVE_FFTW3_THREADS + return fftw_init_threads(); +#else + return false; +#endif +} + +enum class sign : decltype(FFTW_FORWARD) { // NOLINT(performance-enum-size) + backward = FFTW_BACKWARD, + none = 0, + forward = FFTW_FORWARD, +}; + +#if(__cplusplus >= 202002L) +using sign::backward; +using sign::forward; +using sign::none; +#else +constexpr inline auto backward = sign::backward; +constexpr inline auto none = sign::none; +constexpr inline auto forward = sign::forward; +#endif + +static_assert(forward != none && none != backward && backward != forward); + +enum class direction : decltype(FFTW_FORWARD) { // NOLINT(performance-enum-size) + backward = FFTW_BACKWARD, + none = 0, + forward = FFTW_FORWARD, +}; + +class plan; + +class environment { + static void cleanup_() { ::fftw_cleanup(); } + static void set_timelimit_(std::chrono::duration limit) { + ::fftw_set_timelimit(limit.count()); + } + static void unset_timelimit_() { ::fftw_set_timelimit(FFTW_NO_TIMELIMIT); } + + public: + environment() = default; + + environment(environment const&) = delete; + environment(environment&&) = delete; + + auto operator=(environment const&) -> environment& = delete; + auto operator=(environment&&) -> environment& = delete; + + template + auto make_plan_forward(std::array which, In const& in, Out&& out); + template + auto make_plan_backward(std::array which, In const& in, Out&& out); + + ~environment() { 
cleanup_(); } +}; + +class plan { + plan() : impl_{nullptr, &fftw_destroy_plan} {} + // std::shared_ptr const> impl_; + std::unique_ptr, decltype(&fftw_destroy_plan)> impl_; + + public: + plan(plan const&) = delete; + plan(plan&&) = delete; + ~plan() = default; + + template + explicit plan( + std::array which, + InPtr in_base, In in_layout, + OutPtr out_base, Out out_layout, sign ss + ) : impl_{fftw_plan_dft(which, in_base, in_layout, out_base, out_layout, static_cast(ss), fftw::estimate), &fftw_destroy_plan} { + assert(impl_); + } + + template + static auto forward(std::array which, InPtr in_base, In in_layout, OutPtr out_base, Out out_layout) { + return plan(which, in_base, in_layout, out_base, out_layout, fftw::forward); + } + template + static auto backward(std::array which, InPtr in_base, In in_layout, OutPtr out_base, Out out_layout) { + return plan(which, in_base, in_layout, out_base, out_layout, fftw::backward); + } + + template + void execute(I* in, O* out) const { + static_assert(sizeof(in->imag()) == sizeof(double)); + static_assert(sizeof(out->imag()) == sizeof(double)); + + static_assert(sizeof(*in) == sizeof(fftw_complex)); + static_assert(sizeof(*out) == sizeof(fftw_complex)); + + ::fftw_execute_dft( + const_cast(impl_.get()), // NOLINT(cppcoreguidelines-pro-type-const-cast) https://www.fftw.org/fftw3_doc/Thread-safety.html + const_cast(reinterpret_cast(in)), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-pro-type-const-cast) //NOSONAR to interface with legacy fftw + reinterpret_cast(out) // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-pro-type-const-cast) : to interface with legacy fftw + ); + } + + // template void execute(I&& in, O&& out) const { execute_dft(std::forward(in), std::forward(out)); } + // friend void execute(plan const& self) { self.execute(); } + + auto operator=(plan&&) -> plan& = delete; + auto operator=(plan const&) -> plan& = delete; + + [[nodiscard]] auto cost() const -> double { 
return fftw_cost(const_cast(impl_.get())); } // NOLINT(cppcoreguidelines-pro-type-const-cast) + [[nodiscard]] auto flops() const { + struct /*ret_t&*/ { + double add = {}; + double mul = {}; + double fma = {}; + // explicit operator double() const{return add + mul + 2*fma;} + } ret; + fftw_flops(const_cast(impl_.get()), &ret.add, &ret.mul, &ret.fma); // NOLINT(cppcoreguidelines-pro-type-const-cast) + return ret; + } + +#if HAVE_FFTW3_THREADS + public: + static void make_thread_safe() { + fftw_make_planner_thread_safe(); // needs linking to -lfftw3_threads, requires FFTW-3.3.6 or greater + is_thread_safe_ = true; + } + static int with_nthreads(int n) { + fftw_plan_with_nthreads(n); + nthreads_ = n; + return n; + } + static int with_nthreads() { + int n = std::thread::hardware_concurrency(); + return with_nthreads(n ? n : 2); + } + static bool is_thread_safe() { return is_thread_safe_; } + static bool nthreads() { return nthreads_; } + + private: + static bool is_thread_safe_; + static int nthreads_; + static bool initialized_threads_; +#else + static constexpr auto is_thread_safe() -> bool { return false; } + static constexpr auto nthreads() -> bool { return true; } + static constexpr auto with_nthreads() -> int { return 1; } +#endif +}; + +template +auto environment::make_plan_forward(std::array which, In const& in, Out&& out) { + return plan::forward(which, in, std::forward(out)); +} + +template +auto environment::make_plan_backward(std::array which, In const& in, Out&& out) { + return plan::backward(which, in, std::forward(out)); +} + +#if HAVE_FFTW3_THREADS +bool plan::is_thread_safe_ = (plan::make_thread_safe(), true); +int plan::nthreads_ = (initialize_threads(), with_nthreads()); +#endif + +using std::decay_t; + +template +auto dft(std::array which, In const& in, Out&& out, sign dir) + -> decltype(plan{which, in.base(), in.layout(), out.base(), out.layout(), dir}.execute(in.base(), out.base()), std::forward(out)) { + return plan{which, in.base(), in.layout(), 
out.base(), out.layout(), dir}.execute(in.base(), out.base()), std::forward(out); +} + +template::rank::value, + std::enable_if_t().base()), typename std::decay_t::element>, int> = 0> +auto dft(std::array which, In&& in, sign dir) + -> decltype(dft(which, in, in, dir), std::forward(in)) { + return dft(which, in, in, dir), std::forward(in); +} + +template +auto dft_forward(std::array which, A const& in, O&& out) + -> decltype(fftw::dft(which, in, std::forward(out), fftw::forward)) { + return fftw::dft(which, in, std::forward(out), fftw::forward); +} + +template +auto dft_backward(std::array which, A const& in, O&& out) + -> decltype(fftw::dft(which, in, std::forward(out), fftw::backward)) { + return fftw::dft(which, in, std::forward(out), fftw::backward); +} + +template auto dft_backward(A&&... args) + -> decltype(dft(std::forward(args)..., fftw::backward)) { + return dft(std::forward(args)..., fftw::backward); +} + +// template::decay_type> +// auto move(In&& in) { +// if(in.is_compact()) { +// multi::array_ref Ref( +// in.base(), extensions(in) +// ); +// copy(in, Ref); +// return R( +// multi::array_ref>(std::make_move_iterator(in.mbase()), ((in.mbase()=0), extensions(Ref))) +// ); +// } +// return copy(std::forward(in)); +// } + +template +using static_array = ::boost::multi::static_array>; + +template +using array = ::boost::multi::array>; + +template> +auto copy(multi::subarray>&& array) -> R { + if(array.is_compact()) { + return fftw::copy( + array.template static_array_cast(), + multi::array_ref(array.base().base(), array.extensions()) + ) + .template static_array_cast>(); + } + return fftw::copy(std::move(array).template static_array_cast()); +} + +template +auto transpose(Array& array) + -> decltype(fftw::copy(transposed(array), array.reshape(extensions(layout(array).transpose())))) { + multi::array_ref const ref_aux(array.base(), extensions(array)); + return fftw::copy(ref_aux.transposed(), array.reshape(layout(array).transpose().extensions())); +} + +} 
// end namespace fftw +} // end namespace boost::multi + +namespace boost::multi::fftw { + +template +class fft_iterator { + MDIterator base_; + + std::array which_ = {}; + + public: + using iterator_type = MDIterator; + + using difference_type = typename std::iterator_traits::difference_type; + using value_type = typename std::iterator_traits::value_type; + using pointer = std::nullptr_t; + + class reference { + typename MDIterator::reference::extensions_type x_; + explicit reference(typename MDIterator::reference const& ref) : x_{ref.extensions()} {} + friend class fft_iterator; + + public: + using extensions_type = typename MDIterator::reference::extensions_type; + auto extensions() const -> extensions_type { return x_; } + }; + + using iterator_category = std::random_access_iterator_tag; // using iterator_category = std::input_iterator_tag; + + explicit fft_iterator(iterator_type base, std::array which) noexcept : base_{std::move(base)}, which_{which} {} + + friend auto operator-(fft_iterator const& self, fft_iterator const& other) -> difference_type { + return self.base_ - other.base_; + } + + auto operator*() const { return reference{*base_}; } +}; + +} // end namespace boost::multi::fftw +#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/CMakeLists.txt similarity index 58% rename from external_codes/boost_multi/multi/include/multi/adaptors/fftw/CMakeLists.txt rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/CMakeLists.txt index ec80d5c497..761c767430 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/CMakeLists.txt +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/CMakeLists.txt @@ -1,12 +1,11 @@ cmake_minimum_required(VERSION 3.11) -set(CMAKE_VERBOSE_MAKEFILE ON) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") #project( -# 
boost-multi-adaptors-fftw -# VERSION 0.1 -# LANGUAGES CXX +# boost-multi-adaptors-fftw +# VERSION 0.1 +# LANGUAGES CXX #) find_package(PkgConfig) @@ -18,16 +17,20 @@ if(PKG_CONFIG_FOUND) IMPORTED_TARGET ) if(FFTW_FOUND) - include_directories(PkgConfig::FFTW) + # include_directories(PkgConfig::FFTW) this seems to be always incorrect link_libraries(PkgConfig::FFTW) include_directories(${CMAKE_BINARY_DIR}) add_subdirectory(test) +# add_subdirectory(mpi) else() - message(WARNING "Cannot find FFTW, FFTW-adaptor will not be tested. If you want this feature install FFTW, for example please run:\n sudo apt install pkg-config libfftw3-dev") + message(WARNING "Cannot find FFTW, FFTW-adaptor will not be tested. If you want this feature install FFTW, for example please run:" + "\n sudo apt install pkg-config libfftw3-dev" + "\n sudo dnf install fftw-devel # in Fedora") endif() else() - message(WARNING "Cannot find PkgConfig and/or FFTW, FFTW-adaptor will not be tested. If you want this feature install PkgConfig and FFTW, for example please run:\n sudo apt install pkg-config libfftw3-dev") + message(WARNING "Cannot find PkgConfig and/or FFTW, FFTW-adaptor will not be tested. If you want this feature install PkgConfig and FFTW, for example please run:" + "\n sudo apt install pkg-config libfftw3-dev" + "\n sudo dnf install fftw-devel # in Fedora") endif() - diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/README.md b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/README.md new file mode 100644 index 0000000000..a5b8c95847 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/README.md @@ -0,0 +1,74 @@ + +# [Boost.]MultiAdaptors.FFTW + +> **Disclosure: This is not an official or accepted Boost library and is unrelated to the std::mspan proposal.** + +_© Alfredo A. Correa, 2018-2023_ + +`Multi` is a modern C++ library that provides access and manipulation of data in multidimensional arrays. 
+Algorithms on multidimensional array data structures are fundamental to several branches of computing. +Multiple libraries implement these algorithms, and some are specially tuned to specific systems and hardware. + +Linear algebra and Fourier transforms are some examples of operations with algorithms on regularly contiguous (strided) multidimensional array data structures. +Although not generic, these libraries are the best options in specific systems for specific element types. + +## Contents +[[_TOC_]] + +## FFTW + +FFTW is a C library for computing the discrete Fourier transform (DFT) in one or more dimensions for real and complex data. +It is the de facto interface for many implementations, including Intel's MKL FFT. + +The FFTW adaptor provides two ways to use the library: one is through plan objects, and the other is through functions. + +Plans are runtime-optimized algorithms that tune the DFT operation for specific array sizes and layouts known in advance. +Plans reserve resources and precalculate parameters utilized during the execution. + +Plans are created from array layouts with dimensionality `D` that sample the input and output. + +```cpp +auto p = multi::fftw::plan::[forward|backward]({which...}, in_layout, out_layout); +``` + +Input and output layouts must have the same associated sizes. +`{which...}` is a set of (at most D) boolean values that determine which dimensions are transformed; for example, `{true, true, ...}` performs the FFT in all directions, `{false, true, false, ...}` transforms the second dimension only, and `{false, false, ...}` doesn't perform any Fourier transform, effectively performing an element-wise copy or transposition. + +The plans can be later executed (many times if necessary) as: +```cpp +p.execute(in_base, out_base); +``` + +Executions of the same plan (or over the same data) are not thread-safe (the plan has internal buffers that are modified), so the `.execute` function is not marked `const`. 
+ +The use pattern of the FFTW adaptor (and the original FFTW) interface is somewhat entangled. +The plan construction takes the arrays, and the execute takes the internal pointers to array data. + +There is a convenience function that generates and executes the plan consistently: +```cpp +template +auto&& multi::fftw::dft::forward({which, ...}, In const& in, Out&& out) { + multi::fftw::plan::forward(in.layout(), out.layout()).execute(in.base(), out.base()); + return std::forward(out); +} +``` + + diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/benchmark/memory.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/benchmark/memory.cpp new file mode 100644 index 0000000000..c35b5a000c --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/benchmark/memory.cpp @@ -0,0 +1,148 @@ +#ifdef COMPILATION// sudo cpupower frequency-set --governor performance && sudo apt install libbenchmark-dev +${CXX:-c++} -Ofast -DNDEBUG -march=native `#-DNOEXCEPT_ASSIGNMENT` -I../../../../../include/ $0 -o $0x `pkg-config --libs benchmark fftw3`&&$0x&&rm $0x;exit +#endif + +#include + +#include +#include +#include + +#include + +namespace multi = boost::multi; + +using complex = std::complex; + +template +static void Allocation(benchmark::State& state){ + + multi::array in({state.range(0), state.range(0)*2}, 1.2); + multi::array out(extensions(in), 3.1); + + std::vector v(state.range(0)*3.14); + benchmark::DoNotOptimize(v); + + benchmark::DoNotOptimize(in); + benchmark::DoNotOptimize(out); + + benchmark::ClobberMemory(); + + multi::fftw::plan p(std::array{true, true}, in, out, multi::fftw::forward, multi::fftw::estimate); + for(auto _ : state){ + benchmark::DoNotOptimize(in); + benchmark::DoNotOptimize(out); + // benchmark::ClobberMemory(); + + p(); + } + + benchmark::DoNotOptimize(in); + benchmark::DoNotOptimize(out); + benchmark::ClobberMemory(); +} + +BENCHMARK(Allocation>)->DenseRange(100, 500, 28); // 
NOLINT(cppcoreguidelines-avoid-non-const-global-variables) +BENCHMARK(Allocation>)->DenseRange(100, 500, 28); // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) + +template +static void Allocation1D(benchmark::State& state){ + + multi::array in({state.range(0)}, 1.2); + multi::array out(extensions(in), 3.1); + + std::vector v(state.range(0)*3.14); + benchmark::DoNotOptimize(v); + + benchmark::DoNotOptimize(in); + benchmark::DoNotOptimize(out); + + benchmark::ClobberMemory(); + + multi::fftw::plan p(std::array{true}, in, out, multi::fftw::forward, multi::fftw::estimate); + for(auto _ : state){ + benchmark::DoNotOptimize(in); + benchmark::DoNotOptimize(out); + // benchmark::ClobberMemory(); + + p(); + } + + benchmark::DoNotOptimize(in); + benchmark::DoNotOptimize(out); + benchmark::ClobberMemory(); +} + +BENCHMARK(Allocation1D>)->RangeMultiplier(2)->Range(128, 128*1024); +BENCHMARK(Allocation1D>)->RangeMultiplier(2)->Range(128, 128*1024); + +BENCHMARK_MAIN(); + +/* +$ sh ./memory.cpp +2023-09-08T10:11:42-07:00 +Running ./memory.cppx +Run on (12 X 4000.06 MHz CPU s) +CPU Caches: + L1 Data 32 KiB (x6) + L1 Instruction 32 KiB (x6) + L2 Unified 256 KiB (x6) + L3 Unified 12288 KiB (x1) +Load Average: 4.28, 3.36, 2.76 +***WARNING*** Library was built as DEBUG. Timings may be affected. 
+----------------------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +----------------------------------------------------------------------------------------------- +Allocation>/100 106167 ns 105362 ns 6801 +Allocation>/128 186345 ns 186233 ns 3701 +Allocation>/156 445409 ns 445387 ns 1609 +Allocation>/184 1524822 ns 1524758 ns 423 +Allocation>/212 6334445 ns 6334253 ns 116 +Allocation>/240 1639634 ns 1639581 ns 454 +Allocation>/268 7807228 ns 7806998 ns 97 +Allocation>/296 13261983 ns 13261620 ns 45 +Allocation>/324 3183764 ns 3183686 ns 200 +Allocation>/352 4003407 ns 4003311 ns 174 +Allocation>/380 7525147 ns 7524610 ns 106 +Allocation>/408 10952047 ns 10951038 ns 51 +Allocation>/436 42780039 ns 42778256 ns 16 +Allocation>/464 19106268 ns 19104952 ns 36 +Allocation>/492 50939459 ns 50933888 ns 13 +Allocation>/100 161494 ns 161399 ns 4491 +Allocation>/128 242198 ns 242146 ns 2874 +Allocation>/156 661061 ns 660975 ns 1024 +Allocation>/184 2139026 ns 2138641 ns 322 +Allocation>/212 9557704 ns 9556300 ns 77 +Allocation>/240 2313975 ns 2313203 ns 285 +Allocation>/268 11401940 ns 11399821 ns 64 +Allocation>/296 19879090 ns 19874211 ns 35 +Allocation>/324 3994153 ns 3990909 ns 176 +Allocation>/352 5923172 ns 5923003 ns 120 +Allocation>/380 9400210 ns 9398525 ns 73 +Allocation>/408 9982523 ns 9981809 ns 67 +Allocation>/436 42858298 ns 42854934 ns 17 +Allocation>/464 20109321 ns 20104686 ns 32 +Allocation>/492 54813030 ns 54097910 ns 13 +Allocation1D>/128 307 ns 307 ns 2482984 +Allocation1D>/256 597 ns 597 ns 1167097 +Allocation1D>/512 1325 ns 1325 ns 446234 +Allocation1D>/1024 2833 ns 2833 ns 226483 +Allocation1D>/2048 7819 ns 7817 ns 88305 +Allocation1D>/4096 23896 ns 23894 ns 27796 +Allocation1D>/8192 64528 ns 64520 ns 10695 +Allocation1D>/16384 122521 ns 122508 ns 5928 +Allocation1D>/32768 294487 ns 294375 ns 2577 +Allocation1D>/65536 635923 ns 635821 ns 1090 +Allocation1D>/131072 1399999 ns 1399604 ns 486 
+Allocation1D>/128 285 ns 285 ns 2578003 +Allocation1D>/256 604 ns 604 ns 1140959 +Allocation1D>/512 1206 ns 1206 ns 473758 +Allocation1D>/1024 2761 ns 2759 ns 246249 +Allocation1D>/2048 7240 ns 7235 ns 89839 +Allocation1D>/4096 20633 ns 20611 ns 31992 +Allocation1D>/8192 56934 ns 56813 ns 12723 +Allocation1D>/16384 121458 ns 121206 ns 5358 +Allocation1D>/32768 281074 ns 280545 ns 2503 +Allocation1D>/65536 606937 ns 606822 ns 1098 +Allocation1D>/131072 1582752 ns 1582109 ns 478 +*/ diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/memory.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/memory.hpp new file mode 100644 index 0000000000..b19568ca52 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/memory.hpp @@ -0,0 +1,52 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_FFTW_MEMORY_HPP +#define BOOST_MULTI_ADAPTORS_FFTW_MEMORY_HPP + +#include + +#include // for std:::size_t + +namespace boost::multi::fftw { + +template struct allocator; + +template<> struct allocator{}; + +template +struct allocator { + using value_type = T; + using size_type = std::size_t; + + static auto allocate(size_type n) -> T* { + if(n == 0) { + return nullptr; + } + void* ptr = fftw_malloc(sizeof(T) * n); + if(ptr == nullptr) { + throw std::bad_alloc{}; + } + return static_cast(ptr); + } + static void deallocate(T* ptr, size_type n) { + if(n != 0) { + fftw_free(ptr); + } + } + + constexpr auto operator==(allocator const& /*other*/) const -> bool { return true; } + constexpr auto operator!=(allocator const& /*other*/) const -> bool { return false; } + + static constexpr auto max_size() noexcept { return std::numeric_limits::max() / sizeof(T); } +}; + +template +constexpr auto operator==(allocator const& /*a*/, allocator const& /*b*/) noexcept -> bool { return true; } + 
+template +constexpr auto operator!=(allocator const& /*a*/, allocator const& /*b*/) noexcept -> bool { return false; } + +} // namespace boost::multi::fftw +#endif // BOOST_MULTI_ADAPTORS_FFTW_MEMORY_HPP diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi.hpp similarity index 91% rename from external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi.hpp index 57ac3fb47e..ba8aa52aaf 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi.hpp @@ -9,8 +9,8 @@ #include "../../array.hpp" #include "../../config/NODISCARD.hpp" -#include -#include +#include +#include #include "../fftw.hpp" @@ -45,26 +45,28 @@ struct array{ using element_type = T; mutable bmpi3::communicator comm_; - Alloc alloc_; + Alloc alloc_; typename std::allocator_traits::size_type local_count_; array_ptr::pointer> local_ptr_; ptrdiff_t n0_; - static std::pair::size_type, multi::extensions_type_<2>> - local_2d(multi::extensions_type_<2> ext, boost::mpi3::communicator const& comm){ - ptrdiff_t local_n0, local_0_start; + static std::pair::size_type, multi::extensions_t<2>> + local_2d(multi::extensions_t<2> ext, boost::mpi3::communicator const& comm){ + ptrdiff_t local_n0; + ptrdiff_t local_0_start; + auto count = fftw_mpi_local_size_2d(std::get<0>(ext).size(), std::get<1>(ext).size(), comm.get(), &local_n0, &local_0_start); assert( count >= local_n0*std::get<1>(ext).size() ); return {count, {{local_0_start, local_0_start + local_n0}, std::get<1>(ext)}}; } - static auto local_count_2d(multi::extensions_type_<2> ext, boost::mpi3::communicator const& comm){ + static auto local_count_2d(multi::extensions_t<2> ext, boost::mpi3::communicator const& comm){ return local_2d(ext, comm).first; } - static auto 
local_extension_2d(multi::extensions_type_<2> ext, boost::mpi3::communicator const& comm){ + static auto local_extension_2d(multi::extensions_t<2> ext, boost::mpi3::communicator const& comm){ return local_2d(ext, comm).second; } - array(multi::extensions_type_<2> ext, bmpi3::communicator comm = mpi3::environment::self(), Alloc alloc = {}) : + array(multi::extensions_t<2> ext, bmpi3::communicator comm = mpi3::environment::self(), Alloc alloc = {}) : comm_{std::move(comm)}, alloc_{alloc}, local_count_{local_count_2d(ext, comm_)}, @@ -84,7 +86,7 @@ struct array{ { local_cutout() = other.local_cutout(); } - array(array&& other) : + array(array&& other) noexcept : comm_ {std::move(other.comm_)}, alloc_ {std::move(other.alloc_)}, local_count_{std::exchange(other.local_count_, 0)}, @@ -100,7 +102,7 @@ struct array{ array_ref local_cutout() &{return *local_ptr_;} array_cref local_cutout() const&{return *local_ptr_;} ptrdiff_t local_count() const&{return local_count_;} - multi::extensions_type_<2> extensions() const&{return {n0_, std::get<1>(local_cutout().extensions())};} + multi::extensions_t<2> extensions() const&{return {n0_, std::get<1>(local_cutout().extensions())};} ptrdiff_t num_elements() const&{return multi::layout_t<2>(extensions()).num_elements();} operator multi::array() const&{ static_assert( std::is_trivially_copy_assignable{}, "!" 
); multi::array ret(extensions(), alloc_); diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/CMakeLists.txt new file mode 100644 index 0000000000..c2d74605e3 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.17) + +set(CMAKE_VERBOSE_MAKEFILE ON) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + +#project( +# boost-multi-adaptors-fftw +# VERSION 0.1 +# LANGUAGES CXX +#) + +find_package(PkgConfig) +pkg_search_module( + FFTW + fftw3 + IMPORTED_TARGET +) + +link_libraries(-lfftw3_mpi) + +add_subdirectory(test) diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi/distribution.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/distribution.hpp similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi/distribution.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/distribution.hpp diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi/environment.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/environment.hpp similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi/environment.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/environment.hpp diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi/scattered_array.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/scattered_array.hpp similarity index 99% rename from external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi/scattered_array.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/scattered_array.hpp index 
7ff13e40b9..032062babf 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi/scattered_array.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/scattered_array.hpp @@ -356,8 +356,8 @@ class scattered_array{ else assert(0); return *this; } - basic_array::template rebind> transposed() const{ - return basic_array::template rebind>{ + subarray::template rebind> transposed() const{ + return subarray::template rebind>{ layout_t{n0_, true, FFTW_MPI_DEFAULT_BLOCK}, this->local_cutout().layout().transpose(), this->local_cutout().data_elements() }; } diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/test/CMakeLists.txt new file mode 100644 index 0000000000..a16f3cfa39 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/test/CMakeLists.txt @@ -0,0 +1,136 @@ +# -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;autowrap:nil;-*- +cmake_minimum_required(VERSION 3.17) + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") + +set(CMAKE_VERBOSE_MAKEFILE ON) + +#project( +# boost-multi-adaptors-fftw-test +# VERSION 0.1 +# LANGUAGES CXX +#) + +if((NOT + CMAKE_CXX_COMPILER_ID + STREQUAL + "PGI" + ) + AND (NOT + DART_COMPILER_NAME + STREQUAL + "nvcc" + ) +) +# find_package(Boost REQUIRED COMPONENTS unit_test_framework timer) +else() + link_libraries("-lboost_unit_test_framework") # -lboost_timer") +endif() + +#set(CMAKE_CXX_STANDARD 17) +#set(CMAKE_CXX_STANDARD_REQUIRED ON) +#set(CMAKE_CXX_EXTENSIONS OFF) + +if(ENABLE_CUDA OR DEFINED CXXCUDA) + enable_language(CUDA) + # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") +endif() + +enable_testing() +#list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") +include(CTest) + +include_directories(${CMAKE_BINARY_DIR}) + +find_package(MPI REQUIRED COMPONENTS CXX) +message(STATUS 
"MPI Executable for running programs:" ${MPIEXEC_EXECUTABLE}) +message(STATUS "MPI number of processors detected on the host system: " ${MPIEXEC_MAX_NUMPROCS}) +include(CMakePrintHelpers) +cmake_print_properties(TARGETS MPI::MPI_CXX PROPERTIES INTERFACE_LINK_LIBRARIES INTERFACE_INCLUDE_DIRECTORIES) + +set(TEST_SRCS + array_2d.cpp +) + +foreach(TEST_FILE ${TEST_SRCS}) + set(TEST_EXE "${TEST_FILE}.x") + add_executable(${TEST_EXE} ${TEST_FILE}) + if(ENABLE_CUDA OR DEFINED CXXCUDA) + set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) + # set_property(TARGET ${TEST_EXE} PROPERTY "${CUDA_ARCH_LIST}") + target_compile_options(${TEST_EXE} PRIVATE -std=c++17) + endif() + # target_compile_features (${TEST_EXE} PUBLIC cxx_std_17) + target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") + target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS}) + + target_include_directories(${TEST_EXE} PRIVATE ${PROJECT_SOURCE_DIR}/include) + target_include_directories(${TEST_EXE} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) + + target_include_directories(${TEST_EXE} PRIVATE /home/correaa/prj/alf/boost/) + + target_link_libraries(${TEST_EXE} PRIVATE ${Boost_LIBRARIES}) + target_link_directories(${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS}) + + target_include_directories(${TEST_EXE} PRIVATE ${MPI_CXX_INCLUDE_PATH}) + target_link_libraries(MPI::MPI_CXX) # needs cmake 3.9 + + if((NOT ENABLE_CUDA) + AND (NOT + DART_COMPILER_NAME + STREQUAL + "nvcc" + ) + ) + if(NOT DEFINED ENABLE_CIRCLE) + target_compile_options( + ${TEST_EXE} + PRIVATE -Werror + -Wall + -Wextra + $<$: + -fno-common + -Wpedantic + -Wformat-truncation + -fstack-usage> # -Wconversion + $<$,$>: + -fno-common + -Wpedantic + -Wmove> + $<$: + -wd161 + -diag-disable=remark + -Warray-bounds + -Wchar-subscripts + -Wcomment + -Wenum-compare + -Wformat + -Wuninitialized + -Wmaybe-uninitialized + -Wmain + -Wnarrowing + -Wnonnull + -Wparentheses + -Wpointer-sign + -Wreorder + -Wno-return-type + -Wsign-compare + 
-Wsequence-point + -Wtrigraphs + -Wunused-function + -Wunused-but-set-variable + -Wunused-variable + -Wwrite-strings + -Werror + -diag-error:3846 + > + $<$: + /W4> + ) + else() + target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_TEST_DYN_LINK=1") + endif() + endif() + add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) + target_link_libraries(${TEST_EXE} PRIVATE PkgConfig::FFTW) +endforeach() diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi/test/array_2d.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/test/array_2d.cpp similarity index 53% rename from external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi/test/array_2d.cpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/test/array_2d.cpp index 338bbaca39..f91a88fb54 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/mpi/test/array_2d.cpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/mpi_/test/array_2d.cpp @@ -1,40 +1,40 @@ -#if COMPILATION_INSTRUCTIONS -mpic++ -I$HOME/prj/alf $0 -o $0x -lfftw3 -lfftw3_mpi&&time mpirun -n 4 $0x&&rm $0x;exit -#endif +// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- +// Copyright 2019-2022 Alfredo A. 
Correa -#include "../../../fftw/mpi.hpp" +//#include "../../../fftw/mpi.hpp" -#include -#include -#include +#include +#include +#include #include "../../../fftw.hpp" namespace mpi3 = boost::mpi3; namespace multi = boost::multi; -int mpi3::main(int, char*[], mpi3::communicator world){ - multi::fftw::mpi::environment fenv; +int mpi3::main(int, char**, mpi3::communicator /*world*/){ +// multi::fftw::mpi::environment fenv; - multi::fftw::mpi::array, 2> G({41, 321}, world); +// multi::fftw::mpi::array, 2> G({41, 321}, world); +#if 0 if(auto x = G.local_cutout().extensions()) for(auto i : std::get<0>(x)) for(auto j : std::get<1>(x)) G.local_cutout()[i][j] = std::complex(i + j, i + 2*j); - - multi::array, 2> L = G; // world replicas + + multi::array, 2> L = G; // world replicas assert( L == G ); - + using multi::fftw::dft_forward; - dft_forward(L, L); // dft in replicas + dft_forward(L, L); // dft in replicas dft_forward(G, G); if(auto x = G.local_cutout().extensions()) for(auto i : std::get<0>(x)) for(auto j : std::get<1>(x)) if(not(std::abs(G.local_cutout()[i][j] - L[i][j]) < 1e-8)) std::cout<< std::abs(G.local_cutout()[i][j] - L[i][j]) << std::endl; - +#endif return 0; } diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/CMakeLists.txt similarity index 68% rename from external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/CMakeLists.txt rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/CMakeLists.txt index de716182a4..e4d19acf75 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/CMakeLists.txt +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/CMakeLists.txt @@ -1,22 +1,13 @@ -# -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;autowrap:nil;-*- -#[=[Multi Test suite can be run like this: - mkdir -p build - cd build - cmake .. 
[-DENABLE_CUDA=1] - make -j - ctest -j --output-on-error [-T memcheck] - exit -#]=] -cmake_minimum_required(VERSION 3.11) +cmake_minimum_required(VERSION 3.18) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") set(CMAKE_VERBOSE_MAKEFILE ON) #project( -# boost-multi-adaptors-fftw-test -# VERSION 0.1 -# LANGUAGES CXX +# boost-multi-adaptors-fftw-test +# VERSION 0.1 +# LANGUAGES CXX #) if((NOT @@ -30,17 +21,17 @@ if((NOT "nvcc" ) ) -# find_package(Boost REQUIRED COMPONENTS unit_test_framework timer) +# find_package(Boost REQUIRED COMPONENTS unit_test_framework timer) else() link_libraries("-lboost_unit_test_framework") # -lboost_timer") endif() # find_package(PkgConfig REQUIRED) # pkg_search_module( -# FFTW -# REQUIRED -# fftw3 -# IMPORTED_TARGET +# FFTW +# REQUIRED +# fftw3 +# IMPORTED_TARGET # ) # include_directories(PkgConfig::FFTW) @@ -52,43 +43,24 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) if(ENABLE_CUDA OR DEFINED CXXCUDA) - enable_language(CUDA) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") - -# include(FindCUDA/select_compute_arch) -# cuda_detect_installed_gpus(INSTALLED_GPU_CCS_1) -# string(STRIP "${INSTALLED_GPU_CCS_1}" INSTALLED_GPU_CCS_2) -# string( -# REPLACE " " -# ";" -# INSTALLED_GPU_CCS_3 -# "${INSTALLED_GPU_CCS_2}" -# ) -# string( -# REPLACE "." 
-# "" -# CUDA_ARCH_LIST -# "${INSTALLED_GPU_CCS_3}" -# ) -# set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST}) - + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES native) + endif() endif() #find_package(CUDA QUIET) #if(CUDA_FOUND) -# message("CUDA found") -# include_directories(${CUDA_INCLUDE_DIRS}) +# message("CUDA found") +# include_directories(${CUDA_INCLUDE_DIRS}) #else() -# message("CUDA not found") +# message("CUDA not found") #endif() enable_testing() -list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") # needs cmake 3.17 -include(CTest) -# configure_file("config.hpp.in" ${CMAKE_BINARY_DIR}/config.hpp) +include(CTest) include_directories(${CMAKE_BINARY_DIR}) @@ -99,6 +71,7 @@ set(TEST_SRCS core.cpp moved.cpp shift.cpp + so_shift.cpp # thrust.cpp transpose.cpp transpose_square.cpp @@ -109,12 +82,15 @@ foreach(TEST_FILE ${TEST_SRCS}) add_executable(${TEST_EXE} ${TEST_FILE}) if(ENABLE_CUDA OR DEFINED CXXCUDA) set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) - # set_property(TARGET ${TEST_EXE} PROPERTY "${CUDA_ARCH_LIST}") + # set_property(TARGET ${TEST_EXE} PROPERTY "${CUDA_ARCH_LIST}") target_compile_options(${TEST_EXE} PRIVATE -std=c++17) endif() # target_compile_features (${TEST_EXE} PUBLIC cxx_std_17) + target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS}) + target_compile_definitions(${TEST_EXE} PRIVATE BOOST_TEST_DYN_LINK=1) + target_compile_definitions(${TEST_EXE} PRIVATE BOOST_TEST_MODULE="C++ Unit Tests for Multi FFTW") target_include_directories(${TEST_EXE} PRIVATE ${PROJECT_SOURCE_DIR}/include) target_include_directories(${TEST_EXE} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) @@ -128,7 +104,7 @@ foreach(TEST_FILE ${TEST_SRCS}) "nvcc" ) ) - if(NOT DEFINED ENABLE_CIRCLE) + if(NOT DEFINED ENABLE_CIRCLE) target_compile_options( ${TEST_EXE} PRIVATE -Werror @@ -142,7 +118,9 @@ foreach(TEST_FILE ${TEST_SRCS}) $<$,$>: -fno-common 
-Wpedantic - -Wmove> + -Wmove + -Wno-error=\#warnings + > $<$: -wd161 -diag-disable=remark @@ -177,6 +155,8 @@ foreach(TEST_FILE ${TEST_SRCS}) target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_TEST_DYN_LINK=1") endif() endif() - add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) - target_link_libraries(${TEST_EXE} PRIVATE PkgConfig::FFTW) + + target_link_libraries(${TEST_EXE} PRIVATE multi PkgConfig::FFTW) + + add_test(NAME ${TEST_EXE} COMMAND $) endforeach() diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/combinations.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/combinations.cpp new file mode 100644 index 0000000000..1ceabe45c2 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/combinations.cpp @@ -0,0 +1,169 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +#include // NOLINT(build/c++11) +#include +#include +#include + +namespace multi = boost::multi; + +template<> +inline constexpr bool multi::force_element_trivial_default_construction> = true; + +namespace utf = boost::unit_test::framework; + +using fftw_fixture = multi::fftw::environment; +BOOST_TEST_GLOBAL_FIXTURE(fftw_fixture); + +class watch : private std::chrono::high_resolution_clock { // NOSONAR(cpp:S4963) this class will report timing on destruction + std::string label_; + time_point start_ = now(); + + public: + explicit watch(std::string label) : label_{std::move(label)} {} + + watch(watch const&) = delete; + watch(watch&&) = delete; + + auto operator=(watch const&) = delete; + auto operator=(watch&&) = delete; + + auto elapsed_sec() const { return std::chrono::duration(now() - start_).count(); } + ~watch() { std::cerr << label_ << ": " << elapsed_sec() << " sec" << '\n'; } +}; + +template using marray = multi::array; +constexpr auto exts = 
multi::extensions_t<4>({6, 12, 24, 12}); + +BOOST_AUTO_TEST_CASE(fft_combinations, *boost::unit_test::tolerance(0.00001)) { + using complex = std::complex; + + auto const in = [] { + marray ret(exts); + std::generate( + ret.data_elements(), ret.data_elements() + ret.num_elements(), + [eng = std::default_random_engine{std::random_device{}()}, + uniform_01 = std::uniform_real_distribution<>{}]() mutable { + return complex{uniform_01(eng), uniform_01(eng)}; + } + ); + return ret; + }(); + + // NOLINTNEXTLINE(fuchsia-default-arguments-calls) + std::vector> const which_cases = { + {false, true, true, true}, + {false, true, true, false}, + { true, false, false, false}, + { true, true, false, false}, + {false, false, true, false}, + {false, false, false, false}, + }; + + using std::cout; + using namespace std::string_literals; // NOLINT(build/namespaces) for ""s + + for(auto which : which_cases) { // NOLINT(altera-unroll-loops) + cout << "case "; + copy(begin(which), end(which), std::ostream_iterator{cout, ", "}); + cout << "\n"; + + marray out = in; + { + auto const pln = multi::fftw::plan::forward(which, in.base(), in.layout(), out.base(), out.layout()); + watch const unnamed{"cpu_oplac planned %ws wall, CPU (%p%)\n"s}; + pln.execute(in.base(), out.base()); + } + { + auto in_rw = in; + watch const unnamed{"cpu_iplac %ws wall, CPU (%p%)\n"s}; + multi::fftw::dft_forward(which, in_rw, in_rw); + } + { + auto in_rw = in; + auto const pln = multi::fftw::plan::forward(which, in_rw.base(), in_rw.layout(), in_rw.base(), in_rw.layout()); + watch const unnamed{"cpu_iplac planned %ws wall, CPU (%p%)\n"s}; + pln.execute(in_rw.base(), in_rw.base()); + } + { + auto in_rw = in; + auto const pln = multi::fftw::plan::forward(which, in_rw.base(), in_rw.layout(), in_rw.base(), in_rw.layout()); + watch const unnamed{"cpu_iplac planned measured %ws wall, CPU (%p%)\n"s}; + pln.execute(in_rw.base(), in_rw.base()); + } + } +} + +BOOST_AUTO_TEST_CASE(fftw_4D_power_benchmark, 
*boost::unit_test::enabled()) { + using namespace std::string_literals; // NOLINT(build/namespaces) for ""s + + using complex = std::complex; + namespace fftw = multi::fftw; + + marray in(exts); + std::iota(in.data_elements(), in.data_elements() + in.num_elements(), 1.2); + + BOOST_REQUIRE(in[0][0][0][0] == 1.2); + std::array which = {false, true, true, true}; + [&, unnamed = watch{utf::current_test_case().full_name() + " inplace FTTT"s}] { + fftw::dft(which, in, fftw::forward); + }(); + [&, unnamed = watch{utf::current_test_case().full_name() + " inplace FTTT"s}] { + fftw::dft(which, in, fftw::forward); + }(); + auto in0000 = in[0][0][0][0]; + BOOST_REQUIRE(in0000 != 1.2); + + marray out(exts); + [&, unnamed = watch{utf::current_test_case().full_name() + " outofplace FTTT"s}] { + fftw::dft(which, in, out, fftw::forward); + }(); + [&, unnamed = watch{utf::current_test_case().full_name() + " outofplace FTTT"s}] { + fftw::dft(which, in, out, fftw::forward); + }(); + [&, unnamed = watch{utf::current_test_case().full_name() + " outofplace FTTT"s}] { + fftw::dft(which, in, out, fftw::forward); + }(); + [&, unnamed = watch{utf::current_test_case().full_name() + " outofplace+alloc FTTT"s}] { + marray out2(exts); + fftw::dft(which, in, out2, fftw::forward); + }(); + [&, unnamed = watch{utf::current_test_case().full_name() + " outofplace+alloc FTTT"s}] { + marray out2(exts); + fftw::dft(which, in, out2, fftw::forward); + }(); + BOOST_REQUIRE(in0000 == in[0][0][0][0]); +} + +BOOST_AUTO_TEST_CASE(fftw_4D_power_benchmark_syntax) { + // NOLINTNEXTLINE(fuchsia-default-arguments-calls) use of std::vector + std::vector> const which_cases = { + {false, true, true, true}, + {false, true, true, false}, + { true, false, false, false}, + { true, true, false, false}, + {false, false, true, false}, + {false, false, false, false}, + }; + using complex = std::complex; + + auto const in = [] { + marray ret(exts); + std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), 
+ [eng = std::default_random_engine{std::random_device{}()}, + uniform_01 = std::uniform_real_distribution<>{}]() mutable { + return complex{uniform_01(eng), uniform_01(eng)}; + }); + return ret; + }(); + + auto io = in; + (void)io; + BOOST_REQUIRE( io.extensions() == in.extensions() ); +} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/copy.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/copy.cpp similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/copy.cpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/copy.cpp diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/core.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/core.cpp new file mode 100644 index 0000000000..0a83028d77 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/core.cpp @@ -0,0 +1,272 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include + +#include // NOLINT(build/c++11) bug in cpplint +#include +#include + +#include + +namespace { + +namespace multi = boost::multi; +namespace fftw = multi::fftw; + +template auto power(M const& elem) -> decltype(std::norm(elem)) { return std::norm(elem); } + +template= 1)>> // DELETE((M::rank::value < 1))> +auto power(M const& array) { + return accumulate(begin(array), end(array), 0.0, [](auto const& alpha, auto const& omega) { return alpha + power(omega); }); +} + +struct sum_power { + template auto operator()(A const& alpha, B const& omega) const { return alpha + power(omega); } +}; + +} // end anonymous namespace + +template class randomizer { + std::mt19937_64 gen_; // NOSONAR rng good enough for the test + + public: + explicit randomizer(unsigned int seed) : gen_(seed) {} + + template::reference> void operator()(M&& arr) { + std::for_each(std::begin(std::forward(arr)), std::end(std::forward(arr)), [self = this](R elem) { self->operator()(elem); }); + } + void operator()(T& elem) { // NOLINT(runtime/references) passing by reference + std::normal_distribution gauss; + elem = gauss(gen_); + } +}; + +template class randomizer> { + std::mt19937_64 gen_; // NOSONAR rng good enough for the test + + public: + explicit randomizer(unsigned int seed) : gen_(seed) {} + + template::reference> void operator()(M&& arr) { + std::for_each(std::begin(std::forward(arr)), std::end(std::forward(arr)), [self = this](R elem) { self->operator()(elem); }); + } + void operator()(std::complex& zee) { // NOLINT(runtime/references) : passing by reference + std::normal_distribution gauss; + zee = std::complex(gauss(gen_), gauss(gen_)); + } +}; + +using fftw_fixture = fftw::environment; +BOOST_TEST_GLOBAL_FIXTURE(fftw_fixture); + +BOOST_AUTO_TEST_CASE(fftw_2D_identity_2, *boost::unit_test::tolerance(0.0001)) { + using complex = std::complex; + + [[maybe_unused]] auto const I = complex{0.0, 1.0}; // 
NOLINT(readability-identifier-length) imag unit + + multi::array const in = { + { 1.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, + { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, + { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, + { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, + {31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, + }; + multi::array out(extensions(in)); + + multi::fftw::dft_forward({false, false}, in, out); // out = in; + + BOOST_TEST_REQUIRE( in[2][3].real() == out[2][3].real() ); + BOOST_TEST_REQUIRE( in[2][3].imag() == out[2][3].imag() ); + + BOOST_REQUIRE( out == in ); +} + +BOOST_AUTO_TEST_CASE(fftw_2D_many, *boost::unit_test::tolerance(0.0001)) { + using complex = std::complex; + + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array const in = { + { 1.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, + { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, + { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, + { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, + {31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, + }; + multi::array out(extensions(in)); + + using multi::fftw::dft_forward; + + multi::fftw::dft_forward({false, false}, rotated(in), rotated(out)); + BOOST_REQUIRE( in == out ); +} + +BOOST_AUTO_TEST_CASE(fftw_many1_from_2) { + using complex = std::complex; + + [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + std::random_device dev; + multi::array in({3, 10}); + randomizer{dev()}(in); + multi::array out({3, 10}); + fftw::dft_forward({false, true}, in, out); + + multi::array out2({3, 10}); + std::transform(in.begin(), in.end(), out2.begin(), out2.begin(), [](auto const& in_elem, auto&& out2_elem) { + fftw::dft_forward({true}, in_elem, out2_elem); + return std::forward(out2_elem); + }); + + BOOST_REQUIRE(out2 == out); +} + +BOOST_AUTO_TEST_CASE(fftw_many2_from_3) { + using complex = std::complex; + + [[maybe_unused]] auto const I = complex{0.0, 1.0}; 
// NOLINT(readability-identifier-length) imag unit + + std::random_device dev; + multi::array in({3, 5, 6}); + randomizer{dev()}(in); + multi::array out({3, 5, 6}); + fftw::dft_forward({false, true, true}, in, out); + + multi::array out2({3, 5, 6}); + std::transform(in.begin(), in.end(), out2.begin(), out2.begin(), [](auto const& in_elem, auto&& out2_elem) { + fftw::dft_forward({true, true}, in_elem, out2_elem); + return std::forward(out2_elem); + }); + + BOOST_REQUIRE(out2 == out); +} + +BOOST_AUTO_TEST_CASE(fftw_2D_power_plan) { + using complex = std::complex; + + [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in({16, 16}); + std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + multi::array out(extensions(in)); + auto const pln = multi::fftw::plan::forward({true, true}, in.base(), in.layout(), out.base(), out.layout()); + pln.execute(in.base(), out.base()); + BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-7 ); +} + +BOOST_AUTO_TEST_CASE(fftw_2D_power_plan_modern) { + using complex = std::complex; + + [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in({16, 16}); + std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + multi::array out(extensions(in)); + auto const pln = multi::fftw::plan::forward({true, true}, in.base(), in.layout(), out.base(), out.layout()); + pln.execute(in.base(), out.base()); + BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 ); +} + +BOOST_AUTO_TEST_CASE(fftw_2D_power_plan_modern_measure) { + using complex = std::complex; + + [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in({16, 16}); + std::iota(data_elements(in), 
data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + multi::array out(extensions(in)); + auto const pln = multi::fftw::plan::forward({true, true}, in.base(), in.layout(), out.base(), out.layout()); + pln.execute(in.base(), out.base()); + BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 ); +} + +BOOST_AUTO_TEST_CASE(fftw_2D_power_dft) { + using complex = std::complex; + [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in({16, 16}); + std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + multi::array out(extensions(in)); + multi::fftw::dft_forward({true, true}, in, out); + BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 ); +} + +BOOST_AUTO_TEST_CASE(fftw_3D_power_in_place_over_ref_inplace) { + using complex = std::complex; + + [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array io({4, 4, 4}); + std::iota(io.data_elements(), io.data_elements() + io.num_elements(), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + auto const powerin = power(io); + + // fftw::dft_inplace(multi::array_ref(io.data(), io.extensions()), fftw::forward); + + fftw::dft_forward( + {true, true, true}, + multi::array_ref(data_elements(io), extensions(io)), + multi::array_ref(data_elements(io), extensions(io)) + ); + BOOST_REQUIRE( powerin - power(io)/num_elements(io) < 1e-10 ); +} + +BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref) { + using complex = std::complex; + + [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array const in = { + {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, + { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, + { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, + { 3.0 - 
1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, + { 31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, + }; +} + +BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_naive_square) { + using complex = std::complex; + + [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in = { + {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, + { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, + { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, + }; + multi::array const in_transpose = in.transposed(); + in = in.transposed(); + BOOST_REQUIRE( in != in_transpose ); +} + +BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_nonpod) { + using namespace std::string_literals; // NOLINT(build/namespaces) for ""s + multi::array in = { + {"100.0 + 2.0*I"s, "9.0 - 1.0*I"s, "2.0 + 4.0*I"s}, + { "3.0 + 3.0*I"s, "7.0 - 4.0*I"s, "1.0 + 9.0*I"s}, + { "4.0 + 1.0*I"s, "5.0 + 3.0*I"s, "2.0 + 4.0*I"s}, + { "3.0 - 1.0*I"s, "8.0 + 7.0*I"s, "2.0 + 1.0*I"s}, + { "31.0 - 1.0*I"s, "18.0 + 7.0*I"s, "2.0 + 10.0*I"s}, + }; + multi::array const in_transpose = in.transposed(); + in = in.transposed(); + BOOST_REQUIRE( in != in_transpose ); +} + +BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_nonpod_square) { + using namespace std::string_literals; // NOLINT(build/namespaces) for ""s + + multi::array in = { + {"100.0 + 2.0*I"s, "9.0 - 1.0*I"s, "2.0 + 4.0*I"s}, + { "3.0 + 3.0*I"s, "7.0 - 4.0*I"s, "1.0 + 9.0*I"s}, + { "4.0 + 1.0*I"s, "5.0 + 3.0*I"s, "2.0 + 4.0*I"s}, + }; + multi::array const in_transpose = in.transposed(); + in = in.transposed(); + BOOST_REQUIRE( in != in_transpose ); +} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/core.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/memory.cpp similarity index 55% rename from external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/core.cpp rename to 
external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/memory.cpp index 8e70f26b4b..9a21716c8c 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/core.cpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/memory.cpp @@ -1,37 +1,17 @@ // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. Correa +// Copyright 2023 Alfredo A. Correa -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW core" +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW memory" #include -#include "../../../adaptors/fftw.hpp" -#include "../../../array.hpp" +#include +#include #include #include #include -//#include // TODO(correaa) make lib work with thrust complex - -namespace { - -namespace multi = boost::multi; -namespace fftw = multi::fftw; - -using complex = std::complex; [[maybe_unused]] complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit - -template auto power(M const& elem) -> decltype(std::norm(elem)) {return std::norm(elem);} - -template -auto power(M const& array) { - return accumulate(begin(array), end(array), 0., [](auto const& alpha, auto const& omega) {return alpha + power(omega);}); -} - -struct sum_power{ - template auto operator()(A const& alpha, B const& omega) const {return alpha + power(omega);} -}; - -} // end anonymous namespace +#include class watch : private std::chrono::high_resolution_clock{ std::string label; @@ -39,17 +19,13 @@ class watch : private std::chrono::high_resolution_clock{ public: explicit watch(std::string label) : label{std::move(label)} {} - watch(watch const&) = delete; - watch(watch&&) = default; - auto operator=(watch const&) = delete; - auto operator=(watch&&) -> watch& = default; // NOLINT(fuchsia-trailing-return): - ~watch(){ - std::cerr<< label<<": "<< std::chrono::duration(now() - start).count() <<" sec"<(now() - start).count() <<" sec"< struct randomizer { - template void operator()(M&& 
array) const {for(auto&& elem : array) {operator()(elem);}} + template void operator()(M&& arr) const { + std::for_each(arr.begin(), arr.end(), [&self=*this](auto&& elem) {self.operator()(elem);}); + } void operator()(T& elem) const { // NOLINT(runtime/references) passing by reference static std::random_device dev; static std::mt19937 gen{dev()}; static std::normal_distribution gauss; elem = gauss(gen); @@ -57,77 +33,85 @@ template struct randomizer { }; template struct randomizer> { - template void operator()(M&& array) const {for(auto&& elem : array) {operator()(elem);}} + template void operator()(M&& arr) const { + std::for_each(arr.begin(), arr.end(), [&self=*this](auto&& elem) {self.operator()(elem);}); + } void operator()(std::complex& zee) const { // NOLINT(runtime/references) : passing by reference static std::random_device dev; static std::mt19937 gen{dev()}; static std::normal_distribution gauss; zee = std::complex(gauss(gen), gauss(gen)); } }; -struct fftw_fixture : fftw::environment { -// void setup(){} -// void teardown(){}//fftw_cleanup();} -}; - +using fftw_fixture = fftw::environment; BOOST_TEST_GLOBAL_FIXTURE( fftw_fixture ); BOOST_AUTO_TEST_CASE(fftw_3D) { using complex = std::complex; // TODO(correaa) make it work with thrust multi::array in({10, 10, 10}); - in[2][3][4] = 99.; + in[2][3][4] = 99.0; multi::fftw::dft_forward(in); - BOOST_REQUIRE(in[2][3][4] == 99.); + BOOST_REQUIRE(in[2][3][4] == 99.0); } BOOST_AUTO_TEST_CASE(fftw_1D_const) { - multi::array const in = {1. + 2.*I, 2. + 3. *I, 4. + 5.*I, 5. 
+ 6.*I}; + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array const in = {1.0 + 2.0*I, 2.0 + 3.0 *I, 4.0 + 5.0*I, 5.0 + 6.0*I}; - auto fwd = multi::fftw::dft(in, fftw::forward); // Fourier[in, FourierParameters -> {1, -1}] + auto fwd = multi::fftw::dft(in, fftw::forward); // Fourier[in, FourierParameters -> {1, -1}] BOOST_REQUIRE( size(fwd) == size(in) ); - BOOST_REQUIRE( fwd[2] == -2. - 2.*I ); - BOOST_REQUIRE( in[1] == +2. + 3.*I ); + BOOST_REQUIRE( fwd[2] == -2.0 - 2.0*I ); + BOOST_REQUIRE( in[1] == +2.0 + 3.0*I ); - auto bwd = multi::fftw::dft(in, fftw::forward); // InverseFourier[in, FourierParameters -> {-1, -1}] - BOOST_REQUIRE( bwd[2] == -2. - 2.*I ); + auto bwd = multi::fftw::dft(in, fftw::forward); // InverseFourier[in, FourierParameters -> {-1, -1}] + BOOST_REQUIRE( bwd[2] == -2.0 - 2.0*I ); } BOOST_AUTO_TEST_CASE(fftw_2D_identity_2, *boost::unit_test::tolerance(0.0001)) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array const in = { - { 1. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. 
+ 10.*I} + { 1.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} }; multi::array out(extensions(in)); - multi::fftw::dft({false, false}, in, out, fftw::forward); // out = in; + multi::fftw::dft({false, false}, in, out, fftw::forward); // out = in; BOOST_REQUIRE( out == in ); } BOOST_AUTO_TEST_CASE(fftw_2D_identity, *boost::unit_test::tolerance(0.0001)) { + using complex = std::complex; + + [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array const in = { - { 1. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} + { 1.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, + { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, + { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, + { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, + {31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, }; auto fwd = multi::fftw::dft({}, in, fftw::forward); BOOST_REQUIRE( fwd == in ); } BOOST_AUTO_TEST_CASE(fftw_2D, *boost::unit_test::tolerance(0.0001)) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array const in = { - { 1. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. 
+ 10.*I} + { 1.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} }; namespace fftw = multi::fftw; auto fwd = fftw::dft_forward(in); - BOOST_TEST_REQUIRE( fwd[3][1].real() == -19.0455 ); // Fourier[in, FourierParameters -> {1, -1}][[4]][[2]] + BOOST_TEST_REQUIRE( fwd[3][1].real() == -19.0455 ); // Fourier[in, FourierParameters -> {1, -1}][[4]][[2]] BOOST_TEST_REQUIRE( fwd[3][1].imag() == - 2.22717 ); multi::array const in0 = {1. + 2.*I, 9. - 1.*I, 2. + 4.*I}; @@ -136,30 +120,34 @@ BOOST_AUTO_TEST_CASE(fftw_2D, *boost::unit_test::tolerance(0.0001)) { } BOOST_AUTO_TEST_CASE(fftw_2D_rotated, *boost::unit_test::tolerance(0.0001)) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + using multi::array; array const in = { - { 1. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} + { 1.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} }; using multi::fftw::dft_forward; auto fwd = dft_forward(in); BOOST_REQUIRE( dft_forward(rotated(in)[0]) - == dft_forward(array{1.+2.*I, 3.+3.*I, 4. + 1.*I, 3. - 1.*I, 31. - 1.*I}) + == dft_forward(array{1.0 + 2.0*I, 3.0 + 3.0*I, 4.0 + 1.0*I, 3.0 - 1.0*I, 31.0 - 1.0*I}) ); BOOST_REQUIRE( dft_forward(rotated(in)) == rotated(fwd) ); } BOOST_AUTO_TEST_CASE(fftw_2D_many, *boost::unit_test::tolerance(0.0001)) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array const in = { - { 1. + 2.*I, 9. - 1.*I, 2. 
+ 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} + { 1.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} }; multi::array out(extensions(in)); @@ -179,33 +167,41 @@ BOOST_AUTO_TEST_CASE(fftw_2D_many, *boost::unit_test::tolerance(0.0001)) { } BOOST_AUTO_TEST_CASE(fftw_1D_const_forward) { - multi::array const in = {1. + 2.*I, 2. + 3. *I, 4. + 5.*I, 5. + 6.*I}; + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array const in = {1.0 + 2.0*I, 2.0 + 3.0 *I, 4.0 + 5.0*I, 5.0 + 6.0*I}; - auto fwd = multi::fftw::dft_forward(in); // Fourier[in, FourierParameters -> {1, -1}] + auto fwd = multi::fftw::dft_forward(in); // Fourier[in, FourierParameters -> {1, -1}] BOOST_REQUIRE( size(fwd) == size(in) ); - BOOST_REQUIRE( fwd[2] == -2. - 2.*I ); - BOOST_REQUIRE( in[1] == +2. + 3.*I ); + BOOST_REQUIRE( fwd[2] == -2.0 - 2.0*I ); + BOOST_REQUIRE( in[1] == +2.0 + 3.0*I ); - auto bwd = multi::fftw::dft_forward(in); // InverseFourier[in, FourierParameters -> {-1, -1}] - BOOST_REQUIRE( bwd[2] == -2. - 2.*I ); + auto bwd = multi::fftw::dft_forward(in); // InverseFourier[in, FourierParameters -> {-1, -1}] + BOOST_REQUIRE( bwd[2] == -2.0 - 2.0*I ); } BOOST_AUTO_TEST_CASE(fftw_1D_const_sign) { - multi::array const in = {1. + 2.*I, 2. + 3. *I, 4. + 5.*I, 5. 
+ 6.*I}; + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array const in = {1.0 + 2.0*I, 2.0 + 3.0*I, 4.0 + 5.0*I, 5.0 + 6.0*I}; - auto const fwd = multi::fftw::dft(in, static_cast(+1)); // Fourier[in, FourierParameters -> {1, -1}] + auto const fwd = multi::fftw::dft(in, static_cast(+1)); // Fourier[in, FourierParameters -> {1, -1}] BOOST_REQUIRE( size(fwd) == size(in) ); BOOST_REQUIRE( fwd[2] == -2. - 2.*I ); } BOOST_AUTO_TEST_CASE(fftw_1D_const_copy_by_false) { - multi::array const in = {1. + 2.*I, 2. + 3. *I, 4. + 5.*I, 5. + 6.*I}; + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array const in = {1.0 + 2.0*I, 2.0 + 3.0 *I, 4.0 + 5.0*I, 5.0 + 6.0*I}; auto const out = multi::fftw::dft({false}, in, static_cast(+1)); BOOST_REQUIRE( out == in ); } BOOST_AUTO_TEST_CASE(fftw_1D_const_copy_by_false_forward) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array const in = {1. + 2.*I, 2. + 3. *I, 4. + 5.*I, 5. 
+ 6.*I}; auto const out = multi::fftw::dft_forward({false}, in); @@ -213,33 +209,41 @@ BOOST_AUTO_TEST_CASE(fftw_1D_const_copy_by_false_forward) { } BOOST_AUTO_TEST_CASE(fftw_many1_from_2) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in({3, 10}); randomizer{}(in); multi::array out({3, 10}); fftw::dft({false, true}, in, out, fftw::forward); multi::array out2({3, 10}); - for(int i = 0; i!=size(in); ++i) { - fftw::dft_forward(in[i], out2[i]); - } + std::transform(in.begin(), in.end(), out2.begin(), out2.begin(), [](auto const& in_elem, auto&& out2_elem) { + fftw::dft_forward(in_elem, out2_elem); + return std::forward(out2_elem); + }); BOOST_REQUIRE(out2 == out); } BOOST_AUTO_TEST_CASE(fftw_many2_from_3) { - multi::array in({3, 5, 6}); randomizer{}(in); + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in ({3, 5, 6}); randomizer{}(in); multi::array out({3, 5, 6}); fftw::dft_forward({false, true, true}, in, out); multi::array out2({3, 5, 6}); - for(int i = 0; i!=size(in); ++i) { - fftw::dft_forward(in[i], out2[i]); - } + std::transform(in.begin(), in.end(), out2.begin(), out2.begin(), [](auto const& in_elem, auto&& out2_elem) { + fftw::dft_forward(in_elem, out2_elem); + return std::forward(out2_elem); + }); BOOST_REQUIRE(out2 == out); } BOOST_AUTO_TEST_CASE(fftw_many2_from_2) { - multi::array in({5, 6}); randomizer{}(in); + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in ({5, 6}); randomizer{}(in); multi::array out({5, 6}); fftw::dft({true, true}, in, out, static_cast(FFTW_FORWARD)); @@ -249,20 +253,24 @@ BOOST_AUTO_TEST_CASE(fftw_many2_from_2) { } BOOST_AUTO_TEST_CASE(fftw_4D) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // 
NOLINT(readability-identifier-length) imag unit + multi::array const in = [] { - multi::array in({10, 10, 10, 10}); in[2][3][4][5] = 99.; return in; + multi::array in({6, 6, 6, 6}); in[2][3][4][5] = 99.0; return in; }(); auto fwd = multi::fftw::dft({true, true, true, true}, in, fftw::forward); - BOOST_REQUIRE(in[2][3][4][5] == 99.); + BOOST_REQUIRE(in[2][3][4][5] == 99.0); } BOOST_AUTO_TEST_CASE(fftw_4D_many) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + auto const in = [] { - multi::array in({97, 95, 101, 10}, 0.); - in[2][3][4][5] = 99.; return in; + multi::array in({7, 8, 9, 10}, {0.0, 0.0}); + in[2][3][4][5] = 99.0; return in; }(); auto fwd = multi::fftw::dft({true, true, true, false}, in, fftw::forward); - BOOST_REQUIRE( in[2][3][4][5] == 99. ); + BOOST_REQUIRE( in[2][3][4][5] == 99.0 ); multi::array out(extensions(in)); multi::fftw::many_dft(begin(unrotated(in)), end(unrotated(in)), begin(unrotated(out)), fftw::forward); @@ -270,6 +278,8 @@ BOOST_AUTO_TEST_CASE(fftw_4D_many) { } BOOST_AUTO_TEST_CASE(cufft_many_2D) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + auto const in = [] { multi::array ret({10, 10, 10}); std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), @@ -288,72 +298,90 @@ BOOST_AUTO_TEST_CASE(cufft_many_2D) { } BOOST_AUTO_TEST_CASE(fftw_5D) { - multi::array in({4, 5, 6, 7, 8}, 0.); + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in({4, 5, 6, 7, 8}, {0.0, 0.0}); BOOST_REQUIRE( size(in) == 4 ); - in[2][3][4][5][6] = 99.; + in[2][3][4][5][6] = 99.0; auto const out_fwd = multi::fftw::dft(in, fftw::forward); - BOOST_REQUIRE(in[2][3][4][5][6] == 99.); - BOOST_REQUIRE( power(in) - power(out_fwd)/num_elements(out_fwd) < 1e-8 ); + 
BOOST_REQUIRE(in[2][3][4][5][6] == 99.0); + BOOST_TEST_REQUIRE( power(in) - power(out_fwd)/num_elements(out_fwd) < 1e-5 ); } BOOST_AUTO_TEST_CASE(fftw_2D_power_plan) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in({16, 16}); - std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code multi::array out(extensions(in)); multi::fftw::plan const pln{in, out, fftw::forward, fftw::preserve_input}; - pln(); //execute(p); //p.execute(); - BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 ); + pln(); + BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-7 ); } BOOST_AUTO_TEST_CASE(fftw_2D_power_plan_modern) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in({16, 16}); - std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code multi::array out(extensions(in)); multi::fftw::plan const pln{in.layout(), out.layout(), fftw::forward, fftw::preserve_input}; - pln(in.base(), out.base()); //execute(p); //p.execute(); + pln(in.base(), out.base()); BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 ); } BOOST_AUTO_TEST_CASE(fftw_2D_power_plan_modern_measure) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in({16, 16}); - std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); 
// NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code multi::array out(extensions(in)); multi::fftw::plan const pln{in.layout(), out.layout(), fftw::forward, fftw::preserve_input}; - pln(in.base(), out.base()); //execute(p); //p.execute(); + pln(in.base(), out.base()); BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 ); } BOOST_AUTO_TEST_CASE(fftw_2D_power_dft) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in({16, 16}); - std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code multi::array out(extensions(in)); multi::fftw::dft_forward(in, out); BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 ); } BOOST_AUTO_TEST_CASE(fftw_2D_power_dft_out) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in({16, 16}); - std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code auto out = multi::fftw::dft(in, fftw::forward); BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 ); } BOOST_AUTO_TEST_CASE(fftw_2D_power_dft_out_default) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in({16, 16}); - std::iota(data_elements(in), 
data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + std::iota(data_elements(in), data_elements(in) + num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code auto out = multi::fftw::dft(in, fftw::forward); BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-8 ); } BOOST_AUTO_TEST_CASE(fftw_3D_power) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in({4, 4, 4}); std::iota(in.data_elements(), in.data_elements() + in.num_elements(), 1.2); - multi::array out = fftw::dft(in, fftw::forward); + multi::array const out = fftw::dft(in, fftw::forward); BOOST_REQUIRE( std::abs(power(in) - power(out)/num_elements(out)) < 1e-10 ); } BOOST_AUTO_TEST_CASE(fftw_3D_power_in_place) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array io({4, 4, 4}); std::iota(io.data_elements(), io.data_elements() + io.num_elements(), 1.2); auto powerin = power(io); fftw::dft_inplace(io, fftw::forward); @@ -361,27 +389,33 @@ BOOST_AUTO_TEST_CASE(fftw_3D_power_in_place) { } BOOST_AUTO_TEST_CASE(fftw_3D_power_in_place_over_ref_inplace) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array io({4, 4, 4}); - std::iota(io.data_elements(), io.data_elements() + io.num_elements(), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + std::iota(io.data_elements(), io.data_elements() + io.num_elements(), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code auto const powerin = power(io); -// fftw::dft_inplace(multi::array_ref(io.data(), io.extensions()), fftw::forward); +// fftw::dft_inplace(multi::array_ref(io.data(), io.extensions()), fftw::forward); 
fftw::dft_inplace(multi::array_ref(data_elements(io), extensions(io)), fftw::forward); BOOST_REQUIRE( powerin - power(io)/num_elements(io) < 1e-10 ); } BOOST_AUTO_TEST_CASE(fftw_3D_power_out_of_place_over_ref) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in({4, 4, 4}); - std::iota(data_elements(in), data_elements(in)+num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + std::iota(data_elements(in), data_elements(in)+num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code multi::array out({4, 4, 4}); multi::array_ref(data_elements(out), extensions(out)) = fftw::dft(multi::array_cref(data_elements(in), extensions(in)), fftw::forward); BOOST_REQUIRE( power(in) - power(out)/num_elements(out) < 1e-10 ); } BOOST_AUTO_TEST_CASE(fftw_3D_power_out_of_place_over_temporary) { - double powerin = NAN; + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + double powerin = std::numeric_limits::quiet_NaN(); auto fun = [&]() { multi::array in({4, 4, 4}); - std::iota(data_elements(in), data_elements(in)+num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + std::iota(data_elements(in), data_elements(in)+num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code powerin = power(in); return in; }; @@ -390,21 +424,25 @@ BOOST_AUTO_TEST_CASE(fftw_3D_power_out_of_place_over_temporary) { } BOOST_AUTO_TEST_CASE(fftw_2D_transposition_square_inplace) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in = { - {11., 12.}, - {21., 22.} + { {11.0, 0.0}, {12.0, 0.0} }, + { {21.0, 0.0}, {22.0, 0.0} } }; BOOST_REQUIRE( in[1][0] == 21. 
); multi::fftw::copy(in, rotated(in)); - BOOST_TEST( in[0][1].real() == 21. ); - BOOST_TEST( in[0][1].imag() == 0. ); + BOOST_TEST( in[0][1].real() == 21.0 ); + BOOST_TEST( in[0][1].imag() == 0.0 ); } BOOST_AUTO_TEST_CASE(fftw_4D_inq_poisson) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array const in = [] { - multi::array in({50, 100, 137, 1}); - std::iota(data_elements(in), data_elements(in)+num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code + multi::array in({5, 10, 17, 1}); + std::iota(data_elements(in), data_elements(in)+num_elements(in), 1.2); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): test code return in; }(); @@ -419,12 +457,15 @@ BOOST_AUTO_TEST_CASE(fftw_4D_inq_poisson) { BOOST_TEST( power(in) == power(out)/get<1>(sizes(out))/get<2>(sizes(out)) , boost::test_tools::tolerance(1e-10) ); } -BOOST_AUTO_TEST_CASE(fftw_1D_power) { - multi::array in(16, 0.); + +BOOST_AUTO_TEST_CASE(fftw_1D_power_c_interface) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in(16, {0.0, 0.0}); BOOST_REQUIRE( size(in) == 16 ); - std::iota(begin(in), end(in), 1.); - BOOST_TEST_REQUIRE( power(in) == 1496. ); + std::iota(begin(in), end(in), 1.0); + BOOST_TEST_REQUIRE( power(in) == 1496.0 ); multi::array out(extensions(in)); @@ -437,11 +478,11 @@ BOOST_AUTO_TEST_CASE(fftw_1D_power) { #if 0 BOOST_AUTO_TEST_CASE(fftw_2D_const_range_part1) { multi::array const in = { - { 1. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. 
+ 10.*I} + { 1.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} }; { @@ -537,12 +578,14 @@ BOOST_AUTO_TEST_CASE(fftw_2D_const_range_part2) { #endif BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array const in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} + {100.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} }; { @@ -556,7 +599,7 @@ BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref) { BOOST_REQUIRE( fwd == in ); } { - multi::array fwd = multi::fftw::ref(in); + multi::array const fwd = multi::fftw::ref(in); BOOST_REQUIRE( fwd == in ); } { @@ -569,7 +612,7 @@ BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref) { BOOST_TEST_REQUIRE( fwd[2][0] == in[0][2] ); } { - multi::array fwd({3, 5}, 0.); + multi::array fwd({3, 5}, {0.0, 0.0}); fwd() = multi::fftw::ref(in.transposed()); BOOST_REQUIRE( fwd .size() == 3 ); BOOST_REQUIRE( (~fwd).size() == 5 ); @@ -581,14 +624,15 @@ BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref) { } BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_part2) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array const in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. 
+ 10.*I} + { 100.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} }; - { auto in2 = + multi::fftw::ref(in); BOOST_REQUIRE( in2 == in ); @@ -609,40 +653,28 @@ BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_part2) { multi::array tt({3, 5}); multi::fftw::dft({}, in, tt.transposed(), multi::fftw::forward); - multi::array in2t({3, 5}, 99.); + multi::array in2t({3, 5}, {99.0, 0.0}); in2t() = multi::fftw::ref(in).transposed(); { - auto xs = std::get<0>(in.extensions()); // TODO(correaa) use structured bindings - auto ys = std::get<1>(in.extensions()); - for(auto const ex : xs) { - for(auto const wye : ys) { - std::cout<< in[ex][wye] <<'\t'; - } + std::for_each(in.begin(), in.end(), [](auto const& row) { + std::copy(row.begin(), row.end(), std::ostream_iterator(std::cout, "\t")); std::cout<< std::endl; - } + }); std::cout<< std::endl; } { - auto xs = std::get<0>(in2t.extensions()); - auto ys = std::get<1>(in2t.extensions()); - for(auto const ex : xs) { - for(auto const why : ys) { - std::cout<< in2t[ex][why] <<'\t'; - } + std::for_each(in2t.begin(), in2t.end(), [](auto const& row) { + std::copy(row.begin(), row.end(), std::ostream_iterator(std::cout, "\t")); std::cout<< std::endl; - } + }); std::cout<< std::endl; } { - auto xs = std::get<0>(tt.extensions()); - auto ys = std::get<1>(tt.extensions()); - for(auto ex : xs) { - for(auto wye : ys) { - std::cout<< tt[ex][wye] <<'\t'; - } + std::for_each(tt.begin(), tt.end(), [](auto const& row) { + std::copy(row.begin(), row.end(), std::ostream_iterator(std::cout, "\t")); std::cout<< std::endl; - } + }); std::cout<< std::endl; } @@ -654,7 +686,7 @@ BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_part2) { BOOST_REQUIRE( in2 == in.transposed() ); } { - multi::array fwd({3, 5}, 0.); + multi::array fwd({3, 5}, {0.0, 0.0}); fwd() = multi::fftw::ref(in).transposed(); 
BOOST_REQUIRE( fwd .size() == 3 ); BOOST_REQUIRE( (~fwd).size() == 5 ); @@ -666,14 +698,16 @@ BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_part2) { } BOOST_AUTO_TEST_CASE(fftw_4D_many_new_interface) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + auto const in = [] { - multi::array in({97, 95, 101, 10}, 0.); - in[2][3][4][5] = 99.; + multi::array in({17, 15, 10, 8}, {0.0, 0.0}); + in[2][3][4][5] = 99.0; return in; }(); { auto fwd = + multi::fftw::ref(in)(fftw::forward, fftw::forward, fftw::forward, fftw::none); - BOOST_REQUIRE( in[2][3][4][5] == 99. ); + BOOST_REQUIRE( in[2][3][4][5] == 99.0 ); multi::array out(extensions(in)); multi::fftw::many_dft(begin(unrotated(in)), end(unrotated(in)), begin(unrotated(out)), fftw::forward); @@ -681,7 +715,7 @@ BOOST_AUTO_TEST_CASE(fftw_4D_many_new_interface) { } { auto fwd = + multi::fftw::ref(in)(fftw::forward, fftw::forward, fftw::forward); - BOOST_REQUIRE( in[2][3][4][5] == 99. ); + BOOST_REQUIRE( in[2][3][4][5] == 99.0 ); multi::array out(extensions(in)); multi::fftw::many_dft(begin(unrotated(in)), end(unrotated(in)), begin(unrotated(out)), fftw::forward); @@ -690,23 +724,27 @@ BOOST_AUTO_TEST_CASE(fftw_4D_many_new_interface) { } BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_naive) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. 
+ 10.*I} + { 100.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} }; multi::array const in_transpose = in.transposed(); in = in.transposed(); -// BOOST_REQUIRE( in != in_transpose ); +// BOOST_REQUIRE( in != in_transpose ); } BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_naive_square) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, + {100.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I} }; multi::array const in_transpose = in.transposed(); in = in.transposed(); @@ -714,27 +752,32 @@ BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_naive_square) { } BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. 
+ 10.*I} + {100.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} }; multi::array const in_transpose = in.transposed(); auto* in_base = in.base(); in = multi::fftw::ref(in).transposed(); + BOOST_REQUIRE( in == in_transpose ); BOOST_REQUIRE( in_base == in.base() ); } BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_nested) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} + {100.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} }; multi::array const in_transpose = in.transposed(); auto* in_base = in.base(); @@ -744,10 +787,12 @@ BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_nested) { } BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_square) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. 
+ 4.*I}, + {100.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I} }; multi::array const in_transpose = in.transposed(); auto* in_base = in.base(); @@ -757,10 +802,12 @@ BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_square) { } BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_square_nested) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, + {100.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I} }; multi::array const in_transpose = in.transposed(); auto* in_base = in.base(); @@ -770,42 +817,26 @@ BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_square_nested) { } BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_nonpod) { + using namespace std::string_literals; // NOLINT(build/namespaces) for ""s multi::array in = { - { "100. + 2.*I", "9. - 1.*I", "2. + 4.*I"}, - { "3. + 3.*I", "7. - 4.*I", "1. + 9.*I"}, - { "4. + 1.*I", "5. + 3.*I", "2. + 4.*I"}, - { "3. - 1.*I", "8. + 7.*I", "2. + 1.*I"}, - { "31. - 1.*I", "18. + 7.*I", "2. + 10.*I"} + { "100.0 + 2.0*I"s, "9.0 - 1.0*I"s, "2.0 + 4.0*I"s}, + { "3.0 + 3.0*I"s, "7.0 - 4.0*I"s, "1.0 + 9.0*I"s}, + { "4.0 + 1.0*I"s, "5.0 + 3.0*I"s, "2.0 + 4.0*I"s}, + { "3.0 - 1.0*I"s, "8.0 + 7.0*I"s, "2.0 + 1.0*I"s}, + { "31.0 - 1.0*I"s, "18.0 + 7.0*I"s, "2.0 + 10.0*I"s}, }; multi::array const in_transpose = in.transposed(); in = in.transposed(); -// BOOST_REQUIRE( in != in_transpose ); +// BOOST_REQUIRE( in != in_transpose ); } BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_transposed_nonpod_square) { - #if not defined(__circle_build__) // circle 170 crashes https://github.com/seanbaxter/circle/issues/114 multi::array in = { - { "100. 
+ 2.*I", "9. - 1.*I", "2. + 4.*I"}, - { "3. + 3.*I", "7. - 4.*I", "1. + 9.*I"}, - { "4. + 1.*I", "5. + 3.*I", "2. + 4.*I"} + { "100.0 + 2.0*I", "9.0 - 1.0*I", "2.0 + 4.0*I"}, // std::string NOLINT(fuchsia-default-arguments-calls) + { "3.0 + 3.0*I", "7.0 - 4.0*I", "1.0 + 9.0*I"}, // std::string NOLINT(fuchsia-default-arguments-calls) + { "4.0 + 1.0*I", "5.0 + 3.0*I", "2.0 + 4.0*I"}, // std::string NOLINT(fuchsia-default-arguments-calls) }; multi::array const in_transpose = in.transposed(); in = in.transposed(); BOOST_REQUIRE( in != in_transpose ); - #endif -} - -//BOOST_AUTO_TEST_CASE(fftw_2D_const_range_ref_move) { -// multi::array in = { -// { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, -// { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, -// { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, -// { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, -// { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} -// }; -// multi::array const in_transpose = in.transposed(); -// auto* in_base = in.base(); -// multi::array out = multi::fftw::ref(std::move(in)).operator multi::array(); -// BOOST_REQUIRE( out == in_transpose ); -// BOOST_REQUIRE( in_base == out.base() ); -//} +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/moved.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/moved.cpp new file mode 100644 index 0000000000..206f266766 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/moved.cpp @@ -0,0 +1,301 @@ +// Copyright 2020-2024 Alfredo A. 
Correa + +#include + +#include +#include + +#include // for std::transform_reduce + +namespace multi = boost::multi; + +template auto power(M const& array) { + return std::accumulate(array.elements().begin(), array.elements().end(), 0.0, [](auto e1, auto e2) {return std::move(e1) + std::norm(e2);}); +// return std::transform_reduce(array.elements().begin(), array.elements().end(), 0.0, std::plus<>{}, [](auto zee) { return std::norm(zee); }); +} + +using fftw_fixture = multi::fftw::environment; +BOOST_TEST_GLOBAL_FIXTURE(fftw_fixture); + +// BOOST_AUTO_TEST_CASE(fftw_2D_const_range_fft_move) { +// using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + +// multi::array in = { +// {100.0 + 2.0 * I, 9. - 1.0 * I, 2.0 + 4.0 * I}, +// { 3.0 + 3.0 * I, 7. - 4.0 * I, 1.0 + 9.0 * I}, +// { 4.0 + 1.0 * I, 5. + 3.0 * I, 2.0 + 4.0 * I}, +// { 3.0 - 1.0 * I, 8. + 7.0 * I, 2.0 + 1.0 * I}, +// { 31.0 - 1.0 * I, 18. + 7.0 * I, 2.0 + 10.0 * I}, +// }; + +// auto const in_copy = in; +// // auto* const in_base = in.base(); + +// // multi::array in2(in.extensions()); + +// // in2 = multi::fftw::fft(std::move(in)); + +// // BOOST_REQUIRE( power(in2)/num_elements(in2) - power(in_copy) < 1e-8 ); +// // BOOST_REQUIRE( in2.base() == in_base ); +// // BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing +// } + +BOOST_AUTO_TEST_CASE(fftw_2D_const_range_move) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in = { + {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, + { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, + { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, + { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, + { 31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, + }; + BOOST_REQUIRE( in[1][1] == 7.0 - 4.0*I ); + + auto const in_copy = in; + auto* const 
in_base = in.base(); + BOOST_REQUIRE( in_base == in.base() ); + + // in = multi::fftw::ref(in); + + // BOOST_REQUIRE( in == in_copy ); + // BOOST_REQUIRE( in_base == in.base() ); // prove no allocation +} + +BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in = { + {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, + { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, + { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, + { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, + { 31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, + }; + BOOST_REQUIRE( in[1][1] == 7.0 - 4.0*I ); + + auto const in_copy = in; + auto* const in_base = in.base(); + BOOST_REQUIRE( in_base == in.base() ); + BOOST_REQUIRE( in.size() == 5 ); + +// in = multi::fftw::ref(in).transposed(); + +// BOOST_REQUIRE( in.size() == 3 ); +// BOOST_REQUIRE( in == in_copy.transposed() ); // prove correctness +// BOOST_REQUIRE( in_base == in.base() ); // prove no allocation +} + +BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_naive) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in = { + {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, + { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, + { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, + { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, + { 31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, + }; + BOOST_REQUIRE( in[1][1] == 7.0 - 4.0*I ); + + auto const in_copy = in; + auto* const in_base = in.base(); + BOOST_REQUIRE( in_base == in.base() ); + BOOST_REQUIRE( in.size() == 5 ); + + in = in.transposed(); // this is UB + + BOOST_REQUIRE( in.size() == 3 ); + // BOOST_REQUIRE( in != in_copy.transposed() ); // prove it is incorrect + BOOST_REQUIRE( in_base == in.base() ); // prove no allocation +} + 
+BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_naive_copy) { + using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + + multi::array in = { + {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, + { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, + { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, + { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, + { 31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, + }; + BOOST_REQUIRE( in[1][1] == 7.0 - 4.0*I ); + + auto const in_copy = in; + auto* const in_base = in.base(); + BOOST_REQUIRE( in_base == in.base() ); + BOOST_REQUIRE( in.size() == 5 ); + + in = +in.transposed(); + + BOOST_REQUIRE( in.size() == 3 ); + BOOST_REQUIRE( in == in_copy.transposed() ); // prove correctness + BOOST_REQUIRE( in_base != in.base() ); // prove no allocation +} + +// BOOST_AUTO_TEST_CASE(fftw_2D_const_range_fft_copy) { +// using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + +// multi::array in = { +// {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, +// { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, +// { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, +// { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, +// { 31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, +// }; + +// auto const in_copy = in; +// auto* const in_base = in.base(); + +// multi::array in2 = multi::fftw::fft(in); + +// BOOST_REQUIRE( power(in2)/num_elements(in2) - power(in_copy) < 1e-8 ); +// BOOST_REQUIRE( in2.base() != in_base ); +// BOOST_REQUIRE( not in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing +// } + +// BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_copyconstruct) { +// using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + +// multi::array in = { +// {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 
* I}, +// { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, +// { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, +// { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, +// { 31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, +// }; + +// auto const in_copy = in; +// auto* const in_base = in.base(); + +// multi::array in2 = multi::fftw::ref(in).transposed(); + +// BOOST_REQUIRE( in2 == in_copy.transposed() ); +// BOOST_REQUIRE( in2.base() != in_base ); +// BOOST_REQUIRE( in .base() == in_base ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing +// } + +// BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_moveconstruct) { +// using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + +// multi::array in = { +// {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, +// { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, +// { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, +// { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, +// { 31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, +// }; + +// auto const in_copy = in; +// auto* const in_base = in.base(); + +// multi::array in2 = multi::fftw::ref(std::move(in)).transposed(); + +// BOOST_REQUIRE( in2 == in_copy.transposed() ); +// BOOST_REQUIRE( in2.base() == in_base ); +// BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing +// } + +// BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_moveconstruct_implicit) { +// using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + +// #if not defined(__INTEL_COMPILER) // TODO(correaa) problem with icpc 2022.3.0.8751 +// multi::array in = { +// {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, +// { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, +// { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, +// { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, +// { 31.0 - 1.0 * I, 18.0 
+ 7.0 * I, 2.0 + 10.0 * I}, +// }; + +// auto const in_copy = in; +// auto* const in_base = in.base(); + +// auto in2 = +multi::fftw::ref(std::move(in)).transposed(); + +// BOOST_REQUIRE( in2 == in_copy.transposed() ); +// #if not defined(__NVCOMPILER) // these tests fail with nvc++ 22.9, 23.1 +// BOOST_REQUIRE( in2.base() == in_base ); +// BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing +// #endif +// #endif +// } + +// BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_moveassign_from_temp) { +// using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + +// #if not defined(__INTEL_COMPILER) // TODO(correaa) problem with icpc 2022.3.0.8751 +// multi::array in = { +// {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, +// { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, +// { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, +// { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, +// { 31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, +// }; + +// auto const in_copy = in; +// auto* const in_base = in.base(); + +// multi::array in2; +// in2 = static_cast>(multi::fftw::ref(std::move(in)).transposed()); + +// BOOST_REQUIRE( in2 == in_copy.transposed() ); +// #if not defined(__NVCOMPILER) // these tests fail with nvc++ 22.9, 23.1 +// BOOST_REQUIRE( in2.base() == in_base ); +// BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing +// #endif +// #endif +// } + +// BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_moveassign) { +// using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + +// #if not defined(__INTEL_COMPILER) // TODO(correaa) problem with icpc 2022.3.0.8751 +// multi::array in = { +// {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, +// { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, +// { 4.0 + 1.0 * I, 
5.0 + 3.0 * I, 2.0 + 4.0 * I}, +// { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, +// { 31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, +// }; + +// auto const in_copy = in; +// auto* const in_base = in.base(); + +// multi::array in2; +// in2 = multi::fftw::ref(std::move(in)).transposed(); + +// BOOST_REQUIRE( in2 == in_copy.transposed() ); +// #if not defined(__NVCOMPILER) // these tests fail with nvc++ 22.9, 23.1 +// BOOST_REQUIRE( in2.base() == in_base ); +// BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing +// #endif +// #endif +// } + +// BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_fftwmove) { +// using complex = std::complex; [[maybe_unused]] auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imag unit + +// #if not defined(__INTEL_COMPILER) // TODO(correaa) problem with icpc 2022.3.0.8751 +// multi::array in = { +// {100.0 + 2.0 * I, 9.0 - 1.0 * I, 2.0 + 4.0 * I}, +// { 3.0 + 3.0 * I, 7.0 - 4.0 * I, 1.0 + 9.0 * I}, +// { 4.0 + 1.0 * I, 5.0 + 3.0 * I, 2.0 + 4.0 * I}, +// { 3.0 - 1.0 * I, 8.0 + 7.0 * I, 2.0 + 1.0 * I}, +// { 31.0 - 1.0 * I, 18.0 + 7.0 * I, 2.0 + 10.0 * I}, +// }; + +// auto const in_copy = in; +// auto* const in_base = in.base(); + +// multi::array in2; +// in2 = multi::fftw::move(in).transposed(); + +// BOOST_REQUIRE( in2 == in_copy.transposed() ); +// #if not defined(__NVCOMPILER) // these tests fail with nvc++ 22.9, 23.1 +// BOOST_REQUIRE( in2.base() == in_base ); +// BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing +// #endif +// #endif +// } diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/shift.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/shift.cpp new file mode 100644 index 0000000000..fa3ff6d05b --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/shift.cpp @@ -0,0 +1,79 @@ +// 
Copyright 2022-2024 Alfredo A. Correa + +#include + +#include "../../fftw.hpp" + +#include // NOLINT(build/c++11) +#include + +template +class n_random_complex { // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + std::size_t n_ = 0; + mutable std::mt19937 gen_{std::random_device{}()}; // NOLINT(whitespace/braces) cpplint 1.6 bug + mutable std::uniform_real_distribution<> dist_{-1.0, 1.0}; + + public: + n_random_complex(n_random_complex const&) = delete; + explicit n_random_complex(std::size_t n) : n_{n} {} + + class iterator : public boost::multi::random_access_iterator, std::complex, void> { + n_random_complex const* ptr_; + std::size_t n_; + + public: // NOLINT(whitespace/indent) cpplint 1.6 bug + iterator(n_random_complex const* ptr, std::size_t n) : ptr_{ptr}, n_{n} {} + + auto operator*() const { return std::complex{ptr_->dist_(ptr_->gen_), ptr_->dist_(ptr_->gen_)}; } + auto operator++() -> iterator& { + ++n_; + return *this; + } + + friend auto operator==(iterator const& self, iterator const& other) { return self.n_ == other.n_; } + friend auto operator!=(iterator const& self, iterator const& other) { return self.n_ != other.n_; } + + auto operator-(iterator const& other) const { return n_ - other.n_; } + + auto operator+(std::ptrdiff_t delta) const { return iterator{ptr_, n_ + delta}; } // mmm, needed by culang? 
+ }; + + auto begin() const { return iterator{this, 0}; } + auto end() const { return iterator{this, n_}; } + + auto size() const { return n_; } +}; + +namespace multi = boost::multi; +namespace fftw = multi::fftw; + +using fftw_fixture = fftw::environment; +BOOST_TEST_GLOBAL_FIXTURE(fftw_fixture); + +BOOST_AUTO_TEST_CASE(fftw_shift) { + class watch : std::chrono::steady_clock { + time_point start_ = now(); + + public: // NOLINT(whitespace/indent) cpplint 1.6 bug + auto elapsed_sec() const { return std::chrono::duration(now() - start_).count(); } + }; + + multi::array, 1> const arr = n_random_complex(19586); + BOOST_REQUIRE(arr.size() == 19586); + multi::array, 1> res(arr.extensions()); + BOOST_REQUIRE(res.size() == 19586); + + auto fdft = fftw::plan::forward({true}, arr.base(), arr.layout(), res.base(), res.layout()); + // fftw::plan fdft({true}, arr.layout(), res.layout(), multi::fftw::forward); + + [&, unnamed = watch{}] { + auto const repeat = 40; + std::for_each( + multi::extension_t{0, repeat}.begin(), multi::extension_t{0, repeat}.end(), [&fdft, &arr, &res](auto /*idx*/) { + fdft.execute(arr.base(), res.base()); + std::rotate(res.begin(), res.begin() + res.size() / 2, res.end()); + } + ); + BOOST_TEST_MESSAGE("FFTW shift " << unnamed.elapsed_sec() / repeat << " sec"); // prints 0.000882224 sec + }(); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/so_shift.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/so_shift.cpp new file mode 100644 index 0000000000..55babc7663 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/so_shift.cpp @@ -0,0 +1,41 @@ +// Copyright 2022-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include // includes fftw3.hpp + +#include // for std::rotate +#include +#include +#include // for std::iota + +namespace multi = boost::multi; + +auto main() -> int { + using complex = std::complex; + + // input array + auto const x = std::invoke([] { // NOLINT(readability-identifier-length) + multi::array ret(8); + // fill the first array with some numbers + std::iota(ret.begin(), ret.end(), 1.0); + return ret; + }); + + // output array + // multi::array y(x.size()); // NOLINT(readability-identifier-length) + // compute the FFT of x and store results in y + // auto y = +multi::fftw::dft_forward(x); // NOLINT(readability-identifier-length) + + // display the results + // std::cout << "FFT =" << std::endl; + // std::copy(y.begin(), y.end(), std::ostream_iterator(std::cout, "\n")); + + // "shifted" results + // std::rotate(y.begin(), y.begin() + y.size() / 2 + y.size() % 2, y.end()); + + // std::cout << "FFT shifted =" << std::endl; + // std::copy(y.begin(), y.end(), std::ostream_iterator(std::cout, "\n")); +} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/thrust.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/thrust.cpp similarity index 51% rename from external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/thrust.cpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/thrust.cpp index bcdfc0f380..2e142915c9 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/thrust.cpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/thrust.cpp @@ -1,10 +1,8 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*- -$CXX $0 -o $0x -lfftw3 -lboost_unit_test_framework -ftemplate-backtrace-limit=0&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2020 +// © Alfredo A. 
Correa 2020-2024 #define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW adaptor (cpu) with thrust complex" #define BOOST_TEST_DYN_LINK + #include #include "../../fftw.hpp" @@ -14,16 +12,15 @@ namespace multi = boost::multi; -BOOST_AUTO_TEST_CASE(fftw_2D_identity){ - - using complex = thrust::complex; complex const I{0, 1}; +BOOST_AUTO_TEST_CASE(const fftw_2D_identity){ + using complex = thrust::complex; complex const I{0.0, 1.0}; multi::array const in = { - { 1. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} + { 1.0 + 2.0*I, 9.0 - 1.0*I, 2.0 + 4.0*I}, + { 3.0 + 3.0*I, 7.0 - 4.0*I, 1.0 + 9.0*I}, + { 4.0 + 1.0*I, 5.0 + 3.0*I, 2.0 + 4.0*I}, + { 3.0 - 1.0*I, 8.0 + 7.0*I, 2.0 + 1.0*I}, + { 31.0 - 1.0*I, 18.0 + 7.0*I, 2.0 + 10.0*I} }; auto fwd = multi::fftw::dft({true, true}, in, multi::fftw::forward); @@ -32,6 +29,4 @@ BOOST_AUTO_TEST_CASE(fftw_2D_identity){ auto fwd_t = multi::fftw::dft({true, true}, in_t, multi::fftw::forward); BOOST_REQUIRE( fwd == fwd_t ); - } - diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/transpose.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/transpose.cpp new file mode 100644 index 0000000000..d40d8a15b2 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/transpose.cpp @@ -0,0 +1,59 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +#include // NOLINT(build/c++11) +#include +#include +#include + +namespace multi = boost::multi; + +class watch // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) +: private std::chrono::high_resolution_clock { + std::string label_; + time_point start_ = now(); + + public: + explicit watch(std::string label) : label_{std::move(label)} {} // NOLINT(fuchsia-default-arguments-calls) + + watch(watch const&) = delete; + + auto operator=(watch const&) -> watch& = delete; + + auto elapsed_sec() const { return std::chrono::duration(now() - start_).count(); } + ~watch() { std::cerr << label_ << ": " << elapsed_sec() << " sec" << '\n'; } // NOLINT(cpp:S4963) +}; + +using fftw_fixture = multi::fftw::environment; +BOOST_TEST_GLOBAL_FIXTURE(fftw_fixture); + +BOOST_AUTO_TEST_CASE(fftw_transpose) { + using namespace std::string_literals; // NOLINT(build/namespaces) for ""s + + using complex = std::complex; + + auto const in = std::invoke([] { + multi::array ret({101, 99}); // ({1013, 997}); // ({10137, 9973}); + std::generate( + ret.data_elements(), ret.data_elements() + ret.num_elements(), + [eng = std::default_random_engine{std::random_device{}()}, uniform_01 = std::uniform_real_distribution<>{}]() mutable { + return complex{uniform_01(eng), uniform_01(eng)}; + } + ); + return ret; + }); + + multi::array out = in; + + watch const unnamed{"transposition with aux %ws wall, CPU (%p%)\n"s}; + + multi::array aux = ~out; + + out = std::move(aux); + BOOST_REQUIRE( out[35][79] == in[79][35] ); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/transpose_square.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/transpose_square.cpp new file mode 100644 index 0000000000..cf445534c8 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/fftw/test/transpose_square.cpp @@ -0,0 +1,97 @@ +// Copyright 
2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include +#include + +#include + +#include // NOLINT(build/c++11) +#include +#include +#include + +namespace multi = boost::multi; + +using fftw_fixture = multi::fftw::environment; +BOOST_TEST_GLOBAL_FIXTURE(fftw_fixture); + +using complex = std::complex; + +class watch : private std::chrono::high_resolution_clock { // NOSONAR(cpp:S4963) this class will report timing on destruction + std::string label_; + time_point start_ = now(); + + public: + explicit watch(std::string label) : label_{std::move(label)} {} + + watch(watch const&) = delete; + watch(watch&&) = delete; + + auto operator=(watch const&) = delete; + auto operator=(watch&&) = delete; + + auto elapsed_sec() const { return std::chrono::duration(now() - start_).count(); } + ~watch() { std::cerr << label_ << ": " << elapsed_sec() << " sec" << '\n'; } +}; + +BOOST_AUTO_TEST_CASE(fftw_transpose) { + using namespace std::string_literals; // NOLINT(build/namespaces) for ""s + + multi::fftw::initialize_threads(); + { + auto const in = [] { + // multi::array ret({819, 819}); + multi::array ret({81, 81}); + std::generate( + ret.data_elements(), ret.data_elements() + ret.num_elements(), + [eng = std::default_random_engine{std::random_device{}()}, + uniform_01 = std::uniform_real_distribution<>{}]() mutable { + return complex{uniform_01(eng), uniform_01(eng)}; + } + ); + // std::cout<<"memory size "<< ret.num_elements()*sizeof(complex)/1e6 <<" MB\n"; + return ret; + }(); + { + multi::array out = in; + multi::array aux(extensions(out)); + { + watch const unnamed{"auxiliary copy %ws wall, CPU (%p%)\n"s}; + aux = ~out; + out = std::move(aux); + BOOST_REQUIRE( out[35][79] == in[79][35] ); + } + BOOST_REQUIRE( out == ~in ); + } + { + multi::array out = in; + { + watch const unnamed{"transposition with loop %ws wall, CPU (%p%)\n"s}; + std::for_each(extension(out).begin(), 
extension(out).end(), [&out](auto idx) { + auto ext = multi::extension_t(0L, idx); + std::for_each(ext.begin(), ext.end(), [&out, idx](auto jdx) { + std::swap(out[idx][jdx], out[jdx][idx]); + }); + }); + BOOST_REQUIRE( out[35][79] == in[79][35] ); + } + BOOST_REQUIRE( out == ~in ); + } + { + multi::array out = in; + { + watch const unnamed{"transposition with loop 2 %ws wall, CPU (%p%)\n"s}; + std::for_each(extension(out).begin(), extension(out).end(), [&out](auto idx) { + auto ext = multi::extension_t(idx + 1, out.size()); + std::for_each(ext.begin(), ext.end(), [&out, idx](auto jdx) { + std::swap(out[idx][jdx], out[jdx][idx]); + }); + }); + BOOST_REQUIRE( out[35][79] == in[79][35] ); + } + BOOST_REQUIRE( out == ~in ); + } + } +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft.hpp new file mode 100644 index 0000000000..07e63177d0 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft.hpp @@ -0,0 +1,64 @@ +// Copyright 2020-2024 Alfredo A. 
Correa + +#ifndef BOOST_MULTI_ADAPTORS_HIPFFT_HPP +#define BOOST_MULTI_ADAPTORS_HIPFFT_HPP + +#include +#include + +using cudaError_t = hipError_t; + +constexpr static auto const& cudaDeviceReset = hipDeviceReset; +constexpr static auto const& cudaDeviceSynchronize = hipDeviceSynchronize; +constexpr static auto const& cudaSuccess = hipSuccess; + +#define cu2hip_fft(TypeleafnamE) using cufft ## TypeleafnamE = hipfft ## TypeleafnamE + cu2hip_fft(Handle); + cu2hip_fft(DoubleComplex); + cu2hip_fft(Result); +#undef cu2hip_fft + +#define cu2hip_fft(FunctionleafnamE) constexpr static auto const& cufft ## FunctionleafnamE = hipfft ## FunctionleafnamE + cu2hip_fft(Create); + cu2hip_fft(Destroy); + cu2hip_fft(GetSize); + cu2hip_fft(ExecZ2Z); + cu2hip_fft(SetAutoAllocation); + cu2hip_fft(SetWorkArea); + cu2hip_fft(PlanMany); +#undef cu2hip_fft + +#define CU2HIPFFT_(NamE) constexpr static auto const& CUFFT_ ## NamE = HIPFFT_ ## NamE + +CU2HIPFFT_(ALLOC_FAILED); +CU2HIPFFT_(BACKWARD); + +constexpr static auto const& CUFFT_INVERSE = HIPFFT_BACKWARD; + +CU2HIPFFT_(EXEC_FAILED); +CU2HIPFFT_(FORWARD); +CU2HIPFFT_(INCOMPLETE_PARAMETER_LIST); +CU2HIPFFT_(INTERNAL_ERROR); +CU2HIPFFT_(INVALID_DEVICE); +CU2HIPFFT_(INVALID_SIZE); +CU2HIPFFT_(INVALID_TYPE); +CU2HIPFFT_(INVALID_VALUE); +CU2HIPFFT_(INVALID_PLAN); +CU2HIPFFT_(NO_WORKSPACE); +CU2HIPFFT_(NOT_IMPLEMENTED); +CU2HIPFFT_(NOT_SUPPORTED); +CU2HIPFFT_(UNALIGNED_DATA); +CU2HIPFFT_(PARSE_ERROR); +CU2HIPFFT_(SETUP_FAILED); +CU2HIPFFT_(SUCCESS); +CU2HIPFFT_(Z2Z); + +#undef CU2HIPFFT_ + +#include "cufft.hpp" + +// namespace boost::multi{ +// namespace cufft = hipfft; +// } + +#endif // BOOST_MULTI_ADAPTORS_HIPFFT_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft/CMakeLists.txt new file mode 100644 index 0000000000..ecd6d90e6a --- /dev/null +++ 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.11) + +project( + boost-multi-adaptors-hipfft + VERSION 0.1 + LANGUAGES CXX +) + +enable_language(HIP) +find_package(hipFFT REQUIRED) +# find_package(rocthrust REQUIRED) + +# if(ENABLE_CUDA OR DEFINED CXXCUDA) +# enable_language(CUDA) +# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda") +# endif() + +# find_package(CUDA QUIET) +#include_directories(${CUDA_INCLUDE_DIRS}) +#link_libraries(${CUDA_CUFFT_LIBRARIES}) + +# include_directories(${CMAKE_BINARY_DIR}) + +add_subdirectory(test) diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft/test/CMakeLists.txt new file mode 100644 index 0000000000..15185d7076 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft/test/CMakeLists.txt @@ -0,0 +1,37 @@ +cmake_minimum_required(VERSION 3.11) + +project( + boost-multi-adaptors-hipfft-test + VERSION 0.1 + LANGUAGES CXX +) + +enable_language(HIP) +find_package(hipFFT REQUIRED) + +# FFTW has to go before blas to avoid unscrupulous (i.e. 
MKL) blas implementations that include FFTW and don't implement it properly +find_package(PkgConfig REQUIRED) +pkg_search_module( + FFTW + REQUIRED + fftw3 + IMPORTED_TARGET +) +include_directories(PkgConfig::FFTW) +link_libraries(PkgConfig::FFTW) + +enable_testing() +include(CTest) + +# include_directories(${CMAKE_BINARY_DIR}) + +find_package(Boost COMPONENTS unit_test_framework) + +add_executable(hipfft.cpp.x hipfft.cpp) +set_source_files_properties(hipfft.cpp PROPERTIES LANGUAGE HIP) + +# target_link_libraries(${TEST_EXE} PRIVATE multi) +target_link_libraries(hipfft.cpp.x PRIVATE hip::hipfft Boost::unit_test_framework) + +add_test(NAME hipfft.cpp.x COMMAND $) + diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft/test/hipfft.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft/test/hipfft.cpp new file mode 100644 index 0000000000..b76cb6a1f0 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipfft/test/hipfft.cpp @@ -0,0 +1,7 @@ +namespace boost::multi::hipfft{} + +// namespace boost::multi{ +// namespace cufft = hipfft; +// } + +#include "../../cufft/test/cufft.cpp" diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/CMakeLists.txt new file mode 100644 index 0000000000..f0c7989208 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/CMakeLists.txt @@ -0,0 +1,74 @@ +cmake_minimum_required(VERSION 3.24) + +project(boost-multi-adaptor-thrust-hip LANGUAGES CXX HIP) + +find_package(Boost REQUIRED COMPONENTS timer unit_test_framework) +find_package(rocthrust REQUIRED) + +include(CMakePrintHelpers) + +cmake_print_properties(TARGETS roc::rocthrust PROPERTIES INTERFACE_LINK_LIBRARIES INTERFACE_INCLUDE_DIRECTORIES) + +enable_testing() + +include(CTest) + +# set(TEST_SRCS +# # array.cu +# # memory_resource.cu +# # 
set_identity_kernel.cu +# # speed.cu +# # speed_algo.cu +# # universal.cu +# vector.cpp +# ) + +# add_executable(vector.cu.nvidia.x vector.cu) +# set_source_files_properties(vector.cu PROPERTIES LANGUAGE CUDA) + +# target_link_libraries(vector.cu.nvidia.x PRIVATE Boost::unit_test_framework) +# add_test(NAME vector.cu.nvidia.x COMMAND vector.cu.nvidia.x) + +add_executable(vector.hip.amd.x vector.hip) +set_source_files_properties(vector.hip PROPERTIES LANGUAGE HIP) +target_link_libraries(vector.hip.amd.x PRIVATE Boost::unit_test_framework roc::rocthrust) +target_compile_definitions(vector.hip.amd.x PRIVATE BOOST_TEST_DYN_LINK=1) + +add_test(NAME vector.hip.amd.x COMMAND $) + +add_executable(array.hip.amd.x array.hip) +set_source_files_properties(array.hip PROPERTIES LANGUAGE HIP) +target_link_libraries(array.hip.amd.x PRIVATE Boost::unit_test_framework Boost::timer multi roc::rocthrust) +target_compile_definitions(array.hip.amd.x PRIVATE BOOST_TEST_DYN_LINK=1) + +add_test(NAME array.hip.amd.x COMMAND $) + +add_executable(speed.hip.amd.x speed.hip) +set_source_files_properties(speed.hip PROPERTIES LANGUAGE HIP) +target_link_libraries(speed.hip.amd.x PRIVATE Boost::unit_test_framework Boost::timer multi roc::rocthrust) +target_compile_definitions(speed.hip.amd.x PRIVATE BOOST_TEST_DYN_LINK=1) +target_compile_definitions(speed.hip.amd.x PRIVATE BOOST_TEST_MODULE="C++ Unit Tests for Multi CUDA thrust universal copy and assignment") + +add_test(NAME speed.hip.amd.x COMMAND $) + +# foreach(TEST_FILE ${TEST_SRCS}) +# # set(TEST_EXE "${TEST_FILE}") +# add_executable(vector ${TEST_FILE}) +# # if(ENABLE_CUDA OR DEFINED CXXCUDA) +# # set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE HIP) +# # endif() + +# # target_include_directories(${TEST_EXE} PRIVATE ../../../../../include) + +# target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") +# target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS}) +# target_include_directories(${TEST_EXE} 
SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) +# target_link_libraries(${TEST_EXE} PRIVATE ${Boost_LIBRARIES}) +# target_link_directories(${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS}) + +# # if(ENABLE_CUDA) +# # add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) +# # endif() +# endforeach() + +# set_tests_properties(speed.cu.x PROPERTIES RUN_SERIAL TRUE) diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/array.hip b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/array.hip new file mode 100644 index 0000000000..e8a1151bfa --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/array.hip @@ -0,0 +1,746 @@ +// Copyright 2021-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi hip thrust" +#include +#include +#include + +#include +// #include +// #include + +#include +#include +#include +#include + +#include + +#include +#include + +#include + +namespace multi = boost::multi; + +// #ifdef __NVCC__ +// template<> +// inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::std::complex> = true; +// template<> +// inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::std::complex> = true; +// template<> +// inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::thrust::complex> = true; +// template<> +// inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::thrust::complex> = true; +// #else // vvv nvcc (12.1?) 
doesn't support this kind of customization: "error: expected initializer before ‘<’" +// template +// inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::std::complex> = std::is_trivially_default_constructible::value; +// template +// inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::thrust::complex> = std::is_trivially_default_constructible::value; +// #endif + +namespace { + +template using test_allocator = +// multi ::memory::hip::allocator +// multi ::memory::hip::managed::allocator//, std::integral_constant > +// multi ::memory::hip::cached::allocator > +// multi::thrust::hip::managed_allocator + thrust::hip::allocator +; + +} + +using types_list = boost::mpl::list< + // char, + double, + // std::complex, + thrust::complex +>; + +BOOST_AUTO_TEST_CASE(dummy_test) {} + +BOOST_AUTO_TEST_CASE(test_univ_alloc) { + multi::array > Dev({128, 128}); + *raw_pointer_cast(Dev.base()) = 99.0; +} + +// BOOST_AUTO_TEST_CASE(mtc_universal_array) { +// multi::thrust::hip::universal_array Dev({128, 128}); +// *raw_pointer_cast(Dev.base()) = 99.0; +// } + +// BOOST_AUTO_TEST_CASE(mtc_universal_coloncolon_array) { +// multi::thrust::hip::universal::array Dev({128, 128}); +// *raw_pointer_cast(Dev.base()) = 99.0; +// } + +// BOOST_AUTO_TEST_CASE(test_alloc) { +// multi::array > Dev({128, 128}); +// // *raw_pointer_cast(Dev.base()) = 99.0; // segmentation fault (correct behavior) +// } + +#if defined(NDEBUG) +BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_copy_1D_issue123, T, types_list) { // BOOST_AUTO_TEST_CASE(fdfdfdsfds) { using T = char; + static_assert( multi::is_trivially_default_constructible{}, "!"); + static_assert( std::is_trivially_copy_constructible{} , "!"); + static_assert( std::is_trivially_assignable{} , "!"); + + + multi::array> Devc(multi::extensions_t<1>{10240*10240}); + multi::array> Dev2(multi::extensions_t<1>{10240*10240}); + multi::array Host(multi::extensions_t<1>{10240*10240}); 
std::iota(Host.elements().begin(), Host.elements().end(), 12.); + multi::array Hos2(multi::extensions_t<1>{10240*10240}); + + std::cout<<"| 1D `"<< typeid(T).name() <<"` total data size: "<< Host.num_elements()*sizeof(T) / 1073741824. <<" GB | speed |\n|---|---|"< devc | "<< Host.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< devc | "<< Host.sliced(0, 10240*10240/2).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< devc | "<< Host.strided(2).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< host | "<< Host.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< host | "<< Host.sliced(0, 10240*10240/2).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< host | "<< Host.strided(2).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< devc | "<< Dev2.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< devc | "<< Devc.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< devc | "<< Dev2.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< devc | "<< Dev2.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< devc | "<< Dev2.sliced(0, 10240*10240/2).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< devc | "<< Dev2.strided(2).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< host | "<< Hos2.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< host | "<< Hos2.sliced(0, 10240*10240/2).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< host | "<< Hos2.strided(2).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"<({10240, 10240}); + + std::cout<<"| 2D `"<< typeid(T).name() <<"` max data size "<< exts.num_elements()*sizeof(T) / 1073741824. 
<<" GB | speed |\n|---|---|"<> Devc(exts); + multi::array> Dev2(exts); + + multi::array Host(exts); std::iota(Host.elements().begin(), Host.elements().end(), 12.); + multi::array Hos2(exts); + + { + Devc({0, 5120},{0, 5120}) = Host({0, 5120},{0, 5120}); // 0.002859s + } + { + boost::timer::auto_cpu_timer t{""}; + Devc = Host; + std::cout<<"| contiguous host to devc | "<< Host.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< devc | "<< Devc.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"<({1024, 1024, 100}); + + std::cout<<"| 3D `"<< typeid(T).name() <<"` max data size "<< exts.num_elements()*sizeof(T) / 1073741824. <<" GB | speed |\n|---|---|"<> Devc(exts); + multi::array> Dev2(exts); + multi::array Host(exts); std::iota(Host.elements().begin(), Host.elements().end(), 12.); + multi::array Hos2(exts); + + { + Devc({0, 512}, {0, 512}, {0, 512}) = Host({0, 512}, {0, 512}, {0, 512}); // 0.002859s + } + { + boost::timer::auto_cpu_timer t{""}; + Devc = Host; + std::cout<<"| contiguous host to devc | "<< Host.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << " GB/sec |"< devc | "<< Devc.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< devc | "<< Dev2.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"< devc | "<< Dev2.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |"<> Devc(multi::extensions_t<1>{10240*10240}); + multi::array> Dev2(multi::extensions_t<1>{10240*10240}); + multi::array Host(multi::extensions_t<1>{10240*10240}); + std::iota(Host.elements().begin(), Host.elements().end(), 12.); + multi::array Hos2(multi::extensions_t<1>{10240*10240}); + + std::cout<<"| 1D `"<< typeid(T).name() <<"` total data size: "<< Host.num_elements()*sizeof(T) / 1073741824. 
<<" GB | speed |\n|---|---|\n"; + + Devc = Host; + Dev2 = Host; + Hos2 = Host; + { + boost::timer::auto_cpu_timer t{""}; + BOOST_REQUIRE( Devc == Dev2 ); + std::cout<<"| contiguous devc == devc | "<< Devc.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + // BOOST_REQUIRE( Devc.sliced(0, Devc.size()/2) == Dev2.sliced(0, Devc.size()/2) ); + BOOST_REQUIRE( thrust::equal( Devc.sliced(0, Devc.size()/2).elements().begin(), Devc.sliced(0, Devc.size()/2).elements().end(), Dev2.sliced(0, Devc.size()/2).elements().begin() ) ); + std::cout<<"| sliced devc == devc | "<< Devc.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + // BOOST_REQUIRE(Host == Hos2); + BOOST_REQUIRE( std::equal( Host.elements().begin(), Host.elements().end(), Hos2.elements().begin() ) ); + std::cout<<"| contiguous host == host | "<< Hos2.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + // BOOST_REQUIRE(Host.sliced(0, Devc.size()/2) == Hos2.sliced(0, Devc.size()/2) ); + BOOST_REQUIRE( std::equal( Host.sliced(0, Host.size()/2).elements().begin(), Host.sliced(0, Host.size()/2).elements().end(), Hos2.sliced(0, Devc.size()/2).elements().begin() ) ); + std::cout<<"| sliced host == host | "<< Hos2.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. 
<< "GB/sec |\n"; + } + + std::cout<<" "< Host = {{1, 2, 3}, {4, 5, 6}}; + multi::array Hos2 = {{1, 2, 3}, {4, 5, 6}}; + BOOST_REQUIRE( Host.size() == 2 ); + + BOOST_REQUIRE( *Host().elements().begin() == *Hos2().elements().begin() ); + + BOOST_REQUIRE( Host().elements().begin()[0] == Hos2().elements().begin()[0] ); + BOOST_REQUIRE( Host().elements().begin()[1] == Hos2().elements().begin()[1] ); + BOOST_REQUIRE( Host().elements().begin()[2] == Hos2().elements().begin()[2] ); + BOOST_REQUIRE( Host().elements().begin()[3] == Hos2().elements().begin()[3] ); + BOOST_REQUIRE( Host().elements().begin()[4] == Hos2().elements().begin()[4] ); + BOOST_REQUIRE( Host().elements().begin()[5] == Hos2().elements().begin()[5] ); + + BOOST_REQUIRE( *(Host().elements().end() - 1) == *(Hos2().elements().end() - 1) ); + BOOST_REQUIRE( *(Host().elements().end() - 2) == *(Hos2().elements().end() - 2) ); + BOOST_REQUIRE( *(Host().elements().end() - 3) == *(Hos2().elements().end() - 3) ); + + BOOST_REQUIRE( std::equal(Host().elements().begin(), Host().elements().end(), Hos2().elements().begin()) ); + BOOST_REQUIRE( thrust::equal(Host().elements().begin(), Host().elements().end(), Hos2().elements().begin()) ); + +// BOOST_REQUIRE( Host() == Hos2() ); +} + +BOOST_AUTO_TEST_CASE(thrust_equality_2D_small_gpu_issue123) { + multi::array Host = {{1, 2, 3}, {4, 5, 6}}; + + multi::array> Devc(Host.extensions()); Devc = Host; + multi::array> Dev2(Host.extensions()); Dev2 = Host; + BOOST_REQUIRE( Dev2.size() == 2 ); + + BOOST_REQUIRE( thrust::equal( + Devc().elements().begin(), + Devc().elements().end() , Dev2().elements().begin() + )); + + BOOST_REQUIRE( thrust::equal( + thrust::hip::par, + Devc().elements().begin(), + Devc().elements().end() , Dev2().elements().begin() + )); + + BOOST_REQUIRE( thrust::equal( + Devc.rotated().elements().begin(), + Devc.rotated().elements().end() , Dev2.rotated().elements().begin() + )); + + BOOST_REQUIRE( thrust::equal( + thrust::hip::par, + 
Devc.rotated().elements().begin(), + Devc.rotated().elements().end() , Dev2.rotated().elements().begin() + )); + + BOOST_REQUIRE( multi::adl_equal( + Devc.rotated().elements().begin(), + Devc.rotated().elements().end() , Dev2.rotated().elements().begin() + )); +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_equality_2D_issue123, T, types_list) { + multi::extensions_t<2> x({10240, 10240}); + multi::array> Devc(x); + multi::array> Dev2(x); + multi::array Host(x); std::iota(Host.elements().begin(), Host.elements().end(), 12.); + multi::array Hos2(x); + + std::cout<<"| 2D `"<< typeid(T).name() <<"` max data size "<< Host.num_elements()*sizeof(T) / 1073741824. <<" GB | speed |\n|---|---|\n"; + + Devc = Host; + Dev2 = Host; + Hos2 = Host; + { + boost::timer::auto_cpu_timer t{""}; + BOOST_REQUIRE(Host == Hos2); + std::cout<<"| contiguous host == host | "<< Hos2.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + BOOST_REQUIRE(Host.sliced(0, Host.size()/2) == Hos2.sliced(0, Host.size()/2)); + // BOOST_REQUIRE( std::equal(Host.sliced(0, 5120).elements().begin(), Host.sliced(0, 5120).elements().end(), Hos2.sliced(0, 5120).elements().begin()) ); + std::cout<<"| sliced host == host | "<< Hos2.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + BOOST_REQUIRE(Host({0, Host.size()/2},{0, Host.size()/2}) == Hos2({0, Hos2.size()/2},{0, Hos2.size()/2})); + // BOOST_REQUIRE( std::equal(Host({0, 5120},{0, 5120}).elements().begin(), Host({0, 5120},{0, 5120}).elements().end(), Hos2({0, 5120},{0, 5120}).elements().begin()) ); + std::cout<<"| strided host == host | "<< Hos2({0, Host.size()/2},{0, Host.size()/2}).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. 
<< "GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + BOOST_REQUIRE(Devc == Dev2); + std::cout<<"| contiguous devc == devc | "<< Devc.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + BOOST_REQUIRE(Devc.sliced(0, Devc.size()/2) == Dev2.sliced(0, Dev2.size()/2)); + std::cout<<"| sliced devc == devc | "<< Devc.sliced(0, Devc.size()/2).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + BOOST_REQUIRE(Devc({0, Devc.size()/2},{0, Devc.size()/2}) == Dev2({0, Dev2.size()/2},{0, Dev2.size()/2})); + std::cout<<"| strided devc == devc | "<< Devc({0, Devc.size()/2},{0, Devc.size()/2}).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << "GB/sec |\n"; + } + std::cout<<" "<> Devc({1024, 1024, 100}); + multi::array> Dev2({1024, 1024, 100}); + multi::array Host({1024, 1024, 100}); std::iota(Host.elements().begin(), Host.elements().end(), 12.); + multi::array Hos2({1024, 1024, 100}); + + std::cout<<"| 3D `"<< typeid(T).name() <<"` max data size "<< Host.num_elements()*sizeof(T) / 1073741824. <<" GB | speed |\n|---|---|\n"; + + Devc = Host; + Dev2 = Host; + Hos2 = Host; + { + boost::timer::auto_cpu_timer t{""}; + BOOST_REQUIRE( Devc == Dev2 ); + std::cout<<"| contiguous devc == devc | "<< Devc.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << " GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + BOOST_REQUIRE( Devc.sliced(0, 512) == Dev2.sliced(0, 512) ); + std::cout<<"| sliced devc == devc | "<< Dev2.sliced(0, 512).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << " GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + BOOST_REQUIRE( Devc({0, 512}, {0, 512}, {0, 512}) == Dev2({0, 512}, {0, 512}, {0, 512}) ); + std::cout<<"| strided devc == devc | "<< Dev2({0, 512},{0, 512}, {0, 512}).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. 
<< "GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + BOOST_REQUIRE( Host == Hos2 ); + std::cout<<"| contiguous host == host | "<< Hos2.num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << " GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + // BOOST_REQUIRE( Host.sliced(0, 512) == Hos2.sliced(0, 512) ); + BOOST_REQUIRE( std::equal( Host.sliced(0, 512).elements().begin(), Host.sliced(0, 512).elements().end(), Hos2.sliced(0, 512).elements().begin() ) ); + std::cout<<"| sliced host == host | "<< Hos2.sliced(0, 512).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. << " GB/sec |\n"; + } + { + boost::timer::auto_cpu_timer t{""}; + // BOOST_REQUIRE( Host({0, 512}, {0, 512}, {0, 512}) == Hos2({0, 512}, {0, 512}, {0, 512}) ); + BOOST_REQUIRE( std::equal( Host({0, 512}, {0, 512}, {0, 512}).elements().begin(), Host({0, 512}, {0, 512}, {0, 512}).elements().end(), Hos2({0, 512}, {0, 512}, {0, 512}).elements().begin() ) ); + std::cout<<"| strided host == host | "<< Hos2({0, 512},{0, 512}, {0, 512}).num_elements()*sizeof(T) / (t.elapsed().wall/1e9) / 1073741824. 
<< "GB/sec |\n"; + } + std::cout<<" "<; +} + +BOOST_AUTO_TEST_CASE(thrust_complex_cached_1D) { + using T = inq::complex; + multi::array > aa(10, T{1., 1.}); + multi::array > bb(10, T{2., 2.}); + + bb = aa; + + BOOST_REQUIRE(( bb[0] == T{1., 1.} )); +} + +BOOST_AUTO_TEST_CASE(thrust_complex_cached_without_values_1D) { + using T = inq::complex; + multi::array > aa(10); + multi::array > bb(10); + BOOST_REQUIRE( aa.size() == 10 ); + BOOST_REQUIRE( bb.size() == 10 ); + + bb = aa; + + BOOST_REQUIRE(( bb[0] == aa[0] )); +} + +BOOST_AUTO_TEST_CASE(thrust_complex_cached_2D) { + using T = inq::complex; + multi::array > aa({10, 20}, T{1., 1.}); + multi::array > bb({10, 20}, T{2., 2.}); + + bb = aa; + + BOOST_REQUIRE(( bb[0][0] == T{1., 1.} )); +} + +BOOST_AUTO_TEST_CASE(thrust_complex_cached_without_values_2D) { + using T = inq::complex; + multi::array > aa({10, 20}); + multi::array > bb({10, 20}); + BOOST_REQUIRE( aa.size() == 10 ); + BOOST_REQUIRE( bb.size() == 10 ); + + bb = aa; + + BOOST_REQUIRE(( bb[0][0] == aa[0][0] )); +} + +BOOST_AUTO_TEST_CASE(array) { + +//{ +// multi::thrust::hip::array C({2, 3}); + +// C[0][0] = 0. ; +// C[1][1] = 11.; +// BOOST_TEST_REQUIRE( C[1][1] == 11. ); +//} + +//{ +// multi::array const H = { +// {00., 01., 02.}, +// {10., 11., 12.}, +// }; + +// BOOST_TEST_REQUIRE( H[1][1] == 11. ); + +// { +// multi::thrust::hip::array C(H.extensions()); +// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); + +// thrust::copy_n(H.data_elements(), H.num_elements(), C.data_elements()); +// BOOST_TEST_REQUIRE( C[1][1] == 11. ); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::hip::array C(H.extensions()); +// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); + +// std::copy_n(H.data_elements(), H.num_elements(), C.data_elements()); +// BOOST_TEST_REQUIRE( C[1][1] == 11. 
); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::hip::array C(H.extensions()); +// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); + +// std::uninitialized_copy_n(H.data_elements(), H.num_elements(), C.data_elements()); +// BOOST_TEST_REQUIRE( C[1][1] == 11. ); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::hip::array C(H.extensions()); +// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); + +// thrust::uninitialized_copy_n(H.data_elements(), H.num_elements(), C.data_elements()); +// BOOST_TEST_REQUIRE( C[1][1] == 11. ); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::hip::array C(H.extensions()); +// BOOST_REQUIRE( C.extensions() == H.extensions() ); +// thrust::copy_n(H.begin(), H.size(), C.begin()); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::hip::array C(H.extensions()); +// BOOST_REQUIRE( C.extensions() == H.extensions() ); +// std::copy_n(H.begin(), H.size(), C.begin()); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::hip::array C(H.extensions()); +// C = H; +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::hip::array C = H; +// BOOST_REQUIRE( C == H ); +// } +//} + +} +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/speed.hip b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/speed.hip new file mode 100644 index 0000000000..212ffe6def --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/speed.hip @@ -0,0 +1,267 @@ +// Copyright 2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +namespace multi = boost::multi; + +// template<> inline constexpr bool multi::force_element_trivial_default_construction> = true; +// template<> inline constexpr bool multi::force_element_trivial_default_construction> = true; + +// template<> inline constexpr bool multi::force_element_trivial_default_construction> = true; +// template<> inline constexpr bool multi::force_element_trivial_default_construction> = true; + +using test_types = boost::mpl::list< + double, ::thrust::complex, std::complex, + float , ::thrust::complex , std::complex, + // char, + unsigned, int +>; + +template +inline void DoNotOptimize(Tp&& value) noexcept { + (hipDeviceSynchronize()==hipSuccess)?void():exit(-1); + asm volatile("" : "+m"(value) : : "memory"); +} + +constexpr auto measure = [](auto&& F) { + auto start_time = std::chrono::high_resolution_clock::now(); + F(); + std::chrono::duration const time = std::chrono::high_resolution_clock::now() - start_time; + DoNotOptimize(F); + return time.count(); +}; + +BOOST_AUTO_TEST_CASE_TEMPLATE(allocation_speed, T, test_types) { + std::cout << typeid(T).name() << " ******************************************\n"; + + auto const n = 8000L; + + auto const size = n*n * sizeof(T) / 1e9; + + // static_assert( std::is_trivial_v || multi::force_element_trivial_default_construction ); + + auto hipmallocfree0 = measure([&]() { + char* Buffer; + if(auto r = hipMalloc((void**)&Buffer , n*n * sizeof(T)); r != hipSuccess) {throw std::runtime_error{""};} + DoNotOptimize(Buffer); + if(auto r = hipFree(Buffer ); r != hipSuccess) {throw std::runtime_error{""};} + DoNotOptimize(Buffer); + }); + + auto hipmallocfree1 = measure([&]() { + char* Buffer; + if(hipMalloc((void**)&Buffer , n*n * sizeof(T)) != hipSuccess) {throw std::runtime_error{""};} + DoNotOptimize(Buffer); + if(auto r = hipFree(Buffer ); r != hipSuccess) {throw 
std::runtime_error{""};} + DoNotOptimize(Buffer); + }); + + auto hipmallocfree2 = measure([&]() { + char* Buffer; + if(auto r = hipMalloc((void**)&Buffer , n*n * sizeof(T)); r != hipSuccess) {throw std::runtime_error{""};} + DoNotOptimize(Buffer); + if(auto r = hipFree(Buffer ); r != hipSuccess) {throw std::runtime_error{""};} + DoNotOptimize(Buffer); + }); + + auto const hipmallocfree = hipmallocfree2; + + auto const hipallocator = measure([&]() { + thrust::hip::allocator alloc; + auto p = alloc.allocate(n*n); + DoNotOptimize(p); + alloc.deallocate(p, n*n); + DoNotOptimize(p); + }); + + BOOST_TEST( (hipallocator / hipmallocfree) > 0.10 ); + + auto const hipmultiarray = measure([&]() { + multi::array> buffer({n, n}); + DoNotOptimize(buffer); + }); + + BOOST_TEST( (hipmultiarray / hipmallocfree) > 0.10 ); + + auto const hipmultiuniversalarray = measure([&]() { + multi::array> buffer({n, n}); + DoNotOptimize(buffer); + }); + + BOOST_TEST( (hipmultiuniversalarray / hipmallocfree) > 0.80 ); +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_speed, T, test_types) { + std::cout << typeid(T).name() << " ******************************************\n"; + + auto const n = 8000; + + multi::array> src({n, n}, T{12}); + multi::array> dst(extensions(src), T{33}); + + auto const size = src.num_elements() * sizeof(T) / 1e9; + + auto const hipmemcpy0 = measure([&]() { + if(hipMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements() * sizeof(T), hipMemcpyDeviceToDevice) != hipSuccess) throw std::runtime_error(""); + DoNotOptimize(dst); + }); + + BOOST_REQUIRE( dst == src ); + + DoNotOptimize(src); + DoNotOptimize(dst); + + auto const hipmemcpy1 = measure([&]() { + if(hipMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements() * sizeof(T), hipMemcpyDeviceToDevice) != hipSuccess) throw std::runtime_error(""); + DoNotOptimize(dst); + }); + + BOOST_REQUIRE( dst == src ); + + DoNotOptimize(src); + 
DoNotOptimize(dst); + + auto const hipmemcpy = measure([&]() { + hipMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements() * sizeof(T), hipMemcpyDeviceToDevice) == hipSuccess?:throw std::runtime_error(""); + DoNotOptimize(dst); + }); + + DoNotOptimize(src); + DoNotOptimize(dst); + + std::cout << "hipmemcpy rate = " << size/hipmemcpy << " GB/s\n"; + + auto const hip_copy = measure([&]() { + thrust::copy_n(src.data_elements(), src.num_elements(), dst.data_elements()); + DoNotOptimize(dst); + }); + + DoNotOptimize(src); + DoNotOptimize(dst); + + std::cout << "hip_copy rate = " << size/hip_copy << " GB/s\n"; + + BOOST_TEST( hipmemcpy > 0.8* hip_copy ); + + auto const multi_copy_assign = measure([&]() { + dst = src; + DoNotOptimize(dst); + }); + + DoNotOptimize(src); + DoNotOptimize(dst); + + std::cout << "multi_copy_assign rate = " << size/multi_copy_assign << " GB/s\n"; + + BOOST_TEST( hipmemcpy > 0.8* multi_copy_assign ); + + + auto const hip_copy_elements = measure([&]() { + thrust::copy(src.elements().begin(), src.elements().end(), dst.elements().begin()); + DoNotOptimize(dst); + }); + + DoNotOptimize(src); + DoNotOptimize(dst); + + std::cout << "hip_copy_elements rate = " << size/hip_copy_elements << " GB/s\n"; + + BOOST_TEST( hipmemcpy > 0.8* hip_copy_elements ); + + auto const multi_sub_assign = measure([&]() { + dst({2, n - 2}, {2, n - 2}) = src({2, n - 2}, {2, n - 2}); + DoNotOptimize(dst); + }); + + DoNotOptimize(src); + DoNotOptimize(dst); + + std::cout << "multi_sub_assign rate = " << size/multi_sub_assign << " GB/s\n"; + + BOOST_TEST( hipmemcpy > 0.5* multi_sub_assign ); +} + +#if 0 +BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_universal_speed, T, test_types) { + std::cout << typeid(T).name() << " ******************************************\n"; + + auto const n = 8000; + + multi::array> src({n, n}); + multi::array> dst(extensions(src)); + + auto const threshold = 0.10; + + auto const size = src.num_elements() * 
sizeof(T) / 1e9; + + auto const dummy = std::invoke([&] { + auto start_time = std::chrono::high_resolution_clock::now(); + auto r = hipMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements() * sizeof(T), hipMemcpyDeviceToDevice); + assert(r == hipSuccess); + std::chrono::duration time = std::chrono::high_resolution_clock::now() - start_time; + auto rate = size / time.count(); + std::cout << "memcpy rate = " << rate << " GB/s (warmup)\n"; + return rate; + }); + + auto const memcpy_rate = std::invoke([&] { + auto start_time = std::chrono::high_resolution_clock::now(); + auto r = hipMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements() * sizeof(T), hipMemcpyDeviceToDevice); + assert(r == hipSuccess); + std::chrono::duration time = std::chrono::high_resolution_clock::now() - start_time; + auto rate = size / time.count(); + std::cout << "memcpy rate = " << rate << " GB/s (ratio = 1)\n"; + return rate; + }); + + { // cctor + auto tick = std::chrono::high_resolution_clock::now(); + + auto dst2 = src; + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / time.count(); + double ratio = rate / memcpy_rate; + + std::cout << "cctor rate = " << rate << " GB/s (ratio = " << ratio << ")\n"; + BOOST_TEST(ratio >= threshold); + } + { // assign + auto tick = std::chrono::high_resolution_clock::now(); + + dst = src; + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / time.count(); + double ratio = rate / memcpy_rate; + + std::cout << "assign rate = " << rate << " GB/s (ratio = " << ratio << ")\n"; + BOOST_TEST(ratio >= threshold); + } + { // subarray assign + auto tick = std::chrono::high_resolution_clock::now(); + + dst({0, n - 2}, {0, n - 2}) = src({2, n}, {2, n}); + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / 
time.count(); + double ratio = rate / memcpy_rate; + std::cout << "subasssign rate = " << rate << " GB/s (ratio = " << ratio << ")\n"; + BOOST_TEST(ratio >= threshold); + } +} +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/vector.hip b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/vector.hip new file mode 100644 index 0000000000..dd658eaecd --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/hipthrust/test/vector.hip @@ -0,0 +1,45 @@ +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi HIP thrust" +#include + +#include +#include + +template void what(T&&) = delete; + +BOOST_AUTO_TEST_CASE(vector){ + // H has storage for 4 integers + thrust::host_vector H(4); + + // initialize individual elements + H[0] = 14; + H[1] = 20; + H[2] = 38; + H[3] = 46; + + // H.size() returns the size of vector H + BOOST_TEST_REQUIRE( H.size() == 4 ); + + // print contents of H + BOOST_TEST_REQUIRE( H[2] == 38 ); + + // resize H + H.resize(2); + + BOOST_REQUIRE( H.size() == 2 ); + + // Copy host_vector H to device_vector D + thrust::device_vector D = H; + +// f(D.data()); + + // elements of D can be modified + D[0] = 99; + D[1] = 88; + + thrust::device_ptr p = D.data(); // this works with rocm hip 5.6 +// thrust::pointer p = D.data(); // this works with rocm hip 5.6 + + BOOST_REQUIRE( p[0] == 99 ); + + BOOST_TEST_REQUIRE( D[1] == 88 ); +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack.hpp new file mode 100644 index 0000000000..a8c3b9e814 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack.hpp @@ -0,0 +1,6 @@ +// Copyright 2019-2024 Alfredo A. 
Correa + +#pragma once + +#include "multi/adaptors/lapack/getrf.hpp" +#include "multi/adaptors/lapack/potrf.hpp" diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/CMakeLists.txt new file mode 100644 index 0000000000..ed205cdea1 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.18) # 3.18 for LAPACK::LAPACK + +project( + boost-multi-adaptor-lapack + VERSION 0.1 + LANGUAGES CXX +) + +add_library(multi-lapack INTERFACE) + +find_package(LAPACK REQUIRED) + +target_link_libraries(multi-lapack INTERFACE multi LAPACK::LAPACK) + +add_subdirectory(./test) diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/core.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/core.hpp similarity index 59% rename from external_codes/boost_multi/multi/include/multi/adaptors/lapack/core.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/core.hpp index a5adbc8d9a..08642d8b43 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/core.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/core.hpp @@ -1,17 +1,17 @@ -#ifdef COMPILATION_INSTRUCTIONS -(echo "#include\""$0"\"" > $0x.cpp) && clang++ `#-DNDEBUG` -O3 -std=c++14 -Wall -Wextra -Wpedantic -Wfatal-errors -D_TEST_MULTI_ADAPTORS_LAPACK_CORE -DADD_ $0x.cpp -o $0x.x -lblas -llapack && time $0x.x $@ && rm -f $0x.x $0x.cpp; exit -#endif -// Alfredo A. Correa 2019 © +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt -#ifndef MULTI_ADAPTORS_LAPACK_CORE_HPP -#define MULTI_ADAPTORS_LAPACK_CORE_HPP +#ifndef BOOST_MULTI_ADAPTORS_LAPACK_CORE_HPP +#define BOOST_MULTI_ADAPTORS_LAPACK_CORE_HPP //#include #include #include -//#include -#include +// #include +// #include +// #include #define s float #define d double @@ -31,10 +31,11 @@ #define LIWORK INTEGER liwork #define IWORK int* -#define xPOTRF(T) v LAPACK(T##potrf)(UPLO, int const& N, T*, int const& LDA, int& INFO) -#define xSYEV(T) v LAPACK(T##syev) (JOBZ, UPLO, int const& N, T*, int const& LDA, T*, T*, LWORK, int& INFO) -#define xSYEVD(T) v LAPACK(T##syevd)(JOBZ, UPLO, int const& N, T*, int const& LDA, T*, T*, LWORK, IWORK, LIWORK, int& INFO) -#define xHEEV(T) v LAPACK(T##heev) (JOBZ, UPLO, int const& N, T*, int const& LDA, T*, T*, LWORK, int& INFO) +// cppcheck-suppress [preprocessorErrorDirective] bug in cppcheck 2.11 +#define xPOTRF(T) v LAPACK(T##potrf)(UPLO, int const& N, T*, int const& LDA, int& INFO) // NOLINT(bugprone-macro-parentheses,readability-identifier-length) +#define xSYEV(T) v LAPACK(T##syev) (JOBZ, UPLO, int const& N, T*, int const& LDA, T*, T*, LWORK, int& INFO) // NOLINT(bugprone-macro-parentheses) +#define xSYEVD(T) v LAPACK(T##syevd)(JOBZ, UPLO, int const& N, T*, int const& LDA, T*, T*, LWORK, IWORK, LIWORK, int& INFO) // NOLINT(bugprone-macro-parentheses) +#define xHEEV(T) v LAPACK(T##heev) (JOBZ, UPLO, int const& N, T*, int const& LDA, T*, T*, LWORK, int& INFO) // NOLINT(bugprone-macro-parentheses) #define subroutine void #define integer int const& @@ -45,70 +46,80 @@ // http://www.netlib.org/lapack/explore-html/dd/d9a/group__double_g_ecomputational_ga0019443faea08275ca60a734d0593e60.html #define xGETRF(T) \ -subroutine T##getrf_( \ - integer M, /*The number of rows of the matrix A. M >= 0.*/ \ - integer N, /*The number of columns of the matrix A. 
N >= 0.*/ \ - T* A, /*On entry, the M-by-N matrix to be factored.*/ \ +/*NOLINTBEGIN(readability-identifier-length,bugprone-macro-parentheses)*/ \ +void T##getrf_( \ + integer M, /*The number of rows of the matrix A. M >= 0.*/ \ + integer N, /*The number of columns of the matrix A. N >= 0.*/ \ + T* A, /*On entry, the M-by-N matrix to be factored.*/ \ /*On exit, the factors L and U from the factorization*/ \ - integer LDA, /*The leading dimension of the array A. LDA >= max(1,M).*/\ + integer LDA, /*The leading dimension of the array A. LDA >= max(1,M).*/\ integer_ptr IPIV, /*The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i).*/\ integer_out INFO /*= 0: successful exit*/\ /*< 0: if INFO = -i, the i-th argument had an illegal value*/\ /*> 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations.*/\ -) +) \ +/*NOLINTEND(readability-identifier-length,bugprone-macro-parentheses)*/ +// NOLINTBEGIN(bugprone-macro-parentheses) +// NOLINTBEGIN(readability-identifier-length) // http://www.netlib.org/lapack/explore-html/d8/ddc/group__real_g_ecomputational_gaa00bcf4d83a118cb6f0b6619d6ffaa24.html #define xGETRS(T) \ -subroutine T##getrs_( \ +void T##getrs_( \ character TRANS,/*Specifies the form of the system of equations: */\ /* = 'N': A * X = B (No transpose) */\ /* = 'T': A**T* X = B (Transpose) */\ /* = 'C': A**T* X = B (Conjugate transpose = Transpose) */\ - integer N, /*The order of the matrix A. N >= 0. */\ - integer NRHS, /*The number of right hand sides, i.e., the number of columns*/\ + integer N, /*The order of the matrix A. N >= 0. */\ + integer NRHS, /*The number of right hand sides, i.e., the number of columns*/\ /*of the matrix B. NRHS >= 0. */\ T const* A, /* The factors L and U from the factorization A = P*L*U */\ /*as computed by SGETRF. 
*/\ - integer LDA, /*The leading dimension of the array A. LDA >= max(1,N). */\ + integer LDA, /*The leading dimension of the array A. LDA >= max(1,N). */\ integer_cptr IPIV, /*The pivot indices from SGETRF; for 1<=i<=N, row i of the */\ /*matrix was interchanged with row IPIV(i). */\ T* B, /*On entry, the right hand side matrix B. */\ /*On exit, the solution matrix X. */\ integer LDB, /*The leading dimension of the array B. LDB >= max(1,N). */\ - integer INFO /*= 0: successful exit */\ + integer_out INFO /*= 0: successful exit */\ /*< 0: if INFO = -i, the i-th argument had an illegal value */\ ) +// NOLINTEND(readability-identifier-length) +// NOLINTEND(bugprone-macro-parentheses) -// TODO // http://www.netlib.org/lapack/explore-html/d7/d3b/group__double_g_esolve_ga5ee879032a8365897c3ba91e3dc8d512.html - +// TODO(correaa) // http://www.netlib.org/lapack/explore-html/d7/d3b/group__double_g_esolve_ga5ee879032a8365897c3ba91e3dc8d512.html extern "C"{ -//xGETRF(s) ; xGETRF(d) ; xGETRF(c) ; xGETRF(z) ; -//xGETRS(s) ; xGETRS(d) ; xGETRS(c) ; xGETRS(z) ; +xGETRF(s) ; xGETRF(d) ; xGETRF(c) ; xGETRF(z) ; +xGETRS(s) ; xGETRS(d) ; xGETRS(c) ; xGETRS(z) ; } -namespace core{ +namespace core { // http://www.netlib.org/lapack/explore-html/da/d30/a18643_ga5b625680e6251feb29e386193914981c.html -int getrf(lapack_int m, lapack_int n, double* A, lapack_int lda, int* ipiv){ +using lapack_int = int; + +// TODO(correaa) make into a template, then remove inline +inline auto getrf(lapack_int m, lapack_int n, double* A, lapack_int lda, int* ipiv) -> int { // NOLINT(readability-identifier-length) lapack conventional name assert( m >= 0 ); assert( n >= 0 ); assert( lda >= std::max(lapack_int{1}, m) ); - int info; - dgetrf_(&m, &n, A, &lda, ipiv, &info); + int info; // NOLINT(cppcoreguidelines-init-variables) delayed initialization + dgetrf_(m, n, A, lda, ipiv, info); assert(info >= 0); return info; } -void getrs(char trans, lapack_int const n, lapack_int const nrhs, double const* A, 
lapack_int const lda, int const* ipiv, double* B, lapack_int const ldb){ - assert( trans == 'T' or trans == 'N' or trans == 'C' ); +// TODO(correaa) make into a template, then remove inline +inline void getrs(char trans, lapack_int const n, lapack_int const nrhs, double const* A, lapack_int const lda, int const* ipiv, double* B, lapack_int const ldb) { // NOLINT(readability-identifier-length) lapack conventional name + assert( trans == 'T' || trans == 'N' || trans == 'C' ); assert( n >= 0 ); assert( nrhs >= 0 ); assert( lda >= std::max(1, n) ); - int info; - dgetrs_(&trans, &n, &nrhs, A, &lda, ipiv, B, &ldb, &info); + int info; // NOLINT(cppcoreguidelines-init-variables) delayed initialization + dgetrs_(trans, n, nrhs, A, lda, ipiv, B, ldb, info); + switch(info){ - case -1: throw std::logic_error{"transa ≠ 'N', 'T', or 'C'"}; + case -1: throw std::logic_error{"transa != 'N', 'T', or 'C'"}; case -2: throw std::logic_error{"n < 0" }; case -3: throw std::logic_error{"nrhs < 0" }; case -4: throw std::logic_error{"n > lda" }; @@ -116,29 +127,28 @@ void getrs(char trans, lapack_int const n, lapack_int const nrhs, double const* case -6: throw std::logic_error{"n > ldb" }; case -7: throw std::logic_error{"ldb ≤ 0" }; case -8: throw std::logic_error{"error!" }; + default: assert(info == 0); } - assert(info == 0 ); - return; } -} +} // end namespace core -namespace lapack{ +namespace lapack { struct context{ - template static auto getrf(Args&&... args)->decltype(core::getrf(args...)){return core::getrf(args...);} - template static auto getrs(Args&&... args)->decltype(core::getrs(args...)){return core::getrs(args...);} + template static auto getrf(Args&&... args)->decltype(core::getrf(args...)){return core::getrf(std::forward(args)...);} + template static auto getrs(Args&&... 
args)->decltype(core::getrs(args...)){return core::getrs(std::forward(args)...);} }; -} +} // end namespace lapack -extern "C"{ -//xPOTRF(s) ; xPOTRF(d) ; -//xPOTRF(c) ; xPOTRF(z) ; +extern "C" { +xPOTRF(s) ; xPOTRF(d) ; +xPOTRF(c) ; xPOTRF(z) ; //xSYEV(s) ; xSYEV(d) ; //xSYEVD(s) ; xSYEVD(d) ; -// xHEEV(c) ; xHEEV(z) ; +//xHEEV(c) ; xHEEV(z) ; } #undef subroutine @@ -156,15 +166,18 @@ extern "C"{ #undef INT -#define xpotrf(T) template v potrf(char uplo, S n, T *x, S incx, int& info){LAPACK(T##potrf)(uplo, n, x, incx, info);} +#define xpotrf(T) template v potrf(char uplo, S n, T *x, S incx, int& info){LAPACK(T##potrf)(uplo, n, x, incx, info);} // NOLINT(bugprone-macro-parentheses,readability-identifier-length) -namespace core{ +namespace core { xpotrf(s) xpotrf(d) xpotrf(c) xpotrf(z) -} +} //end namespace core + +// NOLINTBEGIN(bugprone-macro-parentheses) // http://www.netlib.org/lapack/explore-html/d2/d8a/group__double_s_yeigen_ga442c43fca5493590f8f26cf42fed4044.html #define xsyev(T) template v syev(char jobz, char uplo, S n, T* a, S lda, T* w, T* work, S lwork, int& info){LAPACK(T##syev)(jobz, uplo, n, a, lda, w, work, lwork, info);} + // http://www.netlib.org/lapack/explore-html/d2/d8a/group__double_s_yeigen_ga77dfa610458b6c9bd7db52533bfd53a1.html #define xsyevd(T) template v syevd(char jobz, char uplo, S n, T* a, S lda, T* w, T* work, S lwork, int* iwork, S liwork, int& info){ \ if(n <= 1 ){assert(lwork >= 1 ); assert(liwork >=1 );} \ @@ -172,13 +185,15 @@ xpotrf(c) xpotrf(z) if(jobz == 'V' and n > 1){assert(lwork >= 1 + 6*n + 2*n*n); assert(liwork >= 3 + 5*n);} \ LAPACK(T##syevd)(jobz, uplo, n, a, lda, w, work, lwork, iwork, liwork, info); \ } + #define xheev(T) template v heev(char jobz, char uplo, S n, T* a, S lda, T* w, T* work, S lwork, int& info){LAPACK(T##heev)(jobz, uplo, n, a, lda, w, work, lwork, info);} +// NOLINTEND(bugprone-macro-parentheses) -namespace core{ -xsyev (s) xsyev (d) -xsyevd(s) xsyevd(d) - xheev(c) xheev(z) -} +// namespace core{ +// 
// xsyev (s) xsyev (d) +// // xsyevd(s) xsyevd(d) +// // xheev(c) xheev(z) +// } #undef s #undef d @@ -188,41 +203,4 @@ xsyevd(s) xsyevd(d) #define TRANS const char& trans -/////////////////////////////////////////////////////////////////////////////// - -#if _TEST_MULTI_ADAPTORS_LAPACK_CORE - -#include "../../array.hpp" -#include "../../utility.hpp" - -#include -#include -#include - -namespace multi = boost::multi; -using std::cout; - -int main(){ - using core::potrf; - - std::vector v = { - 2., 1., - 1., 2. - }; - cout - << v[0] <<'\t'<< v[1] <<'\n' - << v[2] <<'\t'<< v[3] <<'\n' << std::endl - ; - int info; - potrf('U', 2, v.data(), 2, info); - cout << "error " << info << std::endl; - cout - << v[0] <<'\t'<< v[1] <<'\n' - << v[2] <<'\t'<< v[3] <<'\n' - ; - cout << std::endl; -} - #endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/cuda.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/cuda.hpp similarity index 99% rename from external_codes/boost_multi/multi/include/multi/adaptors/lapack/cuda.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/cuda.hpp index f478332ed3..6a17a0a384 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/cuda.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/cuda.hpp @@ -59,7 +59,7 @@ struct error_category : std::error_category{ static error_category& instance(){static cusolver::error_category instance; return instance;} }; inline std::error_code make_error_code(cusolver::status s) noexcept{ - return std::error_code(int(s), cusolver::error_category::instance()); + return {int(s), cusolver::error_category::instance()}; } struct version_t{ diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/filling.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/filling.hpp new file mode 100644 index 0000000000..70e320c71b --- /dev/null +++ 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/filling.hpp @@ -0,0 +1,31 @@ +// Copyright 2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_LAPACK_FILLING_HPP +#define BOOST_MULTI_ADAPTORS_LAPACK_FILLING_HPP +#pragma once + +// TODO(correaa) #include "multi/blas/filling.hpp" + +namespace boost::multi::lapack { + +enum class filling : char { + lower = 'U', + upper = 'L', +}; + +inline auto flip(filling side) -> filling { + switch(side) { + case filling::lower: return filling::upper; + case filling::upper: return filling::lower; + } + __builtin_unreachable(); // LCOV_EXCL_LINE +} + +inline auto operator-(filling side) -> filling { return flip(side); } +inline auto operator+(filling side) -> filling { return side; } + +} // namespace boost::multi::lapack + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/geqrf.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/geqrf.hpp new file mode 100644 index 0000000000..95e896927d --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/geqrf.hpp @@ -0,0 +1,85 @@ +#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- +$CXXX $CXXFLAGS $0 -o $0x$OXX `pkg-config --libs blas lapack` -lboost_unit_test_framework&&$0x$OXX -x 0&&rm $0x$OXX;exit +#endif +// © Alfredo A. 
Correa 2020 + +#ifndef MULTI_ADAPTORS_LAPACK_GEQRF_HPP +#define MULTI_ADAPTORS_LAPACK_GEQRF_HPP + +#include "../lapack/core.hpp" +#include "../blas/filling.hpp" + +#include "../../config/NODISCARD.hpp" + +#include + +namespace boost{namespace multi{namespace lapack{ + +using blas::filling; + +template +A&& geqrf(Context&& ctxt, A&& a, TAU&& tau, WORK&& work){ +// assert( stride(~a) == 1); + assert( size(tau) == std::min(size(~a), size(a)) ); + int info = -1; + geqrf_(std::forward(ctxt), size(~a), size(a), a.base(), stride(a), tau.base(), work.data(), work.size(), info); + assert(info == 0); + return std::forward(a); +} + +//using ::core::syev; +//using ::core::geqrf; + +#if 0 +template +auto syev(blas::filling uplo, Array2D&& a, Array1D&& w, Array1DW&& work) +->decltype(syev('V', uplo==blas::filling::upper?'L':'U', size(a), base(a), stride(a), base(w), base(work), size(work), std::declval()), a({0l, 1l}, {0l, 1l})) +{ + assert( size(work) >= std::max(1l, 3*size(a)-1l) ); + assert( size(a) == size(w) ); + assert( stride(w)==1 ); + assert( stride(work)==1 ); + if(size(a)==0) return std::forward(a)(); + int info = -1; + if(stride(rotated(a))==1) syev('V', uplo==blas::filling::upper?'L':'U', size(a), base(a), stride( a ), base(w), base(work), size(work), info); + else if(stride( a )==1) syev('V', uplo==blas::filling::upper?'U':'L', size(a), base(a), stride(rotated(a)), base(w), base(work), size(work), info); + else assert(0); // case not contemplated by lapack + if(info < 0) assert(0); // bad argument + return std::forward(a)({0, size(a)-info}, {0, size(a)-info}); +} + +template::decay_type> +auto syev(blas::filling uplo, Array2D&& a, Array1D&& w) +->decltype(syev(uplo, std::forward(a), std::forward(w), Array1DW(std::max(1l, 3*size(a)-1l), get_allocator(w)))){ + return syev(uplo, std::forward(a), std::forward(w), Array1DW(std::max(1l, 3*size(a)-1l), get_allocator(w)));}// TODO obtain automatic size from lapack info routine + +template +NODISCARD("because input array is 
const, output gives eigenvectors") +typename Array2D::decay_type syev(blas::filling uplo, Array2D const& a, Array1D&& w){ + auto ret = a.decay(); + if(syev(uplo, ret, std::forward(w)).size() != a.size()) assert(0); // failed + return ret; +} + +template +NODISCARD("because input array is const, output gives eigenvalues") +auto syev(blas::filling uplo, Array2D&& a){ + multi::array::element_type, 1, decltype(get_allocator(a))> eigenvalues(size(a), get_allocator(a)); + syev(uplo, std::forward(a), eigenvalues); + return eigenvalues; +} + +template +NODISCARD("because input array is const, output gives a structured binding of eigenvectors and eigenvactor") +auto syev(blas::filling uplo, Array2D const& a){ + struct{ + typename Array2D::decay_type eigenvectors; + typename Array2D::value_type eigenvalues; + } ret{a, {size(a), get_allocator(a)}}; + auto&& l = syev(uplo, ret.eigenvectors, ret.eigenvalues); + assert( size(l) == size(a) ); + return ret; +} +#endif + +}}} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/getrf.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/getrf.hpp similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/lapack/getrf.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/getrf.hpp diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/potrf.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/potrf.hpp new file mode 100644 index 0000000000..7d26df73d3 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/potrf.hpp @@ -0,0 +1,100 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_LAPACK_POTRF_HPP +#define BOOST_MULTI_ADAPTORS_LAPACK_POTRF_HPP +#pragma once + +#include +#include + +#include "../lapack/core.hpp" +#include "../lapack/filling.hpp" + +#include "../blas/numeric.hpp" + +#include + +namespace boost::multi::lapack { + +// using blas::filling; + +using ::core::potrf; + +template +BOOST_MULTI_NODISCARD("result has information of order of minor through .size() member") +auto potrf(filling uplo, Iterator first, Iterator last) +->decltype(potrf(static_cast(uplo), typename std::iterator_traits::difference_type{}, base(first), stride(first), std::declval()), Iterator{}) +{ + assert( stride(first) == stride(last) ); + assert( first->stride() == 1 ); +// auto lda = stride(first); + + int info; // NOLINT(cppcoreguidelines-init-variables) + potrf(static_cast(uplo), std::distance(first, last), base(first), stride(first), info); + + assert( info >= 0 ); + // if(info > 0) {std::cerr << "warning minor of order " << info << " is not possitive\n";} + return info==0?last:first + info - 1; +} + +template +BOOST_MULTI_NODISCARD("result has information of order of minor through .size() member") +auto potrf(filling uplo, A2D&& A) // NOLINT(readability-identifier-length) conventional lapack name +->decltype(potrf(uplo, begin(A), end(A)), A({0, 1})) +{ + using lapack::flip; + + if(stride(A) == 1) { + auto last = potrf(flip(uplo), begin(rotated(A)), end(rotated(A))); + using std::distance; + return A({0, distance(begin(rotated(A)), last)}, {0, distance(begin(rotated(A)), last)}); + } + + auto last = potrf(uplo, begin(A), end(A)); + + using std::distance; + return std::forward(A)({0, distance(begin(A), last)}); // , {0, distance(begin(A), last-1)}); +} + +template +struct hermitic_t : private A { + using underlying_type = A; + + auto underlying() const & -> underlying_type const& {return *this;} + auto underlying() & -> underlying_type & {return *this;} + auto underlying() && -> 
underlying_type && {return std::move(*this);} + + private: + lapack::filling side_; + + public: + auto side() const {return side_;} + + hermitic_t(A const& a, lapack::filling side) : A{a}, side_{side} {} // NOLINT(readability-identifier-length) conventional lapack name + using A::size; +}; + +template auto hermitic(lapack::filling side, A&& a) // NOLINT(readability-identifier-length) conventional lapack name +-> hermitic_t()())>> { + return {std::forward(a)(), side}; +} + +template +BOOST_MULTI_NODISCARD("result is returned because third argument is const") +auto potrf(HA&& ha) -> decltype(auto) { + return hermitic(ha.side, potrf(ha.side, std::forward(ha).underlying())); // static_cast(ha))); +} + +// orthonormalize rows +template auto onrm(A&& a, filling uplo /*= filling::upper*/) // NOLINT(readability-identifier-length) conventional lapack name +->decltype(trsm(flip(uplo), hermitized(potrf(uplo, herk(uplo, a))), std::forward(a))) { assert(size(a) <= size(rotated(a))); + return trsm(flip(uplo), hermitized(potrf(uplo, herk(uplo, a))), std::forward(a)); } + +template auto onrm(A&& a, B&& buffer, filling uplo /* = filling::upper*/) // NOLINT(readability-identifier-length) conventional lapack name +->decltype(trsm(flip(uplo), hermitized(potrf(uplo, herk(uplo, a, std::forward(buffer)))), std::forward(a))) { assert(size(a) <= size(rotated(a))); + return trsm(flip(uplo), hermitized(potrf(uplo, herk(uplo, a, std::forward(buffer)))), std::forward(a)); } + +} // end namespace boost::multi::lapack +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/syev.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/syev.hpp new file mode 100644 index 0000000000..d38e9bb347 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/syev.hpp @@ -0,0 +1,90 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_LAPACK_SYEV_HPP +#define BOOST_MULTI_ADAPTORS_LAPACK_SYEV_HPP + +#include + +namespace boost { +namespace multi { +namespace lapack { + +using blas::filling; + +using ::core::syev; + +template +auto syev(blas::filling uplo, Array2D&& a, Array1D&& w, Array1DW&& work) + -> decltype(syev('V', uplo == blas::filling::upper ? 'L' : 'U', size(a), base(a), stride(a), base(w), base(work), size(work), std::declval()), a({0L, 1L}, {0L, 1L})) { + assert(size(work) >= std::max(1L, 3 * size(a) - 1L)); + assert(size(a) == size(w)); + assert(stride(w) == 1); + assert(stride(work) == 1); + + if(size(a) == 0) + return std::forward(a)(); + + int info = -1; + + if(stride(rotated(a)) == 1) { + syev('V', uplo == blas::filling::upper ? 'L' : 'U', size(a), base(a), stride(a), base(w), base(work), size(work), info); + } else if(stride(a) == 1) { + syev('V', uplo == blas::filling::upper ? 'U' : 'L', size(a), base(a), stride(rotated(a)), base(w), base(work), size(work), info); + } else { + assert(0); + } // case not contemplated by lapack + + if(info < 0) { + assert(0); + } // bad argument + + return std::forward(a)({0, size(a) - info}, {0, size(a) - info}); +} + +template::decay_type> +auto syev(blas::filling uplo, Array2D&& a, Array1D&& w) + -> decltype(syev(uplo, std::forward(a), std::forward(w), Array1DW(std::max(1L, 3 * size(a) - 1L), get_allocator(w)))) { + return syev(uplo, std::forward(a), std::forward(w), Array1DW(std::max(1L, 3 * size(a) - 1L), get_allocator(w))); +} // TODO(correaa) obtain automatic size from lapack info routine + +template +NODISCARD("because input array is const, output gives eigenvectors") +typename Array2D::decay_type syev(blas::filling uplo, Array2D const& a, Array1D&& w) { + auto ret = a.decay(); + auto l = syev(uplo, ret, std::forward(w)); + if(size(l) != size(a)) + assert(0); // failed + return ret; +} + +template +NODISCARD("because input array is const, output gives eigenvalues") 
+auto syev(blas::filling uplo, Array2D&& a) { + multi::array::element_type, 1, decltype(get_allocator(a))> eigenvalues(size(a), get_allocator(a)); + syev(uplo, std::forward(a), eigenvalues); + return eigenvalues; +} + +template +NODISCARD("because input array is const, output gives a structured binding of eigenvectors and eigenvactor") +auto syev(blas::filling uplo, Array2D const& a) { + struct { + typename Array2D::decay_type eigenvectors; + typename Array2D::value_type eigenvalues; + } ret{a, typename Array2D::value_type(size(a), get_allocator(a))}; + auto&& l = syev(uplo, ret.eigenvectors, ret.eigenvalues); + assert(size(l) == size(a)); + return ret; +} + +} // namespace lapack +} // namespace multi +} // namespace boost +#endif // BOOST_MULTI_ADAPTORS_LAPACK_SYEV_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/CMakeLists.txt new file mode 100644 index 0000000000..63adb43334 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/CMakeLists.txt @@ -0,0 +1,67 @@ +cmake_minimum_required(VERSION 3.18) + +project( + boost-multi-adaptors-lapack-test + VERSION 0.1 + LANGUAGES CXX +) + +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +find_package(Boost REQUIRED COMPONENTS unit_test_framework) +# add_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS}) +# include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) +# link_libraries(${Boost_LIBRARIES}) +# link_directories(${Boost_LIBRARY_DIRS}) + +#find_package(LAPACK REQUIRED) + +# find_package(BLAS REQUIRED) + +# set(BLA_VENDOR OpenBLAS) +# set(BLA_VENDOR Intel10_64lp) find_package(BLAS) if(BLAS_FOUND) # in some systems with MKL, regular BLAS headers need to be found for it to work message("Multi/BLAS: MKL environment detected") add_definitions(-DRETURN_BY_STACK) else() +# message("Multi/BLAS: MKL environment not detected, 
looking for other BLAS") unset(BLA_VENDOR) +# find_package(BLAS REQUIRED) +# endif() + +# find_path( +# BLAS_INCLUDE_DIRS +# cblas.h +# /usr/include +# /usr/local/include +# $ENV{BLAS_HOME}/include +# ) + +# include_directories(../../../../../include) + +# link_libraries(${BLAS_LIBRARIES}) +#link_libraries(-llapacke) + +# include_directories(${TEST_EXE} PRIVATE ${BLAS_INCLUDE_DIRS}) + +# add_compile_options( +# -Werror +# -Wall +# -Wextra +# -fno-common +# -Wfatal-errors +# # $<$: -Wpedantic -Wformat-truncation -fstack-usage >#-Wconversion +# # $<$,$>: +# # -Wpedantic -Wmove > $<$: -wd161 -diag-disable=remark -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wparentheses +# # -Wpointer-sign -Wreorder -Wno-return-type -Wsign-compare -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wunused-variable -Wwrite-strings -Werror -diag-error:3846 > $<$: /W4 > +# ) + +enable_testing() +include(CTest) + +# add_executable(getrf.cpp.x getrf.cpp) +# add_test(NAME getrf.cpp.x COMMAND ./getrf.cpp.x) + +# add_executable(geqrf.cpp.x geqrf.cpp) +# add_test(NAME geqrf.cpp.x COMMAND ./geqrf.cpp.x) + +add_executable(potrf.cpp.x potrf.cpp) +target_link_libraries(potrf.cpp.x PRIVATE multi-lapack Boost::unit_test_framework) + +add_test(NAME potrf.cpp.x COMMAND $) diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/syev.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/geqrf.cpp similarity index 53% rename from external_codes/boost_multi/multi/include/multi/adaptors/lapack/syev.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/geqrf.cpp index e4ed6798a3..da456b07a2 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/syev.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/geqrf.cpp @@ -1,91 +1,25 @@ -#ifdef COMPILATION// 
-*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXX $0 -o $0x `pkg-config --libs blas lapack` -lboost_unit_test_framework&&$0x&&rm $0x;exit +#ifdef COMPILATION_INSTRUCTIONS +(echo '#include"'$0'"'>$0.cpp)&&nvcc -x cu --expt-relaxed-constexpr`#$CXX` $0 -o $0x -Wno-deprecated-declarations -lcudart -lcublas -lcusolver `pkg-config --libs blas lapack` -DBOOST_TEST_DYN_LINK -lboost_unit_test_framework -DBOOST_LOG_DYN_LINK -lboost_log -lpthread -lboost_system &&$0x&&rm $0x $0.cpp; exit #endif -// © Alfredo A. Correa 2020 - -#ifndef MULTI_ADAPTORS_LAPACK_SYEV_HPP -#define MULTI_ADAPTORS_LAPACK_SYEV_HPP - -#include "../lapack/core.hpp" -#include "../blas/filling.hpp" - -#include "../../config/NODISCARD.hpp" - -#include - -namespace boost{namespace multi{namespace lapack{ - -using blas::filling; - -using ::core::syev; - -template -auto syev(blas::filling uplo, Array2D&& a, Array1D&& w, Array1DW&& work) -->decltype(syev('V', uplo==blas::filling::upper?'L':'U', size(a), base(a), stride(a), base(w), base(work), size(work), std::declval()), a({0l, 1l}, {0l, 1l})) -{ - assert( size(work) >= std::max(1l, 3*size(a)-1l) ); - assert( size(a) == size(w) ); - assert( stride(w)==1 ); - assert( stride(work)==1 ); - if(size(a)==0) return std::forward(a)(); - int info = -1; - if(stride(rotated(a))==1) syev('V', uplo==blas::filling::upper?'L':'U', size(a), base(a), stride( a ), base(w), base(work), size(work), info); - else if(stride( a )==1) syev('V', uplo==blas::filling::upper?'U':'L', size(a), base(a), stride(rotated(a)), base(w), base(work), size(work), info); - else assert(0); // case not contemplated by lapack - if(info < 0) assert(0); // bad argument - return std::forward(a)({0, size(a)-info}, {0, size(a)-info}); -} - -template::decay_type> -auto syev(blas::filling uplo, Array2D&& a, Array1D&& w) -->decltype(syev(uplo, std::forward(a), std::forward(w), Array1DW(std::max(1l, 3*size(a)-1l), get_allocator(w)))){ - return syev(uplo, std::forward(a), std::forward(w), 
Array1DW(std::max(1l, 3*size(a)-1l), get_allocator(w)));}// TODO obtain automatic size from lapack info routine - -template -NODISCARD("because input array is const, output gives eigenvectors") -typename Array2D::decay_type syev(blas::filling uplo, Array2D const& a, Array1D&& w){ - auto ret = a.decay(); - auto l = syev(uplo, ret, std::forward(w)); - if(size(l) != size(a)) assert(0); // failed - return ret; -} - -template -NODISCARD("because input array is const, output gives eigenvalues") -auto syev(blas::filling uplo, Array2D&& a){ - multi::array::element_type, 1, decltype(get_allocator(a))> eigenvalues(size(a), get_allocator(a)); - syev(uplo, std::forward(a), eigenvalues); - return eigenvalues; -} - -template -NODISCARD("because input array is const, output gives a structured binding of eigenvectors and eigenvactor") -auto syev(blas::filling uplo, Array2D const& a){ - struct{ - typename Array2D::decay_type eigenvectors; - typename Array2D::value_type eigenvalues; - } ret{a, {size(a), get_allocator(a)}}; - auto&& l = syev(uplo, ret.eigenvectors, ret.eigenvalues); - assert( size(l) == size(a) ); - return ret; -} - -}}} +// © Alfredo A. 
Correa 2019-2020 +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi getrf" +#include -#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_LAPACK_SYEV +#include +#include +#include +#include -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi lapack adaptor syev" -#define BOOST_TEST_DYN_LINK -#include +// namespace multi = boost::multi; -#include "../../array.hpp" +// #include "../../array.hpp" #include // std::isnan #include #include // std::max -namespace multi = boost::multi; -namespace lapack = multi::lapack; +namespace multi = ::boost::multi; +// namespace lapack = ::boost::multi::lapack; template decltype(auto) print(M const& C){ using std::cout; @@ -97,6 +31,35 @@ template decltype(auto) print(M const& C){ return cout << std::endl; } +template decltype(auto) print_1d(M const& C){ + using std::cout; + using multi::size; + for(int i = 0; i != size(C); ++i) cout<< C[i] <<' '; + return cout << std::endl; +} + +BOOST_AUTO_TEST_CASE(lapack_geqrf){ + + multi::array A = + { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0} + } + ; +// multi::lapack::context ctxt; + + multi::array TAU(std::min(size(A), size(~A))); + multi::array WORK(std::max(1l, 3*size(A)-1)); + + multi::lapack::geqrf(ctxt, A, TAU, WORK); + + print(A); + print(TAU); + +} + +#if 0 BOOST_AUTO_TEST_CASE(lapack_syev, *boost::unit_test::tolerance(0.00001) ){ { multi::array A = { @@ -208,7 +171,6 @@ BOOST_AUTO_TEST_CASE(lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( sys.eigenvectors[2][1] == -0.579092 ); BOOST_TEST( sys.eigenvalues[1] == 42.2081 ); } -#if __cpp_structured_bindings { multi::array const A = { {167.413, 126.804, 125.114}, @@ -222,10 +184,5 @@ BOOST_AUTO_TEST_CASE(lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( eigenvecs[2][1] == -0.579092 ); BOOST_TEST( eigenvals[1] == 42.2081 ); } -#endif - } #endif -#endif - - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/getrf.cpp 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/getrf.cpp similarity index 67% rename from external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/getrf.cpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/getrf.cpp index 7f54850158..0601df3478 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/getrf.cpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/getrf.cpp @@ -15,61 +15,61 @@ namespace multi = boost::multi; //BOOST_AUTO_TEST_CASE(lapack_getrf){ //// https://www.ibm.com/support/knowledgecenter/SSFHY8_6.2/reference/am5gr_hsgetrf.html -// multi::array A = { -// { 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6 }, -// { 1.2, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4 }, -// { 1.4, 1.2, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2 }, -// { 1.6, 1.4, 1.2, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0 }, -// { 1.8, 1.6, 1.4, 1.2, 1.0, 1.2, 1.4, 1.6, 1.8 }, -// { 2.0, 1.8, 1.6, 1.4, 1.2, 1.0, 1.2, 1.4, 1.6 }, -// { 2.2, 2.0, 1.8, 1.6, 1.4, 1.2, 1.0, 1.2, 1.4 }, -// { 2.4, 2.2, 2.0, 1.8, 1.6, 1.4, 1.2, 1.0, 1.2 }, -// { 2.6, 2.4, 2.2, 2.0, 1.8, 1.6, 1.4, 1.2, 1.0 } -// }; - -// multi::array P({9}, 0.); -// lapack::context ctxt; -// auto const& LU = multi::lapack::getrf(ctxt, A, P); - -// BOOST_REQUIRE( LU.size() == A.size() ); - -// BOOST_REQUIRE_CLOSE( LU[0][0] , 2.6 , 1e-5 ); -// BOOST_REQUIRE_CLOSE( LU[0][8] , 1. 
, 1e-5 ); -// BOOST_REQUIRE_CLOSE( LU[8][0] , 0.923077 , 1e-5 ); -// BOOST_REQUIRE_CLOSE( LU[8][8] , 0.4 , 1e-5 ); +// multi::array A = { +// { 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6 }, +// { 1.2, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4 }, +// { 1.4, 1.2, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2 }, +// { 1.6, 1.4, 1.2, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0 }, +// { 1.8, 1.6, 1.4, 1.2, 1.0, 1.2, 1.4, 1.6, 1.8 }, +// { 2.0, 1.8, 1.6, 1.4, 1.2, 1.0, 1.2, 1.4, 1.6 }, +// { 2.2, 2.0, 1.8, 1.6, 1.4, 1.2, 1.0, 1.2, 1.4 }, +// { 2.4, 2.2, 2.0, 1.8, 1.6, 1.4, 1.2, 1.0, 1.2 }, +// { 2.6, 2.4, 2.2, 2.0, 1.8, 1.6, 1.4, 1.2, 1.0 } +// }; + +// multi::array P({9}, 0.); +// lapack::context ctxt; +// auto const& LU = multi::lapack::getrf(ctxt, A, P); + +// BOOST_REQUIRE( LU.size() == A.size() ); + +// BOOST_REQUIRE_CLOSE( LU[0][0] , 2.6 , 1e-5 ); +// BOOST_REQUIRE_CLOSE( LU[0][8] , 1. , 1e-5 ); +// BOOST_REQUIRE_CLOSE( LU[8][0] , 0.923077 , 1e-5 ); +// BOOST_REQUIRE_CLOSE( LU[8][8] , 0.4 , 1e-5 ); //} //BOOST_AUTO_TEST_CASE(lapack_getrf2){ //// https://www.ibm.com/support/knowledgecenter/SSFHY8_6.2/reference/am5gr_hsgetrf.html -// multi::array A = { -// { 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, -// { 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0 }, -// { 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 }, -// { 0.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0 }, -// { 0.0, 0.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0 }, -// { 0.0, 0.0, 0.0, 7.0, 1.0, 1.0, 1.0, 1.0, 1.0 }, -// { 0.0, 0.0, 0.0, 0.0, 8.0, 1.0, 1.0, 1.0, 1.0 }, -// { 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 1.0, 1.0, 1.0 }, -// { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 11.0, 12.0 } -// }; - -// multi::array P({9}, 0.); -// lapack::context ctxt; -// auto const& LU = multi::lapack::getrf(ctxt, A, P); - -// BOOST_REQUIRE( LU.size() == A.size() ); - -// for(int i = 0; i != 9; ++i){ -// for(int j = 0; j != 9; ++j){ -// std::cout<<'\t'<< LU[i][j] <<','; -// } -// std::cout< A = { +// { 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, +// { 1.0, 1.0, 1.0, 1.0, 
1.0, 0.0, 0.0, 0.0, 0.0 }, +// { 4.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 }, +// { 0.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0 }, +// { 0.0, 0.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0 }, +// { 0.0, 0.0, 0.0, 7.0, 1.0, 1.0, 1.0, 1.0, 1.0 }, +// { 0.0, 0.0, 0.0, 0.0, 8.0, 1.0, 1.0, 1.0, 1.0 }, +// { 0.0, 0.0, 0.0, 0.0, 0.0, 9.0, 1.0, 1.0, 1.0 }, +// { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 11.0, 12.0 } +// }; + +// multi::array P({9}, 0.); +// lapack::context ctxt; +// auto const& LU = multi::lapack::getrf(ctxt, A, P); + +// BOOST_REQUIRE( LU.size() == A.size() ); + +// for(int i = 0; i != 9; ++i){ +// for(int j = 0; j != 9; ++j){ +// std::cout<<'\t'<< LU[i][j] <<','; +// } +// std::cout< + +#include "../../blas/gemm.hpp" +#include "../../blas/herk.hpp" + +#include "../../lapack/potrf.hpp" + +#include +#include + +namespace multi = boost::multi; +// namespace lapack = multi::lapack; +namespace blas = multi::blas; + +using complex = std::complex; + +auto operator<<(std::ostream& os, std::complex const& cx) -> std::ostream& { + return os << real(cx) << " + I*" << imag(cx); +} + +template auto print(M const& arr) -> decltype(auto) { return print(arr, ""); } +template auto print(M const& arr, std::string const& msg) -> decltype(auto) { + using multi::size; + using std::cout; + cout << msg << "\n" + << '{'; + for(int i = 0; i != size(arr); ++i) { + cout << '{'; + for(auto j : arr[i].extension()) { // NOLINT(altera-unroll-loops) + cout << arr[i][j]; + if(j + 1 != size(arr[i])) { + cout << ", "; + } + } + cout << '}' << '\n'; + if(i + 1 != size(arr)) { + cout << ", "; + } + } + return cout << '}' << '\n'; +} +template auto print(M const& arr, char const* msg) -> decltype(auto) { return print(arr, std::string{msg}); } // NOLINT(fuchsia-default-arguments-calls) + +template +auto randomize(M&& arr) -> M&& { + std::random_device dev; + std::mt19937 eng{dev()}; + + auto gen = [&]() { + auto unif = std::uniform_real_distribution<>{-1.0, 1.0}; + return std::complex(unif(eng), 
unif(eng)); + }; + + std::for_each(begin(arr), end(arr), [&](auto&& row) { std::generate(begin(row), end(row), gen); }); + return std::forward(arr); +} + +/* +BOOST_AUTO_TEST_CASE(orthogonalization_over_rows, *boost::unit_test::tolerance(0.00001)){ + auto A = randomize(multi::array({3, 10})); + lapack::onrm(A); + + using blas::herk; + using blas::hermitized; + using blas::filling; + auto id = herk(filling::upper, A); + BOOST_TEST( real(id[1][1]) == 1.0 ); BOOST_TEST( imag(id[1][1]) == 0.0 ); + BOOST_TEST( real(id[1][2]) == 0.0 ); BOOST_TEST( imag(id[1][2]) == 0.0 ); +} +*/ + +// BOOST_AUTO_TEST_CASE(orthogonalization_over_rows_cuda, *boost::unit_test::tolerance(0.00001)) { +// auto Acpu = randomize(multi::array({3, 10})); + +// multi::cuda::array A = Acpu; + +// using namespace blas; +// using namespace lapack; + +// trsm(filling::lower, hermitized(potrf(filling::upper, herk(filling::upper, A))), A); + +// Acpu = A; +// auto id = herk(filling::upper, Acpu); +// BOOST_TEST( real(id[1][1]) == 1.0 ); +// BOOST_TEST( imag(id[1][1]) == 0.0 ); +// BOOST_TEST( real(id[1][2]) == 0.0 ); +// BOOST_TEST( imag(id[1][2]) == 0.0 ); +// } + +/* +BOOST_AUTO_TEST_CASE(orthogonalization_over_columns, *boost::unit_test::tolerance(0.00001)){ + + auto A = randomize( multi::array({10, 3}) ); + using blas::hermitized; + lapack::onrm(hermitized(A)); + + using blas::filling; + auto id = herk(filling::upper, hermitized(A)); + BOOST_TEST( real(id[1][1]) == 1. ); BOOST_TEST( imag(id[1][1]) == 0. ); + BOOST_TEST( real(id[1][2]) == 0. ); BOOST_TEST( imag(id[1][2]) == 0. 
); +}*/ + +BOOST_AUTO_TEST_CASE(numericalalgorithmsgroup_define_both_sides, *boost::unit_test::tolerance(0.0000001)) { + double const nan = std::numeric_limits::quiet_NaN(); + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) + + multi::array const A_gold = { + {3.23 + 0.00 * I, 1.51 - 1.92 * I, 1.90 + 0.84 * I, 0.42 + 2.50 * I}, + {1.51 + 1.92 * I, 3.58 + 0.00 * I, -0.23 + 1.11 * I, -1.18 + 1.37 * I}, + {1.90 - 0.84 * I, -0.23 - 1.11 * I, 4.09 + 0.00 * I, 2.33 - 0.14 * I}, + {0.42 - 2.50 * I, -1.18 - 1.37 * I, 2.33 + 0.14 * I, 4.29 + 0.00 * I}, + }; + + auto A = A_gold; // NOLINT(readability-identifier-length) lapack conventional name + + auto const As = multi::lapack::potrf(multi::lapack::filling::upper, A).size(); + BOOST_REQUIRE( As == A.size() ); + + auto AA = A; + + for(auto i = 0; i != 4; ++i) { + for(auto j = 0; j != i; ++j) { // NOLINT(altera-unroll-loops) + AA[i][j] = 0.0; + } + } + + auto const C = +blas::herk(1.0, blas::H(AA)); // +blas::gemm(1.0, blas::H(AA), AA); // NOLINT(readability-identifier-length) conventional lapack name + + for(auto i = 0; i != 4; ++i) { + for(auto j = 0; j != 4; ++j) { + BOOST_TEST( real(A_gold[i][j]) == real(C[i][j]) ); + BOOST_TEST( imag(A_gold[i][j]) == imag(C[i][j]) ); + } + } +} + +BOOST_AUTO_TEST_CASE(numericalalgorithmsgroup_define_upper, *boost::unit_test::tolerance(0.0000001)) { + double const nan = std::numeric_limits::quiet_NaN(); + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) + + multi::array const A_gold = { + {3.23 + 0.00 * I, 1.51 - 1.92 * I, 1.90 + 0.84 * I, 0.42 + 2.50 * I}, + { nan + nan * I, 3.58 + 0.00 * I, -0.23 + 1.11 * I, -1.18 + 1.37 * I}, + { nan - nan * I, nan - nan * I, 4.09 + 0.00 * I, 2.33 - 0.14 * I}, + { nan - nan * I, nan - nan * I, nan + nan * I, 4.29 + 0.00 * I}, + }; + + auto A = A_gold; // NOLINT(readability-identifier-length) lapack conventional name + + auto const As = multi::lapack::potrf(multi::lapack::filling::upper, A).size(); + + 
BOOST_REQUIRE( As == A.size() ); + + auto AA = A; + + for(auto i = 0; i != 4; ++i) { + for(auto j = 0; j != i; ++j) { // NOLINT(altera-unroll-loops) + AA[i][j] = 0.0; + } + } + + auto const C = +blas::herk(1.0, blas::H(AA)); // +blas::gemm(1.0, blas::H(AA), AA); // NOLINT(readability-identifier-length) conventional lapack name + + print(A_gold, "A gold"); // NOLINT(fuchsia-default-arguments-calls) + print(C, "recover"); // NOLINT(fuchsia-default-arguments-calls) + + for(auto i = 0; i != 4; ++i) { + for(auto j = i; j != 4; ++j) { // NOLINT(altera-id-dependent-backward-branch) // only compare upper part of the reference array (the other half is garbage) + BOOST_TEST( real(A_gold[i][j]) == real(C[i][j]) ); + BOOST_TEST( imag(A_gold[i][j]) == imag(C[i][j]) ); + } + } +} + +BOOST_AUTO_TEST_CASE(numericalalgorithmsgroup_trivial_imperfect, *boost::unit_test::tolerance(0.0000001)) { // NOLINT(fuchsia-default-arguments-calls) + double const nan = std::numeric_limits::quiet_NaN(); + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) + + multi::array const A_gold = { + {3.23 + 0.00 * I, 1.51 - 1.92 * I, 1.90 + 0.84 * I, 0.42 + 2.50 * I}, + { nan + nan * I, 3.58 + 0.00 * I, -0.23 + 1.11 * I, -1.18 + 1.37 * I}, + { nan - nan * I, nan - nan * I, -10000.00 + 0.00 * I, 0.00 - 0.00 * I}, + { nan - nan * I, nan - nan * I, nan + nan * I, -1000.00 + 0.00 * I}, + }; + + auto A = A_gold; // NOLINT(readability-identifier-length) lapack conventional name + + auto const& Adec = multi::lapack::potrf(multi::lapack::filling::upper, A); + + print(A, "A"); + print(Adec, "A dec"); + + auto AA = +Adec; + + for(auto i = 0; i != AA.size(); ++i) { // NOLINT(altera-id-dependent-backward-branch) + for(auto j = 0; j != i; ++j) { // NOLINT(altera-unroll-loops) + AA[i][j] = 0.0; + } + } + + auto const C = +blas::herk(1.0, blas::H(AA)); // +blas::gemm(1.0, blas::H(AA), AA); // NOLINT(readability-identifier-length) conventional lapack name + + print(A_gold, "A gold"); // 
NOLINT(fuchsia-default-arguments-calls) + print(C, "recover"); // NOLINT(fuchsia-default-arguments-calls) + + for(auto i = 0; i != AA.size(); ++i) { // NOLINT(altera-id-dependent-backward-branch) + for(auto j = i; j != std::get<1>(C.sizes()); ++j) { // only compare upper part of the reference array (the other half is garbage) // NOLINT(altera-id-dependent-backward-branch) + BOOST_TEST( real(A_gold[i][j]) == real(C[i][j]) ); + BOOST_TEST( imag(A_gold[i][j]) == imag(C[i][j]) ); + } + } +} + +BOOST_AUTO_TEST_CASE(numericalalgorithmsgroup_nontrivial_imperfect, *boost::unit_test::tolerance(0.0000001)) { // NOLINT(fuchsia-default-arguments-calls) + double const nan = std::numeric_limits::quiet_NaN(); + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) + + multi::array const A_gold = { + {1.00 + 0.00 * I, 0.00 - 0.00 * I, 0.00 + 0.00 * I, 0.00 + 0.00 * I}, + { nan + nan * I, 1.00 + 0.00 * I, 0.00 + 0.00 * I, 0.00 + 0.00 * I}, + { nan - nan * I, nan - nan * I, -1.00 + 0.00 * I, 0.00 - 0.00 * I}, + { nan - nan * I, nan - nan * I, nan + nan * I, -1.00 + 0.00 * I}, + }; + + auto A = A_gold; // NOLINT(readability-identifier-length) lapack conventional name + + auto const& Adec = multi::lapack::potrf(multi::lapack::filling::upper, A); + + print(A, "A"); + print(Adec, "A dec"); + + auto AA = +Adec; + + for(auto i = 0; i != AA.size(); ++i) { // NOLINT(altera-id-dependent-backward-branch) + for(auto j = 0; j != i; ++j) { // NOLINT(altera-unroll-loops) + AA[i][j] = 0.0; + } + } + + auto const C = +blas::herk(1.0, blas::H(AA)); // +blas::gemm(1.0, blas::H(AA), AA); // NOLINT(readability-identifier-length) conventional lapack name + + print(A_gold, "A gold"); // NOLINT(fuchsia-default-arguments-calls) + print(C, "recover"); // NOLINT(fuchsia-default-arguments-calls) + + for(auto i = 0; i != AA.size(); ++i) { // NOLINT(altera-id-dependent-backward-branch) + for(auto j = i; j != std::get<1>(C.sizes()); ++j) { // only compare upper part of the reference array 
(the other half is garbage) // NOLINT(altera-id-dependent-backward-branch) + BOOST_TEST( real(A_gold[i][j]) == real(C[i][j]) ); + BOOST_TEST( imag(A_gold[i][j]) == imag(C[i][j]) ); + } + } +} + +BOOST_AUTO_TEST_CASE(lapack_potrf, *boost::unit_test::tolerance(0.00001)) { + double const nan = std::numeric_limits::quiet_NaN(); + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) + + { + // NOLINTNEXTLINE(readability-identifier-length) + multi::array A = { + {167.413 + 0.0 * I, 126.804 - 0.00143505 * I, 125.114 - 0.1485590 * I}, + { nan + nan * I, 167.381 + 0.0 * I, 126.746 + 0.0327519 * I}, + { nan + nan * I, nan + nan * I, 167.231 + 0.0 * I}, + }; + + print(A, "original A"); + using boost::multi::lapack::filling; + using boost::multi::lapack::potrf; + + auto const As = potrf(filling::upper, A).size(); // A is hermitic in upper triangular (implicit below) + BOOST_REQUIRE( As == A.size() ); + + BOOST_TEST( real(A[1][2]) == 3.78646 ); + BOOST_TEST( imag(A[1][2]) == 0.0170734 ); + // BOOST_TEST( A[2][1] != A[2][1] ); + print(A, "decomposition"); + + multi::array C(A.extensions(), complex{0.0, 0.0}); // NOLINT(readability-identifier-length) conventional lapack name + + multi::array AA = A; + + auto const [is, js] = AA.extensions(); + for(auto i : is) { + for(auto j = 0; j != i; ++j) { // NOLINT(altera-unroll-loops,altera-id-dependent-backward-branch) + AA[i][j] = std::conj(A[j][i]); + } + } + + blas::gemm(complex{1.0, 0.0}, blas::H(AA), AA, complex{0.0, 0.0}, C); + + print(C, "recovery"); + } + // { + // multi::cuda::managed::array A = { + // {167.413, 126.804 - 0.00143505 * I, 125.114 - 0.1485590 * I}, + // { NAN, 167.381, 126.746 + 0.0327519 * I}, + // { NAN, NAN, 167.231}, + // }; + // using lapack::filling; + // using lapack::potrf; + // potrf(filling::upper, A); // A is hermitic in upper triangular (implicit below) + // BOOST_TEST( real(A[1][2]) == 3.78646 ); + // BOOST_TEST( imag(A[1][2]) == 0.0170734 ); + // // BOOST_TEST( A[2][1] != A[2][1] 
); + // } + // { + // multi::cuda::array A = { + // {167.413, 126.804 - 0.00143505 * I, 125.114 - 0.1485590 * I}, + // { NAN, 167.381, 126.746 + 0.0327519 * I}, + // { NAN, NAN, 167.231}, + // }; + // using lapack::filling; + // using lapack::potrf; + // potrf(filling::upper, A); // A is hermitic in upper triangular (implicit below) + // multi::array A_copy = A; + // print(A_copy); + // } +} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/syev.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/syev.cpp similarity index 94% rename from external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/syev.cpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/syev.cpp index c862f2a319..8780320423 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/syev.cpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/test/syev.cpp @@ -1,7 +1,4 @@ -#ifdef COMPILATION_INSTRUCTIONS -`#nvcc -x cu --expt-relaxed-constexpr`$CXX -D_TEST_MULTI_ADAPTORS_LAPACK_SYEV $0 -o $0x `pkg-config --libs blas lapack` -lboost_unit_test_framework -lcudart -lcusolver&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2020 +// © Alfredo A. 
Correa 2020-2024 #define BOOST_TEST_MODULE "C++ Unit Tests for Multi lapack adaptor syev" #define BOOST_TEST_DYN_LINK @@ -10,8 +7,10 @@ #include "../../lapack/syev.hpp" #include "../../../array.hpp" -#include "../../../adaptors/cuda.hpp" -#include "../../lapack/cuda.hpp" +// #include "multi/adaptors/thrust.hpp" +// #include "../../lapack/cuda.hpp" + +namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ { @@ -26,6 +25,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( A[2][1] == -0.579092 ); BOOST_TEST( W[1] == 42.2081 ); } +#if 0 { multi::cuda::managed::array A = { {167.413, 126.804, 125.114}, @@ -50,6 +50,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( double(A[2][1]) == -0.579092 ); BOOST_TEST( double(W[1]) == 42.2081 ); } +#endif { multi::array A = { {167.413, 126.804, 125.114}, @@ -61,6 +62,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( A[2][1] == -0.579092 ); BOOST_TEST( W[1] == 42.2081 ); } +#if 0 { multi::cuda::array A = { {167.413, 126.804, 125.114}, @@ -83,6 +85,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( double(A[2][1]) == -0.579092 ); BOOST_TEST( double(W[1]) == 42.2081 ); } +#endif { multi::array A = { {167.413, 126.804, 125.114}, @@ -94,6 +97,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( A[2][1] == -0.579092 ); BOOST_TEST( W[1] == 42.2081 ); } +#if 0 { multi::cuda::array A = { {167.413, 126.804, 125.114}, @@ -116,6 +120,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( double(A[2][1]) == -0.579092 ); BOOST_TEST( double(W[1]) == 42.2081 ); } +#endif { namespace lapack = multi::lapack; multi::array A = { @@ -127,6 +132,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, 
*boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( A[2][1] == -0.579092 ); BOOST_TEST( W[1] == 42.2081 ); } +#if 0 { namespace lapack = multi::lapack; multi::cuda::array A = { @@ -149,6 +155,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( double(A[2][1]) == -0.579092 ); BOOST_TEST( double(W[1]) == 42.2081 ); } +#endif { namespace lapack = multi::lapack; multi::array A = { @@ -160,6 +167,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( A[2][1] == -0.579092 ); BOOST_TEST( W[1] == 42.2081 ); } +#if 0 { namespace lapack = multi::lapack; multi::cuda::array A = { @@ -182,6 +190,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( double(A[2][1]) == -0.579092 ); BOOST_TEST( double(W[1]) == 42.2081 ); } +#endif { multi::array const A = { {167.413, 126.804, 125.114}, @@ -195,6 +204,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( A_copy[2][1] == -0.579092 ); BOOST_TEST( W[1] == 42.2081 ); } +#if 0 { multi::cuda::array const A = { {167.413, 126.804, 125.114}, @@ -221,6 +231,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( double(A_copy[2][1]) == -0.579092 ); BOOST_TEST( double(W[1]) == 42.2081 ); } +#endif { multi::array A = { {167.413, 126.804, 0.}, @@ -245,6 +256,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( size(A_ref)==3 ); BOOST_TEST( W[0]==0. ); } +#if 0 { multi::cuda::array A = { {1. , 1., 1.}, @@ -259,9 +271,9 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ } { multi::cuda::managed::array A = { - {1. 
, 1., 1.}, - {NAN, 2 , 1.}, - {NAN, NAN, 1.} + {1.0, 1.0, 1.0}, + {NAN, 2.0, 1.0}, + {NAN, NAN, 1.0} }; multi::cuda::managed::array W(size(A)); namespace lapack = multi::lapack; @@ -269,36 +281,40 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( size(A_ref)==3 ); BOOST_TEST( double(W[0])==0. ); } +#endif { - multi::array A = {{5.}}; + multi::array A = {{5.0}}; multi::array W(size(A)); namespace lapack = multi::lapack; lapack::syev(lapack::filling::upper, A, W); BOOST_TEST( A[0][0] == 1. ); BOOST_TEST( W[0]==5. ); } +#if 0 { - multi::cuda::array A = {{5.}}; + multi::cuda::array A = {{5.0}}; multi::cuda::array W(size(A)); namespace lapack = multi::lapack; lapack::syev(lapack::filling::upper, A, W); - BOOST_TEST( A[0][0] == 1. ); - BOOST_TEST( W[0]==5. ); + BOOST_TEST( A[0][0] == 1.0 ); + BOOST_TEST( W[0]==5.0 ); } { - multi::cuda::managed::array A = {{5.}}; + multi::cuda::managed::array A = {{5.0}}; multi::cuda::managed::array W(size(A)); namespace lapack = multi::lapack; lapack::syev(lapack::filling::upper, A, W); - BOOST_TEST( A[0][0] == 1. ); - BOOST_TEST( W[0]==5. 
); + BOOST_TEST( A[0][0] == 1.0 ); + BOOST_TEST( W[0]==5.0 ); } +#endif { multi::array A; multi::array W(size(A)); namespace lapack = multi::lapack; lapack::syev(lapack::filling::upper, A, W); } +#if 0 { multi::cuda::array A; multi::cuda::array W(size(A)); @@ -311,6 +327,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ namespace lapack = multi::lapack; lapack::syev(lapack::filling::upper, A, W); } +#endif { multi::array const A = { {167.413, 126.804, 125.114}, @@ -324,6 +341,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( sys.eigenvectors[2][1] == -0.579092 ); BOOST_TEST( sys.eigenvalues[1] == 42.2081 ); } +#if 0 { multi::cuda::array const A = { {167.413, 126.804, 125.114}, @@ -350,7 +368,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( double(sys.eigenvectors[2][1]) == -0.579092 ); BOOST_TEST( double(sys.eigenvalues[1]) == 42.2081 ); } -#if __cpp_structured_bindings +#endif { multi::array const A = { {167.413, 126.804, 125.114}, @@ -364,6 +382,7 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ BOOST_TEST( eigenvecs[2][1] == -0.579092 ); BOOST_TEST( eigenvals[1] == 42.2081 ); } +#if 0 { multi::cuda::array const A = { {167.413, 126.804, 125.114}, @@ -392,5 +411,3 @@ BOOST_AUTO_TEST_CASE(multi_lapack_syev, *boost::unit_test::tolerance(0.00001) ){ } #endif } - - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/triangular.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/triangular.hpp similarity index 78% rename from external_codes/boost_multi/multi/include/multi/adaptors/lapack/triangular.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/triangular.hpp index b248f117e4..050d432d66 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/triangular.hpp +++ 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/lapack/triangular.hpp @@ -1,22 +1,28 @@ -#ifdef COMPILATION_INSTRUCTIONS -(echo "#include\""$0"\"" > $0x.cpp) && clang++ `#-DNDEBUG` -O3 -std=c++14 -Wall -Wextra -Wpedantic -D_TEST_MULTI_ADAPTORS_LAPACK_TRIANGULAR -DADD_ $0x.cpp -o $0x.x -lblas && time $0x.x $@ && rm -f $0x.x $0x.cpp; exit -#endif +// Copyright 2019-2024 Alfredo A. Correa + #ifndef MULTI_ADAPTORS_LAPACK_TRIANGULAR_HPP #define MULTI_ADAPTORS_LAPACK_TRIANGULAR_HPP -// Alfredo A. Correa 2019 © #include "../../../multi/array.hpp" +namespace boost::multi::lapack { + enum class filling : char { + lower = 'U', + upper = 'L', + } +} + namespace boost{ namespace multi{ namespace lapack{ template struct uhermitian : public multi::array{ -// using multi::array::array; +// using multi::array::array; template< class MultiArray, - typename = decltype(multi::array{std::forward(std::declval())}) + typename = decltype(multi::array{std::forward(std::declval())}), + std::enable_if_t, int> =0 > explicit uhermitian(MultiArray&& ma) : multi::array{std::forward(ma)}{} template decltype(auto) operator[](Index i) const{ diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/rangev3.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/rangev3_.hpp similarity index 57% rename from external_codes/boost_multi/multi/include/multi/adaptors/rangev3.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/rangev3_.hpp index 071c1f94f4..df5b7c65d9 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/rangev3.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/rangev3_.hpp @@ -8,8 +8,8 @@ #include -#ifndef MULTI_ADAPTORS_RANGEV3_HPP -#define MULTI_ADAPTORS_RANGEV3_HPP +#ifndef BOOST_MULTI_ADAPTORS_RANGEV3_HPP +#define BOOST_MULTI_ADAPTORS_RANGEV3_HPP namespace ranges{namespace v3{ namespace concepts{ // needed for later version of rangesv3 @@ -17,17 +17,17 @@ namespace concepts{ // needed 
for later version of rangesv3 #if 0 template struct common_reference< - boost::multi::basic_array&&, + boost::multi::subarray&&, MA& >{ - using type = boost::multi::basic_array&&; + using type = boost::multi::subarray&&; }; template struct common_reference< MA&, - boost::multi::basic_array&& + boost::multi::subarray&& >{ - using type = boost::multi::basic_array&&; + using type = boost::multi::subarray&&; }; #endif } @@ -47,5 +47,4 @@ int main(){ } #endif -#endif - +#endif // BOOST_MULTI_ADAPTORS_RANGEV3_HPP \ No newline at end of file diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/tblis.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/tblis.hpp similarity index 81% rename from external_codes/boost_multi/multi/include/multi/adaptors/tblis.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/tblis.hpp index 3ce1c7ae8a..ea8ae41265 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/tblis.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/tblis.hpp @@ -1,7 +1,7 @@ #ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*- $CXX -std=c++17 -O0 -I/home/correaa/include/tblis -I/home/correaa/tblis/src/external/tci -L/home/correaa/lib -Wl,-rpath=/home/correaa/lib -ltblis $0 -o $0x&&$0x&&rm $0x;exit #endif -// © Alfredo A. Correa 2021 +// Copyright 2021-2024 Alfredo A. 
Correa #include "tblis/tblis.h" @@ -103,17 +103,17 @@ namespace μνσ{ -template auto init_matrix = std::enable_if_t{}; -template<> auto init_matrix = ::tblis::tblis_init_matrix_s; -template<> auto init_matrix = ::tblis::tblis_init_matrix_d; -template<> auto init_matrix> = ::tblis::tblis_init_matrix_c; -template<> auto init_matrix> = ::tblis::tblis_init_matrix_z; +template auto const init_matrix = std::enable_if_t{}; +template<> auto const init_matrix = ::tblis::tblis_init_matrix_s; +template<> auto const init_matrix = ::tblis::tblis_init_matrix_d; +template<> auto const init_matrix> = ::tblis::tblis_init_matrix_c; +template<> auto const init_matrix> = ::tblis::tblis_init_matrix_z; -template auto init_tensor = std::enable_if_t{}; -template<> auto init_tensor = ::tblis::tblis_init_tensor_s; -template<> auto init_tensor = ::tblis::tblis_init_tensor_d; -template<> auto init_tensor> = ::tblis::tblis_init_tensor_c; -template<> auto init_tensor> = ::tblis::tblis_init_tensor_z; +template auto const init_tensor = std::enable_if_t{}; +template<> auto const init_tensor = ::tblis::tblis_init_tensor_s; +template<> auto const init_tensor = ::tblis::tblis_init_tensor_d; +template<> auto const init_tensor> = ::tblis::tblis_init_tensor_c; +template<> auto const init_tensor> = ::tblis::tblis_init_tensor_z; template struct indexed_tensor; @@ -124,13 +124,14 @@ struct tensor : ::tblis::tblis_tensor{ std::array<::tblis::len_type , D> lens_; std::array<::tblis::stride_type, D> strides_; template>{}, int> =0> - explicit tensor(A&& a) : + explicit tensor(A&& a) : // NOLINT(bugprone-forwarding-reference-overload) workaround for DeepSource lens_ (std::apply([](auto... s){return std::array<::tblis::len_type , D>{s...};}, sizes (a))), - strides_(std::apply([](auto... s){return std::array<::tblis::stride_type, D>{s...};}, strides(a))){ + strides_(std::apply([](auto... 
s){return std::array<::tblis::stride_type, D>{s...};}, strides(a))) + { tblis::init_tensor>(this, D, lens_.data(), const_cast*>(base(a)), strides_.data()); } tensor(tensor const&) = delete; - tensor(tensor&& other) : lens_{other.lens_}, strides_{other.strides_}{ + tensor(tensor&& other) noexcept : lens_{other.lens_}, strides_{other.strides_}{ tblis::init_tensor>(this, D, lens_.data(), const_cast*>(other.data()), strides_.data()); } using dimensionality_type = multi::dimensionality_type; @@ -155,7 +156,7 @@ struct indexed_tensor{ tensor tensor_; std::string indices_; indexed_tensor(tensor&& t, std::string indices) : tensor_(std::move(t)), indices_{std::move(indices)}{} - indexed_tensor(indexed_tensor&& other) = default; + indexed_tensor(indexed_tensor&& other) noexcept = default; tensor& tensor_part()&{return tensor_;} std::string indices() const{return indices_;} }; @@ -188,16 +189,17 @@ auto mult(ITensorA&& aijk, ITensorB&& bijk, ITensorC&& cijk) template struct matrix : ::tblis::tblis_matrix{ public: - template, std::decay_t>{}, int> =0> - matrix(A&& a){ + matrix(matrix const&) = delete; + matrix(matrix&&) noexcept = default; + +// template matrix(matrix const& other) : ::tblis::tblis_matrix + template>>> > + matrix(A&& a) { // NOLINT(bugprone-forwarding-reference-overload) workaround for DeepSource init_matrix(this, std::get<0>(a.sizes()), std::get<1>(a.sizes()), const_cast(a.base()), std::get<0>(a.strides()), std::get<1>(a.strides()) ); } -// template matrix(matrix const& other) : ::tblis::tblis_matrix - matrix(matrix const&) = delete; - matrix(matrix&&) = default; }; template::element_ptr> matrix(A&&)->matrix::element_type>; diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/tblis/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/tblis/test/CMakeLists.txt similarity index 97% rename from external_codes/boost_multi/multi/include/multi/adaptors/tblis/test/CMakeLists.txt rename to 
external_codes/boost_multi/multi/include/boost/multi/adaptors/tblis/test/CMakeLists.txt index ae370d26e8..7874678de3 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/tblis/test/CMakeLists.txt +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/tblis/test/CMakeLists.txt @@ -109,5 +109,5 @@ foreach(TEST_FILE ${TEST_SRCS}) /W4> ) endif() - add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) + add_test(NAME ${TEST_EXE} COMMAND $) endforeach() diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/tblis/test/tensor.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/tblis/test/tensor.cpp similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/tblis/test/tensor.cpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/tblis/test/tensor.cpp diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust.hpp new file mode 100644 index 0000000000..e6482df433 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust.hpp @@ -0,0 +1,258 @@ +// Copyright 2021-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_THRUST_HPP_ +#define BOOST_MULTI_ADAPTORS_THRUST_HPP_ +#pragma once + +#include "../array.hpp" + +#include "./thrust/reference.hpp" + +#include + +#include +#include + +#if not defined(MULTI_USE_HIP) +#include // for ::thrust::cuda::allocator +#else +#include // for ::thrust::hip::allocator +#endif +// #include // ::thrust::cuda::allocator + +// #include + +#include // std::copy + +#include +#include + +// // begin of nvcc trhust 11.5 workaround : https://github.com/NVIDIA/thrust/issues/1629 +// namespace thrust { + +// template class pointer; +// template struct pointer_traits; + +// } // end namespace thrust + +// namespace std { + +// template struct pointer_traits> +// : thrust::detail::pointer_traits> { +// template +// using rebind = typename thrust::detail::pointer_traits>::template rebind::other; +// }; + +// } // end namespace std +// // end of nvcc thrust 11.5 workaround + +#if not defined(MULTI_USE_HIP) +#define HICUP cuda +#define HICUP_(NAME) cuda ## NAME +#else +#define HICUP hip +#define HICUP_(NAME) hip ## NAME +#endif + +namespace boost::multi { + +template +struct pointer_traits<::thrust::pointer> : std::pointer_traits<::thrust::pointer> { + using default_allocator_type = ::thrust::universal_allocator>; +}; + +} // end namespace boost::multi + +namespace boost::multi { + +template +struct allocator_traits<::thrust::mr::stateless_resource_allocator> +: std::allocator_traits<::thrust::mr::stateless_resource_allocator> { + private: + using Alloc = ::thrust::mr::stateless_resource_allocator; + using base = std::allocator_traits; + + public: + using typename base::pointer; + using typename base::size_type; + using typename base::const_void_pointer; + + using base::allocate; + [[nodiscard]] static constexpr auto allocate(Alloc& a, size_type n, const_void_pointer hint) -> pointer { + auto ret = allocator_traits::allocate(a, n); + if(not hint) { + prefetch_to_device(ret, 
n*sizeof(TT), get_current_device()); + return ret; + } + prefetch_to_device(ret, n*sizeof(TT), get_device(hint)); + return ret; + } + + private: + using device_index = int; + static auto get_current_device() -> device_index { + int device; + switch(HICUP_(GetDevice)(&device)) { + case HICUP_(Success) : break; + case HICUP_(ErrorInvalidValue): assert(0); + default: assert(0); + } + return device; + } + static void prefetch_to_device(const_void_pointer p, size_type byte_count, device_index d) { + switch(HICUP_(MemPrefetchAsync)(raw_pointer_cast(p), byte_count, d)) { + case HICUP_(Success) : break; + case HICUP_(ErrorInvalidValue) : assert(0); break; + case HICUP_(ErrorInvalidDevice): assert(0); break; + default: assert(0); + } + } + + static auto get_device(const_void_pointer p) -> device_index { + #if defined(__HIPCC__) + hipPointerAttribute_t attr{}; + #else // #if defined(__NVCC__) + cudaPointerAttributes attr{}; + #endif + switch(HICUP_(PointerGetAttributes)(&attr, raw_pointer_cast(p))) { + case HICUP_(Success): break; + case HICUP_(ErrorInvalidDevice): assert(0); break; + case HICUP_(ErrorInvalidValue): assert(0); break; + default: assert(0); // 71 enumeration values not handled in switch: 'hipErrorOutOfMemory', 'hipErrorNotInitialized', 'hipErrorDeinitialized'... 
+ } + assert(attr.type == HICUP_(MemoryTypeManaged)); + return attr.device; + } +}; + +} // end namespace ::boost::multi + +// this is important for algorithms to dispatch to the right thrust executor +namespace thrust { + +template struct iterator_system; + +template +struct iterator_system>{ + using type = typename ::thrust::iterator_system::element_ptr>::type; +}; + +template +struct iterator_system> { + using type = typename ::thrust::iterator_system::pointer>::type; +}; + +// namespace detail { +// template +// struct pointer_traits< +// boost::multi::basic_array_ptr< +// boost::multi::subarray, thrust::use_default>, LO>, +// LO +// > +// > +// { +// using Ptr = boost::multi::basic_array_ptr< +// boost::multi::subarray, thrust::use_default>, LO>, +// LO +// >; +// using pointer = Ptr; +// using reference = thrust::tagged_reference; +// typedef typename pointer_element::type element_type; +// typedef typename pointer_difference::type difference_type; + +// template +// struct rebind +// { +// typedef typename rebind_pointer::type other; +// }; + +// // __host__ __device__ +// // inline static pointer pointer_to(typename pointer_traits_detail::pointer_to_param::type r) +// // { +// // // XXX this is supposed to be pointer::pointer_to(&r); (i.e., call a static member function of pointer called pointer_to) +// // // assume that pointer has a constructor from raw pointer instead + +// // return pointer(&r); +// // } + +// // thrust additions follow +// //typedef typename pointer_raw_pointer::type raw_pointer; +// using raw_pointer = boost::multi::basic_array_ptr< +// boost::multi::subarray, +// LO +// >; + +// __host__ __device__ +// inline static raw_pointer get(pointer ptr) +// { +// return reinterpret_cast(ptr); // return ptr.get(); +// } +// }; +// } + +} // end namespace ::thrust + +namespace boost::multi { +namespace thrust { + +// defines multi::thrust::device_array +// defines multi::thrust::host_array + +template> using device_array = multi::array; 
+template> using universal_array = multi::array; +template> using host_array = multi::array; + +// defines multi::thrust::device::array +// defines multi::thrust::host ::array +namespace device {template using array = device_array ;} // end namespace device +namespace universal {template using array = universal_array;} // end namespace universal +namespace host {template using array = host_array ;} // end namespace host + +// defines multi::thrust::cuda::array +// defines multi::thrust::cuda::managed::array +namespace cuda { + template using array = multi::array>; + + // namespace managed { + // template using array = multi::array>; + // } // end namespace managed +} // end namespace cuda + +namespace mr {template using array = array>;} +namespace pmr { + template using array = mr::array>; + template using universal_array = pmr::array>; +} // end namespace pmr + +namespace cuda { + +template using universal_array = multi::array >; + +namespace universal { + template using array = multi::thrust::cuda::universal_array; +} + +namespace pmr { + template using universal_array = ::boost::multi::thrust::pmr::array>; +} // end namespace pmr +} // end namespace cuda + + +} // end namespace thrust +} // end namespace boost::multi + +namespace boost::multi { + +template +constexpr auto default_allocator_of(::thrust::pointer /*unused*/) { + return ::thrust::HICUP::universal_allocator>::value_type>{}; +} + +} + +#undef HICUP +#undef HICUP_ + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/CMakeLists.txt new file mode 100644 index 0000000000..a732d26d9a --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.11) +project( + boost-multi-adaptor-cuda + VERSION 0.1 + LANGUAGES CXX +) + +find_package(Boost REQUIRED COMPONENTS unit_test_framework) + 
+if(ENABLE_CUDA OR DEFINED CXXCUDA) + enable_language(CUDA) + + enable_testing() + + add_subdirectory(test) +endif() diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/algorithms.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/algorithms.hpp similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/algorithms.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/algorithms.hpp diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/allocator_traits.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/allocator_traits.hpp similarity index 99% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/allocator_traits.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/allocator_traits.hpp index 9e01fa62ef..86c97a669e 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/allocator_traits.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/allocator_traits.hpp @@ -17,7 +17,7 @@ namespace thrust { namespace boost { namespace multi { -namespace memor y{ +namespace memory { template //template<> diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/complex.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/complex.hpp similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/complex.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/complex.hpp diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/cuda/managed.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/cuda/managed.hpp similarity index 88% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/cuda/managed.hpp rename to 
external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/cuda/managed.hpp index 09a96251c9..e0be2472e4 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/cuda/managed.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/cuda/managed.hpp @@ -1,5 +1,9 @@ -#pragma once +// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- +// Copyright 2020-2023 Alfredo A. Correa +#ifndef MULTI_ADAPTORS_THRUST_CUDA_MANAGED_HPP_ +#define MULTI_ADAPTORS_THRUST_CUDA_MANAGED_HPP_ +#pragma once #include "../../cuda/runtime/error.hpp" @@ -38,10 +42,14 @@ class pointer { using difference_type = typename ::thrust::iterator_traits<::thrust::cuda::pointer>::difference_type; using value_type = typename ::thrust::iterator_traits<::thrust::cuda::pointer>::value_type; -#pragma push -#pragma nv_diag_suppress = class_and_member_name_conflict // for nvcc warning: declaration of a member with the same name as its class TODO(correaa) switch to new stype pragma for nvcc and nvc++ +#pragma nv_diagnostic push +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diag_suppress = class_and_member_name_conflict // #280 for nvcc warning: declaration of a member with the same name as its class TODO(correaa) switch to new stype pragma for nvcc and nvc++ +#else +#pragma diag_suppress = class_and_member_name_conflict // #280 for nvcc 11.4 +#endif using pointer = pointer; -#pragma pop +#pragma nv_diagnostic pop using reference = managed::reference; using iterator_category = typename ::thrust::iterator_traits<::thrust::cuda::pointer>::iterator_category; @@ -68,8 +76,9 @@ class pointer { struct bad_alloc : std::bad_alloc{}; template -class allocator{// : cuda::allocator{ +class allocator { // : cuda::allocator{ static_assert( std::is_same>{}, "!" 
); + public: using value_type = T; using pointer = managed::pointer; @@ -128,7 +137,7 @@ class allocator{// : cuda::allocator{ }} -#if not __INCLUDE_LEVEL__ +#if 0 #include #include @@ -145,3 +154,4 @@ int main(){ } #endif +#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/cuda/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/cuda/test/CMakeLists.txt similarity index 89% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/cuda/test/CMakeLists.txt rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/cuda/test/CMakeLists.txt index 2e7025bfc8..48e04e69c8 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/cuda/test/CMakeLists.txt +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/cuda/test/CMakeLists.txt @@ -18,7 +18,7 @@ foreach(TEST_FILE ${TEST_SRCS}) add_executable(${TEST_EXE} ${TEST_FILE}) if(ENABLE_CUDA OR DEFINED CXXCUDA) set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) - target_compile_options(${TEST_EXE} PRIVATE -std=c++17 --expt-relaxed-constexpr) + # target_compile_options(${TEST_EXE} PRIVATE -std=c++17 --expt-relaxed-constexpr) endif() # target_compile_features (${TEST_EXE} PUBLIC cxx_std_17) target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") @@ -26,9 +26,11 @@ foreach(TEST_FILE ${TEST_SRCS}) # target_include_directories(${TEST_EXE} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) target_link_libraries(${TEST_EXE} PRIVATE ${Boost_LIBRARIES}) target_link_directories(${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS}) + target_link_libraries(${TEST_EXE} PRIVATE multi) + # if(NOT ENABLE_CUDA) target_compile_options (${TEST_EXE} PRIVATE -Werror -Wall -Wextra -fno-common $<$: -Wpedantic -Wformat-truncation -fstack-usage>#-Wconversion # $<$,$>: # -Wpedantic -Wmove> $<$: -wd161 -diag-disable=remark -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized 
-Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wparentheses # -Wpointer-sign -Wreorder -Wno-return-type -Wsign-compare -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wunused-variable -Wwrite-strings -Werror -diag-error:3846 > $<$: /W4>) endif() - add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) + add_test(NAME ${TEST_EXE} COMMAND $) endforeach() diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/cuda/test/managed.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/cuda/test/managed.cu new file mode 100644 index 0000000000..42be7660e0 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/cuda/test/managed.cu @@ -0,0 +1,144 @@ +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust" +#define BOOST_TEST_DYN_LINK +#include + +#include "../../cuda/managed.hpp" + +namespace multi = boost::multi; + +void set_one(double* p){ + *p = 1.; +} + +void set_two_gpu(thrust::cuda::pointer p){ + *p = 2.; +} + +void set_three_ref(double& p){ + p = 3.; +} + +template::value_type, class = std::enable_if_t{} and std::is_convertible>{}> > +void some_fun(Pointer p){} + +template::value_type, class = std::enable_if_t{} and std::is_convertible{}> > +void some_other_fun(Pointer p){} + +template class prio : std::conditional_t, std::false_type>{}; + +template::value_type, std::enable_if_t{} and std::is_convertible>{}, int> =0> +int overload_aux(Pointer p, prio<0>){return 0;} + +template::value_type, std::enable_if_t{} and std::is_convertible{}, int> =0> +int overload_aux(Pointer p, prio<1>){return 1;} + +template int overload(Pointer p){return overload_aux(p, prio<1>{});} + +BOOST_AUTO_TEST_CASE(vector){ + + multi::thrust::cuda::managed::allocator alloc; + multi::thrust::cuda::managed::pointer p = alloc.allocate(100); + + p[17] = 3.; + BOOST_TEST_REQUIRE( p[17] == 3. ); + + set_one(p); + BOOST_TEST_REQUIRE( p[0] == 1. 
); + + set_two_gpu(p); + BOOST_TEST_REQUIRE( p[0] == 2. ); + + set_three_ref( p[1] ); + BOOST_TEST_REQUIRE( p[1] == 3. ); + + some_fun(p); + + BOOST_TEST_REQUIRE(overload(p) == 1); + + alloc.deallocate(p, 100); + +} + +BOOST_AUTO_TEST_CASE(vector) +{ + static_assert(std::is_same>::element_type, double>{}, "!"); + cuda::allocator calloc; + cuda::ptr p = calloc.allocate(100); + cuda::ptr v = p; + cuda::ptr vc{v}; + v = const_pointer_cast(vc); + assert( vc == v ); + std::pointer_traits::rebind pc = p; // cuda::ptr pc = p; + assert( pc == p ); + using cuda::const_pointer_cast; + auto end = p + 100; + auto rbegin = std::make_reverse_iterator(end); + auto rend = std::make_reverse_iterator(p); + std::transform(rbegin, rend, rbegin, [](auto&& e){return std::forward(e) + 99.;}); + assert( p[11] == 99. ); + p[33] = 123.; + p[99] = 321.; +// p[33] += 1; + add_one(p[33]); + double p33 = p[33]; + assert( p33 == 124. ); + assert( p[33] == 124. ); + assert( p[33] == p[33] ); + swap(p[33], p[99]); + assert( p[99] == 124. ); + assert( p[33] == 321. 
); + std::cout << p[33] << std::endl; + calloc.deallocate(p, 100); + + multi::array> arr2(multi::array::extensions_type{100l}, 999.); + + assert(size(arr2) == 100); +} + +#ifdef COMPILATION_INSTRUCTIONS +nvcc -ccbin cuda-c++ -std=c++14 $0 -o $0x && $0x && rm -f $0x; exit +#endif + +#include "../../../../multi/array.hpp" +#include "../../../../multi/detail/stack_allocator.hpp" +#include "../../../../multi/detail/cuda/allocator.hpp" + +#include + +namespace multi = boost::multi; +namespace cuda = multi::detail::memory::cuda; + +using std::cout; + +int main(){ + { + std::size_t stack_size = 4000; + multi::stack_buffer> buf{stack_size}; + for(int i = 0; i != 3; ++i){ + cout<<"pass "<< i << std::endl; + { + multi::array>> A({2, 10}, &buf); + multi::array>> B({3, 10}, &buf); + multi::array>> C({4, 10}, &buf); + for(int j = 0; j != 100; ++j) + multi::array>> D({4, 10}, &buf); + B[1][1] = 33.; + B[2][2] = 33.; + assert( B[1][1] == B[2][2] ); + } + cout + <<" size: "<< buf.size() + <<"\n hits: "<< buf.hits() + <<"\n misses "<< buf.misses() + <<"\n allocated(bytes) "<< buf.allocated_bytes() + <<"\n deallocated(bytes) "<< buf.deallocated_bytes() + <<"\n max_needed(bytes) "<< buf.max_needed() + <<"\n stack recovered(bytes) " << buf.stack_recovered() + << std::endl + ; + assert( buf.allocated_bytes() == buf.deallocated_bytes() ); + if(buf.max_needed() > buf.size()) buf.reset(buf.max_needed()); + } + } + assert( cuda::allocation_counter::n_allocations == 1 ); +} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/fix_complex_traits.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/fix_complex_traits_.hpp similarity index 72% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/fix_complex_traits.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/fix_complex_traits_.hpp index 38986623e3..39030ccce3 100644 --- 
a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/fix_complex_traits.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/fix_complex_traits_.hpp @@ -1,9 +1,9 @@ // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2022 Alfredo A. Correa +// Copyright 2022-2023 Alfredo A. Correa #pragma once -#include "../../detail/type_traits.hpp" +#include #include @@ -14,6 +14,9 @@ namespace multi { #pragma message "By including this header, the behavior of initialization of thrust::complex in multi::array's changes. ::thrust::complex elements will not be initialized." #endif +template +inline constexpr bool force_element_trivial_default_construction<::thrust::complex> = std::is_trivially_default_constructible_v; + template struct is_trivially_default_constructible<::thrust::complex> : std::is_trivially_default_constructible {}; static_assert(is_trivially_default_constructible<::thrust::complex>::value); diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/fix_copy.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/fix_copy.hpp new file mode 100644 index 0000000000..edc4f87f3f --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/fix_copy.hpp @@ -0,0 +1,160 @@ +// Copyright 2021-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_THRUST_FIX_COPY_HPP_ +#define BOOST_MULTI_ADAPTORS_THRUST_FIX_COPY_HPP_ +#pragma once + +namespace boost::multi { + +#if 0 +template +auto copy_n( + boost::multi::elements_iterator_t< Q1* , L1> first, Size count, + boost::multi::elements_iterator_t<::thrust::pointer, L2> d_first +)-> boost::multi::elements_iterator_t<::thrust::pointer, L2> { + if constexpr(std::is_trivially_assignable{}) { + if constexpr(L1::dimensionality == 1 and L2::dimensionality == 1) { + if(first.layout().stride() == 1 and d_first.layout().stride() == 1) { + auto s = hipMemcpy (raw_pointer_cast(d_first.current()), first.current(), sizeof(Q2)* static_cast(count), hipMemcpyHostToDevice); assert( s == hipSuccess ); + } else { + auto s = hipMemcpy2D(raw_pointer_cast(d_first.current()), static_cast(d_first.layout().stride())*sizeof(Q2), first.current(), static_cast(first.layout().stride())*sizeof(Q2), sizeof(Q2), static_cast(count), hipMemcpyHostToDevice); assert( s == hipSuccess ); + } + return d_first + count; + } else if constexpr(L1::dimensionality == 2 and L1::dimensionality == 2) { + if(std::get<1>(first.layout().strides()) == 1 and std::get<1>(d_first.layout().strides()) == 1 and count%std::get<1>(first.layout().sizes()) == 0) { + {auto s = hipMemcpy2D(raw_pointer_cast(d_first.current()), static_cast(d_first.layout().stride())*sizeof(Q2), first.current(), static_cast(first.layout().stride())*sizeof(Q2), static_cast(std::get<1>(first.layout().sizes()))*sizeof(Q2), static_cast(count/std::get<1>(first.layout().sizes())), hipMemcpyHostToDevice); assert( s == cudaSuccess );} + return d_first + count; + } // else fallthrough + } + {auto r = hipHostRegister( + const_cast(static_cast(first.base())), + static_cast (first.layout().hull_size()*sizeof(Q1)), + hipHostRegisterPortable + ); assert( r == hipSuccess );} + auto ret = ::thrust::copy_n( + ::thrust::hip::par, + first, count, d_first + ); + {auto r = hipHostUnregister( + 
const_cast(static_cast(first.base())) + ); assert( r == hipSuccess );} + return ret; + } else { + return ::thrust::copy_n(first, count, d_first); + } + return d_first + count; +} + +template +auto copy_n( + boost::multi::elements_iterator_t<::thrust::pointer, L1> first, Size count, + boost::multi::elements_iterator_t< Q2* , L2> d_first +)-> boost::multi::elements_iterator_t< Q2* , L2> { + if constexpr(std::is_trivially_assignable{}) { + if constexpr(L1::dimensionality == 1 and L2::dimensionality == 1) { + if(first.layout().stride() == 1 and d_first.layout().stride() == 1) { + auto s = hipMemcpy ( d_first.current() , raw_pointer_cast(first.current()), sizeof(Q2)* static_cast(count), hipMemcpyDeviceToHost); assert( s == cudaSuccess ); + } else { + auto s = hipMemcpy2D( d_first.current() , static_cast(d_first.layout().stride())*sizeof(Q2), raw_pointer_cast(first.current()), static_cast(first.layout().stride())*sizeof(Q2), sizeof(Q2), static_cast(count), hipMemcpyDeviceToHost); assert( s == cudaSuccess ); + } + return d_first + count; + } else if constexpr(L1::dimensionality == 2 and L1::dimensionality == 2) { + if(std::get<1>(first.layout().strides()) == 1 and std::get<1>(d_first.layout().strides()) == 1 and count%std::get<1>(first.layout().sizes()) == 0) { + auto s = hipMemcpy2D( d_first.current() , static_cast(d_first.layout().stride())*sizeof(Q2), raw_pointer_cast(first.current()), static_cast(first.layout().stride())*sizeof(Q2), static_cast(std::get<1>(first.layout().sizes()))*sizeof(Q2), static_cast(count/std::get<1>(first.layout().sizes())), hipMemcpyDeviceToHost); assert( s == cudaSuccess ); + return d_first + count; + } + } + {auto r = hipHostRegister( + const_cast(static_cast(d_first.base())), + static_cast (d_first.layout().hull_size()*sizeof(Q1)), + hipHostRegisterPortable + ); assert( r == hipSuccess );} + auto ret = ::thrust::copy_n( + ::thrust::hip::par, + first, count, d_first + ); + {auto r = hipHostUnregister( + const_cast(static_cast(d_first.base())) 
+ ); assert( r == hipSuccess );} + return ret; + } else { + return ::thrust::copy_n(first, count, d_first); + } + return d_first + count; +} + +template +auto uninitialized_copy_n( + boost::multi::elements_iterator_t< Q1* , L1> first, Size count, + boost::multi::elements_iterator_t<::thrust::pointer, L2> d_first +)-> boost::multi::elements_iterator_t<::thrust::pointer, L2> { + if constexpr(std::is_trivially_assignable{}) { + return boost::multi::copy_n(first, count, d_first); + } else { + return ::thrust::uninitialized_copy_n(first, count, d_first); + } +} + +template +auto uninitialized_copy_n( + boost::multi::elements_iterator_t<::thrust::pointer, L1> first, Size count, + boost::multi::elements_iterator_t< Q2* , L2> d_first +)-> boost::multi::elements_iterator_t< Q2* , L2> { + if constexpr(std::is_trivially_assignable{}) { + return boost::multi::copy_n(first, count, d_first); + } else { + return ::thrust::uninitialized_copy_n(first, count, d_first); + } +} + +template +auto copy( + boost::multi::elements_iterator_t< Q1* , L1> first, + boost::multi::elements_iterator_t< Q1* , L1> last , + boost::multi::elements_iterator_t<::thrust::pointer, L2> d_first +)-> boost::multi::elements_iterator_t<::thrust::pointer, L2> { + return boost::multi::copy_n(first, last - first, d_first); +} + +template +auto copy( + boost::multi::elements_iterator_t<::thrust::pointer, L1> first, + boost::multi::elements_iterator_t<::thrust::pointer, L1> last , + boost::multi::elements_iterator_t< Q2* , L2> d_first +)-> boost::multi::elements_iterator_t< Q2* , L2> { + return boost::multi::copy_n(first, last - first, d_first); +} + +template +auto uninitialized_copy( + boost::multi::elements_iterator_t< Q1* , L1> first, + boost::multi::elements_iterator_t< Q1* , L1> last , + boost::multi::elements_iterator_t<::thrust::pointer, L2> d_first +)-> boost::multi::elements_iterator_t<::thrust::pointer, L2> { + if constexpr(std::is_trivially_assignable{}) { + return boost::multi::copy(first, last, 
d_first); + } else { + return ::thrust::uninitialized_copy(first, last, d_first); + } +} + +template +auto uninitialized_copy( + boost::multi::elements_iterator_t<::thrust::pointer, L1> first, + boost::multi::elements_iterator_t<::thrust::pointer, L1> last , + boost::multi::elements_iterator_t< Q2* , L2> d_first +)-> boost::multi::elements_iterator_t< Q2* , L2> { + if constexpr(std::is_trivially_assignable{}) { + return boost::multi::copy(first, last, d_first); + } else { + return ::thrust::uninitialized_copy(first, last, d_first); + } +} + +#endif + +} // end namespace boost::multi + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/fix_pointer_traits.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/fix_pointer_traits.hpp new file mode 100644 index 0000000000..59981db301 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/fix_pointer_traits.hpp @@ -0,0 +1,29 @@ +// Copyright 2021-2023 Alfredo A. 
Correa + +#ifndef MULTI_ADAPTORS_THRUST_FIX_POINTER_TRAITS_HPP_ +#define MULTI_ADAPTORS_THRUST_FIX_POINTER_TRAITS_HPP_ +#pragma once + +#include + +#include // for std::pointer_traits + +// #include + +// begin of nvcc trhust 11.5 workaround : https://github.com/NVIDIA/thrust/issues/1629 +namespace thrust { + +template class pointer; +template struct pointer_traits; + +} // end namespace thrust + +template +struct std::pointer_traits<::thrust::pointer> // NOLINT(cert-dcl58-cpp) normal way to specialize pointer_traits +: ::thrust::detail::pointer_traits> { + template + using rebind = typename ::thrust::detail::pointer_traits<::thrust::pointer>::template rebind::other; +}; + +// end of nvcc thrust 11.5 workaround +#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/managed_allocator.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/managed_allocator.hpp similarity index 73% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/managed_allocator.hpp rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/managed_allocator.hpp index ae3fb45b08..37108fde2c 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/managed_allocator.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/managed_allocator.hpp @@ -32,7 +32,7 @@ class managed_allocator { static_assert( std::is_same>{}, "!" 
); public: -// using allocator_type = managed_allocator; +// using allocator_type = managed_allocator; using value_type = T; using pointer = ::thrust::cuda::pointer; using size_type = ::size_t; // as specified by CudaMalloc @@ -59,38 +59,38 @@ class managed_allocator { if(!ret) throw bad_alloc{}; return pointer{ret}; } - pointer allocate(size_type n, const_void_pointer hint) { - MULTI_MARK_SCOPE("thrust::managed_allocate"); + [[deprecated]] pointer allocate(size_type n, const_void_pointer hint) { + // MULTI_MARK_SCOPE("thrust::managed_allocate"); auto const ret = allocate(n); // if(not hint){ - // if(cudaMemPrefetchAsync(raw_pointer_cast(ret), n*sizeof(T), /*device*/ 0) != cudaSuccess) throw std::runtime_error{"cannot prefetch"}; - // return ret; + // if(cudaMemPrefetchAsync(raw_pointer_cast(ret), n*sizeof(T), /*device*/ 0) != cudaSuccess) throw std::runtime_error{"cannot prefetch"}; + // return ret; // } // cudaPointerAttributes attr; if(cudaPointerGetAttributes(&attr, raw_pointer_cast(hint))!=cudaSuccess) throw std::runtime_error{"cannot use attributes for hint"}; // switch(attr.type){ - // case cudaMemoryTypeUnregistered:{//std::cout<< n <<" cudaMemoryTypeUnregistered"<< attr.device <<" "<< attr.device <<" cpuid:"<< cudaCpuDeviceId <>(p)); + [[deprecated]] void deallocate(pointer p, size_type) { + // MULTI_MARK_SCOPE("thrust::managed_deallocate"); + // cuda::managed::free(static_cast>(p)); cudaFree(raw_pointer_cast(p)); } template diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/reference.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/reference.hpp new file mode 100644 index 0000000000..a25c7e437e --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/reference.hpp @@ -0,0 +1,42 @@ +// Copyright 2022-2024 Alfredo A. 
Correa + +#pragma once + +#include // hipthrust needs this + +#if defined(__NVCC__) +#define BOOST_MULTI_HD_ __host__ __device__ +#else +#define BOOST_MULTI_HD_ +#endif + +namespace thrust { + + // TODO(correaa) consider restrict this for universal memory only + template + BOOST_MULTI_HD_ constexpr auto operator*( + thrust::tagged_reference const& r1, + thrust::tagged_reference const& r2 + ) + ->decltype(thrust::raw_reference_cast(r1) * thrust::raw_reference_cast(r2)) { + return thrust::raw_reference_cast(r1) * thrust::raw_reference_cast(r2); } + + template + BOOST_MULTI_HD_ constexpr auto operator*( + thrust::tagged_reference const& r1, + T2 const& r2 + ) + ->decltype(thrust::raw_reference_cast(r1) * r2) { + return thrust::raw_reference_cast(r1) * r2; } + + template + BOOST_MULTI_HD_ constexpr auto operator*( + T1 const& r1, + thrust::tagged_reference const& r2 + ) + ->decltype(r2 * thrust::raw_reference_cast(r1)) { + return r2 * thrust::raw_reference_cast(r1); } + +} + +#undef BOOST_MULTI_HD_ diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/CMakeLists.txt new file mode 100644 index 0000000000..f550c43dca --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/CMakeLists.txt @@ -0,0 +1,62 @@ +cmake_minimum_required(VERSION 3.18) +project( + boost-multi-adaptor-cuda-thrust-test + VERSION 0.1 + LANGUAGES CXX CUDA +) + +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES native) +endif() + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda --Werror=cross-execution-space-call -Xcudafe \"--display_error_number\"") + +find_package(Boost REQUIRED COMPONENTS unit_test_framework) + +enable_testing() + +find_program(MEMORYCHECK_COMMAND valgrind) 
+set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --error-exitcode=1") +include(CTest) + +find_package(Boost COMPONENTS unit_test_framework timer) +if(NOT Boost_FOUND) + message(ERROR " Cannot find Boost.Timer. Try\n sudo apt install libboost-timer-dev # in Debian/Ubuntu") +endif() + +set(TEST_SRCS + array.cu + memory_resource.cu + set_identity_kernel.cu + speed.cu + speed_algo.cu + universal.cu + vector.cu +) + +foreach(TEST_FILE ${TEST_SRCS}) + set(TEST_EXE "${TEST_FILE}.x") + add_executable(${TEST_EXE} ${TEST_FILE}) + if(ENABLE_CUDA OR DEFINED CXXCUDA) + set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) + set_property(TARGET ${TEST_EXE} PROPERTY CUDA_STANDARD 17) + endif() + + target_include_directories(${TEST_EXE} PUBLIC ../../../../../include) + + target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") + target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS}) + target_include_directories(${TEST_EXE} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) + target_link_libraries(${TEST_EXE} PRIVATE ${Boost_LIBRARIES}) + target_link_libraries(${TEST_EXE} PRIVATE multi) + target_link_directories(${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS}) + if(ENABLE_CUDA) + add_test(NAME ${TEST_EXE} COMMAND $) + endif() +endforeach() + +set_tests_properties(speed.cu.x PROPERTIES RUN_SERIAL TRUE) diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/array.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/array.cu similarity index 83% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/array.cu rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/array.cu index 63d6ce24a0..1d61708b71 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/array.cu +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/array.cu @@ -1,50 +1,108 @@ -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust" 
-#include +// Copyright 2021-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -#include "../../../adaptors/thrust/fix_complex_traits.hpp" -#include "../../../detail/fix_complex_traits.hpp" +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust" +#include +#include #include -#include "../../../adaptors/thrust.hpp" +#include +#include +#include #include #include #include #include -//#include "../../../memory/adaptors/cuda/allocator.hpp" -#include "../../../memory/adaptors/cuda/cached/allocator.hpp" -#include "../../../memory/adaptors/cuda/managed/allocator.hpp" - -#include - -#include - #include namespace multi = boost::multi; +#ifdef __NVCC__ +template<> +inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::std::complex> = true; +template<> +inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::std::complex> = true; +template<> +inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::thrust::complex> = true; +template<> +inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::thrust::complex> = true; +#else // vvv nvcc (12.1?) 
doesn't support this kind of customization: "error: expected initializer before ‘<’" +template +inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::std::complex> = std::is_trivially_default_constructible::value; +template +inline constexpr bool ::boost::multi::force_element_trivial_default_construction<::thrust::complex> = std::is_trivially_default_constructible::value; +#endif + namespace { template using test_allocator = // multi ::memory::cuda::allocator -// multi ::memory::cuda::managed::allocator//, std::integral_constant > -// multi ::memory::cuda::cached::allocator > - multi::thrust::cuda::managed_allocator -// thrust::cuda::allocator +// multi ::memory::cuda::managed::allocator//, std::integral_constant > +// multi ::memory::cuda::cached::allocator > +// multi::thrust::cuda::managed_allocator + thrust::cuda::allocator ; } using types_list = boost::mpl::list< - //char, + // char, double, - //std::complex, + // std::complex, thrust::complex >; -#if 1 +BOOST_AUTO_TEST_CASE(dummy_test) {} + +BOOST_AUTO_TEST_CASE(cuda_universal_empty) { + using complex = std::complex; + multi::array> A; + multi::array> B = A; + BOOST_REQUIRE( A.is_empty() ); + BOOST_REQUIRE( B.is_empty() ); + BOOST_REQUIRE( A == B ); +} + +BOOST_AUTO_TEST_CASE(cuda_allocators) { + + multi::array > A1(200, 0.0); + + BOOST_REQUIRE( size(A1) == 200 ); + A1[100] = 1.0; + + multi::array> const B1(200, 2.0); + BOOST_REQUIRE( B1[10] == 2.0 ); + + A1[10] = B1[10]; + BOOST_REQUIRE( A1[10] == 2.0 ); +} + + +BOOST_AUTO_TEST_CASE(test_univ_alloc) { + multi::array > Dev({128, 128}); + *raw_pointer_cast(Dev.base()) = 99.0; +} + +BOOST_AUTO_TEST_CASE(mtc_universal_array) { + multi::thrust::cuda::universal_array Dev({128, 128}); + *raw_pointer_cast(Dev.base()) = 99.0; +} + +BOOST_AUTO_TEST_CASE(mtc_universal_coloncolon_array) { + multi::thrust::cuda::universal::array Dev({128, 128}); + *raw_pointer_cast(Dev.base()) = 99.0; +} + +BOOST_AUTO_TEST_CASE(test_alloc) { + multi::array > 
Dev({128, 128}); + // *raw_pointer_cast(Dev.base()) = 99.0; // segmentation fault (correct behavior) +} + +#ifdef NDEBUG BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_copy_1D_issue123, T, types_list) { // BOOST_AUTO_TEST_CASE(fdfdfdsfds) { using T = char; static_assert( multi::is_trivially_default_constructible{}, "!"); static_assert( std::is_trivially_copy_constructible{} , "!"); @@ -163,6 +221,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_copy_1D_issue123, T, types_list) { // BOOS } BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_cpugpu_2D_issue123, T, types_list) { +// BOOST_AUTO_TEST_CASE(thrust_cpugpu_2D_issue123) { using T = double; auto const exts = multi::extensions_t<2>({10240, 10240}); @@ -278,7 +337,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_cpugpu_2D_issue123, T, types_list) { } BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_cpugpu_issue123_3D, T, types_list) { - +// BOOST_AUTO_TEST_CASE(thrust_cpugpu_issue123_3D) { using T = double; auto const exts = multi::extensions_t<3>({1024, 1024, 100}); std::cout<<"| 3D `"<< typeid(T).name() <<"` max data size "<< exts.num_elements()*sizeof(T) / 1073741824. <<" GB | speed |\n|---|---|"< C({2, 3}); +// multi::thrust::cuda::array C({2, 3}); -// C[0][0] = 0. ; -// C[1][1] = 11.; -// BOOST_TEST_REQUIRE( C[1][1] == 11. ); +// C[0][0] = 0. ; +// C[1][1] = 11.; +// BOOST_TEST_REQUIRE( C[1][1] == 11. ); //} //{ -// multi::array const H = { -// {00., 01., 02.}, -// {10., 11., 12.}, -// }; - -// BOOST_TEST_REQUIRE( H[1][1] == 11. ); - -// { -// multi::thrust::cuda::array C(H.extensions()); -// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); - -// thrust::copy_n(H.data_elements(), H.num_elements(), C.data_elements()); -// BOOST_TEST_REQUIRE( C[1][1] == 11. ); -// BOOST_REQUIRE( C == H ); -// } -// { -// multi::thrust::cuda::array C(H.extensions()); -// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); - -// std::copy_n(H.data_elements(), H.num_elements(), C.data_elements()); -// BOOST_TEST_REQUIRE( C[1][1] == 11. 
); -// BOOST_REQUIRE( C == H ); -// } -// { -// multi::thrust::cuda::array C(H.extensions()); -// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); - -// std::uninitialized_copy_n(H.data_elements(), H.num_elements(), C.data_elements()); -// BOOST_TEST_REQUIRE( C[1][1] == 11. ); -// BOOST_REQUIRE( C == H ); -// } -// { -// multi::thrust::cuda::array C(H.extensions()); -// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); - -// thrust::uninitialized_copy_n(H.data_elements(), H.num_elements(), C.data_elements()); -// BOOST_TEST_REQUIRE( C[1][1] == 11. ); -// BOOST_REQUIRE( C == H ); -// } -// { -// multi::thrust::cuda::array C(H.extensions()); -// BOOST_REQUIRE( C.extensions() == H.extensions() ); -// thrust::copy_n(H.begin(), H.size(), C.begin()); -// BOOST_REQUIRE( C == H ); -// } -// { -// multi::thrust::cuda::array C(H.extensions()); -// BOOST_REQUIRE( C.extensions() == H.extensions() ); -// std::copy_n(H.begin(), H.size(), C.begin()); -// BOOST_REQUIRE( C == H ); -// } -// { -// multi::thrust::cuda::array C(H.extensions()); -// C = H; -// BOOST_REQUIRE( C == H ); -// } -// { -// multi::thrust::cuda::array C = H; -// BOOST_REQUIRE( C == H ); -// } +// multi::array const H = { +// {00., 01., 02.}, +// {10., 11., 12.}, +// }; + +// BOOST_TEST_REQUIRE( H[1][1] == 11. ); + +// { +// multi::thrust::cuda::array C(H.extensions()); +// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); + +// thrust::copy_n(H.data_elements(), H.num_elements(), C.data_elements()); +// BOOST_TEST_REQUIRE( C[1][1] == 11. ); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::cuda::array C(H.extensions()); +// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); + +// std::copy_n(H.data_elements(), H.num_elements(), C.data_elements()); +// BOOST_TEST_REQUIRE( C[1][1] == 11. 
); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::cuda::array C(H.extensions()); +// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); + +// std::uninitialized_copy_n(H.data_elements(), H.num_elements(), C.data_elements()); +// BOOST_TEST_REQUIRE( C[1][1] == 11. ); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::cuda::array C(H.extensions()); +// BOOST_REQUIRE( C.num_elements() == H.num_elements() ); + +// thrust::uninitialized_copy_n(H.data_elements(), H.num_elements(), C.data_elements()); +// BOOST_TEST_REQUIRE( C[1][1] == 11. ); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::cuda::array C(H.extensions()); +// BOOST_REQUIRE( C.extensions() == H.extensions() ); +// thrust::copy_n(H.begin(), H.size(), C.begin()); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::cuda::array C(H.extensions()); +// BOOST_REQUIRE( C.extensions() == H.extensions() ); +// std::copy_n(H.begin(), H.size(), C.begin()); +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::cuda::array C(H.extensions()); +// C = H; +// BOOST_REQUIRE( C == H ); +// } +// { +// multi::thrust::cuda::array C = H; +// BOOST_REQUIRE( C == H ); +// } //} } #endif - -// Local Variables: -// mode: c++ -// End: diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/device_vector.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/device_vector.cu similarity index 100% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/device_vector.cu rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/device_vector.cu diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/memory_resource.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/memory_resource.cu new file mode 100644 index 0000000000..b58159e817 --- /dev/null +++ 
b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/memory_resource.cu @@ -0,0 +1,327 @@ +// Copyright 2022-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust memory resource" +#include + +#include +#include + +#include // for cuda_pointer + +#include +#include // for thrust::mr::disjoint_unsynchronized_pool_resource +#include // for thrust::mr::tls_disjoint_pool +#include // for thrust::mr::unsynchronized_pool_resource + +#include + +#include +#include + +namespace multi = boost::multi; + +template +void do_stuff_with_array(typename MultiArray::allocator_type alloc) { + MultiArray arr1({5, 10}, 99., alloc); + + BOOST_REQUIRE( arr1[3][7] == 99. ); + + MultiArray arr2(alloc); + + arr2 = arr1; + + arr1.swap(arr2); + + arr1.clear(); + arr1.reextent({20, 30}); + BOOST_REQUIRE(arr1.num_elements() == 600); +} + +BOOST_AUTO_TEST_CASE(thrust_host_memory_resource) { + + thrust::mr::new_delete_resource memres; + + { + using Alloc = thrust::mr::allocator; + Alloc alloc(&memres); + + do_stuff_with_array>(alloc); + } + + { + // virtual calls will be issued - wrapping in a polymorphic wrapper + thrust::mr::polymorphic_adaptor_resource adaptor(&memres); + using Alloc = thrust::mr::polymorphic_allocator; + Alloc alloc(&adaptor); + + do_stuff_with_array>(alloc); + } + + { + using Pool = thrust::mr::unsynchronized_pool_resource; + auto pool = Pool{&memres}; + { + using Alloc = thrust::mr::allocator; + auto alloc = Alloc{&pool}; + + do_stuff_with_array>(alloc); + } + } +} + +template void what(T&&) = delete; + +BOOST_AUTO_TEST_CASE(thrust_device_memory_resource) { + { + // use the global device_ptr-flavored device memory resource + using Resource = thrust::device_ptr_memory_resource; + thrust::mr::polymorphic_adaptor_resource> adaptor( + thrust::mr::get_global_resource() + ); + using Alloc = 
thrust::mr::polymorphic_allocator>; + Alloc alloc(&adaptor); + + do_stuff_with_array>(alloc); + + multi::array arr({10, 10}, &adaptor); + } + + thrust::mr::new_delete_resource memres; + + using Pool = thrust::mr::unsynchronized_pool_resource; + Pool pool(&memres); + { + typedef thrust::mr::allocator Alloc; + Alloc alloc(&pool); + + do_stuff_with_array>(alloc); + } + + using DisjointPool = thrust::mr::disjoint_unsynchronized_pool_resource< + thrust::mr::new_delete_resource, + thrust::mr::new_delete_resource + >; + + DisjointPool disjoint_pool(&memres, &memres); + { + typedef thrust::mr::allocator Alloc; + Alloc alloc(&disjoint_pool); + + do_stuff_with_array>(alloc); + } +} + +BOOST_AUTO_TEST_CASE(thrust_universal_memory_resource) { + { + // use the global device_ptr-flavored device memory resource + thrust::mr::polymorphic_adaptor_resource> adaptor( + thrust::mr::get_global_resource() + ); + using Alloc = thrust::mr::polymorphic_allocator>; + Alloc alloc(&adaptor); + + do_stuff_with_array>(alloc); + + multi::array arr({10, 10}, &adaptor); + } +} + +BOOST_AUTO_TEST_CASE(thrust_universal_memory_resource_global_resource_b) { + // use the global device_ptr-flavored device memory resource + auto adaptor = thrust::mr::polymorphic_adaptor_resource>{thrust::mr::get_global_resource()}; + multi::array>> arr({10, 10}, &adaptor); +} + +BOOST_AUTO_TEST_CASE(thrust_universal_memory_resource_global_resource_c) { + // use the global device_ptr-flavored device memory resource + auto adaptor = thrust::mr::disjoint_unsynchronized_pool_resource + ( + thrust::mr::get_global_resource(), + thrust::mr::get_global_resource() + ); + + using Alloc = thrust::mr::allocator>; + + multi::thrust::mr::array< + int, 2, + thrust::mr::disjoint_unsynchronized_pool_resource< + thrust::system::cuda::universal_memory_resource, + thrust::mr::new_delete_resource + > + > arr({10, 10}, &adaptor); +} + +BOOST_AUTO_TEST_CASE(thrust_universal_memory_resource_global_resource_d) { + // use the global 
device_ptr-flavored device memory resource + auto adaptor = thrust::mr::disjoint_unsynchronized_pool_resource + ( + thrust::mr::get_global_resource(), + thrust::mr::get_global_resource() + ); + + multi::thrust::mr::array< + int, 2, + thrust::mr::memory_resource> + > arr({10, 10}, &adaptor); +} + +BOOST_AUTO_TEST_CASE(thrust_universal_memory_resource_global_resource_e) { + // use the global device_ptr-flavored device memory resource + auto res = thrust::mr::disjoint_unsynchronized_pool_resource( + thrust::mr::get_global_resource(), + thrust::mr::get_global_resource() + ); + + multi::thrust::pmr::array> arr({10, 10}, &res); +} + +BOOST_AUTO_TEST_CASE(thrust_universal_memory_resource_global_resource_f) { + // use the global device_ptr-flavored device memory resource + auto res = thrust::mr::disjoint_unsynchronized_pool_resource( + thrust::mr::get_global_resource(), + thrust::mr::get_global_resource() + ); + +// multi::thrust::pmr::array> arr({10, 10}, &res); + multi::thrust::cuda::pmr::universal_array arr({10, 10}, &res); +} + +#if 1 +template +inline __attribute__((always_inline)) void DoNotOptimize(Tp const& value) { + asm volatile("" : : "r,m"(value) : "memory"); +} + +template +inline __attribute__((always_inline)) void DoNotOptimize(Tp& value) { +#if defined(__clang__) + asm volatile("" : "+r,m"(value) : : "memory"); +#else + asm volatile("" : "+m,r"(value) : : "memory"); +#endif +} + +BOOST_AUTO_TEST_CASE(thrust_benchmark) { + + auto count = 50; + + { + auto tick = std::chrono::high_resolution_clock::now(); + + for(int64_t i = 0; i != count; ++i) { + multi::thrust::universal_array arr({1000 - i%10, 1000 + i%10}); + DoNotOptimize(arr); + } + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + std::cout<< "normal arrays " << time.count() <(); + + for(int64_t i = 0; i != count; ++i) { + multi::thrust::cuda::pmr::universal_array arr({1000 - i%10, 1000 + i%10}, &res); + DoNotOptimize(arr); + } + + std::chrono::duration time = 
std::chrono::high_resolution_clock::now() - tick; + std::cout<< "default resource " << time.count() <(), + thrust::mr::get_global_resource() + ); + + for(int64_t i = 0; i != count; ++i) { + multi::thrust::cuda::pmr::universal_array arr({1000 - i%10, 1000 + i%10}, &res); + DoNotOptimize(arr); + } + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + std::cout<< "polymorphic pool resource " << time.count() <(), + thrust::mr::get_global_resource() + ); + + for(int64_t i = 0; i != count; ++i) { + multi::thrust::mr::array arr({1000 - i%10, 1000 + i%10}, &res); + DoNotOptimize(arr); + } + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + std::cout<< "static pool resource " << time.count() <(), + thrust::mr::get_global_resource() + ); + + for(int64_t i = 0; i != count; ++i) { + multi::array > arr({1000 - i%10, 1000 + i%10}, &res); + DoNotOptimize(arr); + } + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + std::cout<< "2 static pool resource " << time.count() <>> +// = std::pmr::polymorphic_allocator +> +struct caching_allocator : Base_ { + caching_allocator() : Base_{ + &thrust::mr::tls_disjoint_pool(thrust::mr::get_global_resource(), thrust::mr::get_global_resource()) + // & tls_pool (std::pmr::new_delete_resource()) + } {} + caching_allocator(caching_allocator const&) : caching_allocator{} {} + template struct rebind {using other = caching_allocator;}; +}; + +BOOST_AUTO_TEST_CASE(thrust_benchmark_contd) { + + auto count = 50; + + { + auto tick = std::chrono::high_resolution_clock::now(); + + for(int64_t i = 0; i != count; ++i) { + multi::array> arr({1000 - i%10, 1000 + i%10}); + // auto arr2 = arr; + // arr2 = arr; + // arr2 = std::move(arr); + // DoNotOptimize(arr2); + DoNotOptimize(arr); + } + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + std::cout<< "caching allocator " << time.count() < @@ -27,18 +26,16 @@ 
BOOST_AUTO_TEST_CASE(multi_cuda_mngd_ptr){ f(p); } -template void what(T&&) = delete; - -BOOST_AUTO_TEST_CASE(multi_cuda_mngd_ptr_call_gemm){ - using complex = std::complex; complex const I{0, 1}; +BOOST_AUTO_TEST_CASE(const multi_cuda_mngd_ptr_call_gemm){ + using complex = std::complex; complex const I{0.0, 1.0}; boost::multi::cuda::managed::array m = { - { 1. + 2.*I, 3. - 3.*I, 1.-9.*I}, - { 9. + 1.*I, 7. + 4.*I, 1.-8.*I}, + { 1.0 + 2.0*I, 3.0 - 3.0*I, 1.0 - 9.0*I}, + { 9.0 + 1.0*I, 7.0 + 4.0*I, 1.0 - 8.0*I}, }; boost::multi::cuda::managed::array const b = { - { 11.+1.*I, 12.+1.*I, 4.+1.*I, 8.-2.*I}, - { 7.+8.*I, 19.-2.*I, 2.+1.*I, 7.+1.*I}, - { 5.+1.*I, 3.-1.*I, 3.+8.*I, 1.+1.*I} + { 11.0 + 1.0*I, 12.0 + 1.0*I, 4.0 + 1.0*I, 8.0 - 2.0*I}, + { 7.0 + 8.0*I, 19.0 - 2.0*I, 2.0 + 1.0*I, 7.0 + 1.0*I}, + { 5.0 + 1.0*I, 3.0 - 1.0*I, 3.0 + 8.0*I, 1.0 + 1.0*I}, }; // { // blas::context ctxt; @@ -57,10 +54,10 @@ BOOST_AUTO_TEST_CASE(multi_cuda_mngd_ptr_call_gemm){ // } { multi::cuda::cublas::context ctxt; - auto c =+ blas::gemm(&ctxt, 1., m, b); + auto c =+ blas::gemm(&ctxt, 1.0, m, b); static_assert( std::is_same>{} ); BOOST_REQUIRE( c[1][2] == complex(112, 12) ); - BOOST_REQUIRE( b[1][2] == 2.+1.*I ); + BOOST_REQUIRE( b[1][2] == 2.0 + 1.0*I ); } // { // auto c =+ blas::gemm(1., m, b); diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/test/overlap_kernel_2d.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/overlap_kernel_2d.cu similarity index 100% rename from external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/test/overlap_kernel_2d.cu rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/overlap_kernel_2d.cu diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/test/saxpy_kernel.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/saxpy_kernel.cu similarity index 100% rename from 
external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/test/saxpy_kernel.cu rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/saxpy_kernel.cu diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/set_identity_kernel.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/set_identity_kernel.cu new file mode 100644 index 0000000000..aba9b37266 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/set_identity_kernel.cu @@ -0,0 +1,90 @@ +#include +#include + +#include +#include + +namespace multi = boost::multi; + +#define CUDA_CHECKED(ans) { cudaAssert((ans), __FILE__, __LINE__); } +inline void cudaAssert(cudaError_t code, const char *file, int line, bool abort=true) { + if (code != cudaSuccess) { + std::cerr<<"error: "<< cudaGetErrorString(code) <<" "<< file <<":"<< line < +__global__ void kernel_setIdentity(Array2DCursor home, int m, int n) { + int i = threadIdx.x + blockDim.x * blockIdx.x; + int j = threadIdx.y + blockDim.y * blockIdx.y; + if ((i < m) && (j < n)) { + if (i == j) { + home[i][j] = 1.0; + } else { + home[i][j] = 0.0; + } + } +} + +template +auto set_identity(Array2D&& arr) -> Array2D&&{ + int xblock_dim = 16; + auto [m, n] = arr.sizes(); + int xgrid_dim = (m + xblock_dim - 1) / xblock_dim; + int ygrid_dim = (n + xblock_dim - 1) / xblock_dim; + dim3 block_dim(xblock_dim, xblock_dim); + dim3 grid_dim(xgrid_dim, ygrid_dim); + kernel_setIdentity<<>>(arr.home(), m, n); + CUDA_CHECKED(cudaGetLastError()); + // CUDA_CHECKED(cudaDeviceSynchronize()); + return std::forward(arr); +} + +int main() { + using T = thrust::complex; + + { + multi::array> A({10000, 10000}); + auto const size = A.num_elements()*sizeof(T)/1e9; + std::cout<<"size is "<< size << "GB\n"; + + auto start_time = std::chrono::high_resolution_clock::now(); + + thrust::fill(A.elements().begin(), A.elements().end(), 0.0); + 
thrust::fill(A.diagonal().begin(), A.diagonal().end(), 1.0); + + REQUIRE( A[0][0] == 1.0 ); + REQUIRE( A[1][1] == 1.0 ); + REQUIRE( A[2][1] == 0.0 ); + + auto time = std::chrono::high_resolution_clock::now() - start_time; + auto rate = size/time.count(); + std::cout<<"algorithm rate = "<< rate <<" GB/s (ratio = 1)\n"; + } + + { + multi::array> A({10000, 10000}); + auto const size = A.num_elements()*sizeof(T)/1e9; + std::cout<<"size is "<< size << "GB\n"; + + auto start_time = std::chrono::high_resolution_clock::now(); + set_identity(A); + + REQUIRE( A[0][0] == 1.0 ); + REQUIRE( A[1][1] == 1.0 ); + REQUIRE( A[2][1] == 0.0 ); + + auto time = std::chrono::high_resolution_clock::now() - start_time; + auto rate = size/time.count(); + std::cout<<"kernel rate = "<< rate <<" GB/s (ratio = 1)\n"; + } +} diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/speed.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/speed.cu new file mode 100644 index 0000000000..c00ab46cbc --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/speed.cu @@ -0,0 +1,250 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust universal copy and assignment" +#include + +#include + +#include + +#include + +#include + +namespace multi = boost::multi; + +template<> +inline constexpr bool multi::force_element_trivial_default_construction> = true; + +template<> +inline constexpr bool multi::force_element_trivial_default_construction> = true; + +// template<> +// inline constexpr bool multi::force_element_trivial_default_construction> = true; + +// template<> +// inline constexpr bool multi::force_element_trivial_default_construction> = true; + +using test_types = boost::mpl::list< + char, unsigned, int, + ::thrust::complex, // std::complex, + ::thrust::complex, // std::complex, + double, float +>; + +BOOST_AUTO_TEST_CASE(warmup) { + using T = double; + + auto const n = 8000; + + multi::array> src({n, n}); + multi::array> dst(extensions(src)); + + auto const threshold = 0.30; + + auto const size = src.num_elements() * sizeof(T) / 1e9; + + auto const dummy = std::invoke([&] { + auto start_time = std::chrono::high_resolution_clock::now(); + cudaMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements() * sizeof(T), cudaMemcpyDeviceToDevice); + std::chrono::duration time = std::chrono::high_resolution_clock::now() - start_time; + auto rate = size / time.count(); + // std::cout<<"memcpy rate = "<< rate <<" GB/s (ratio = 1)\n"; + return rate; + }); + + auto const memcpy_rate = std::invoke([&] { + auto start_time = std::chrono::high_resolution_clock::now(); + cudaMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements() * sizeof(T), cudaMemcpyDeviceToDevice); + std::chrono::duration time = std::chrono::high_resolution_clock::now() - start_time; + auto rate = size / time.count(); + // std::cout<<"memcpy rate = "<< rate <<" GB/s (ratio = 1)\n"; + return rate; + }); + + { // cctor + auto tick = 
std::chrono::high_resolution_clock::now(); + + auto dst2 = src; + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / time.count(); + double ratio = rate / memcpy_rate; + + // std::cout<<"cctor rate = "<< rate <<" GB/s (ratio = "<< ratio <<")\n"; + if(ratio >= threshold) { + std::cout << "x"; + } + } + { // assign + auto tick = std::chrono::high_resolution_clock::now(); + + dst = src; + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / time.count(); + double ratio = rate / memcpy_rate; + + // std::cout << "assign rate = "<< rate <<" GB/s (ratio = "<< ratio <<")\n"; + if(ratio >= threshold) { + std::cout << "x"; + } + } + { // subarray assign + auto tick = std::chrono::high_resolution_clock::now(); + + dst({0, n - 2}, {0, n - 2}) = src({2, n}, {2, n}); + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / time.count(); + double ratio = rate / memcpy_rate; + // std::cout << "subasssign rate = "<< rate <<" GB/s (ratio = "<< ratio << ")\n"; + if(ratio >= threshold) { + std::cout << "x"; + } + } +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_nonuniversal_speed, T, test_types) { + std::cout << typeid(T).name() << " ******************************************\n"; + + auto const n = 8000; + + using AllocatorT = thrust::cuda::allocator; + + multi::array src({n, n}); + multi::array dst(extensions(src)); + + auto const threshold = 0.10; + + auto const size = src.num_elements() * sizeof(T) / 1e9; + + auto const dummy = std::invoke([&] __host__ { + auto start_time = std::chrono::high_resolution_clock::now(); + cudaMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements() * sizeof(T), cudaMemcpyDeviceToDevice); + cudaDeviceSynchronize(); + std::chrono::duration time = std::chrono::high_resolution_clock::now() - start_time; + auto rate = size / time.count(); + std::cout << "memcpy 
rate = " << rate << " GB/s (warmup)\n"; + return rate; + }); + + auto const memcpy_rate = std::invoke([&] __host__ { + auto start_time = std::chrono::high_resolution_clock::now(); + cudaMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements() * sizeof(T), cudaMemcpyDeviceToDevice); + cudaDeviceSynchronize(); + std::chrono::duration time = std::chrono::high_resolution_clock::now() - start_time; + auto rate = size / time.count(); + std::cout << "memcpy rate = " << rate << " GB/s (ratio = 1)\n"; + return rate; + }); + + { // cctor + auto tick = std::chrono::high_resolution_clock::now(); + + auto dst2 = src; + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / time.count(); + double ratio = rate / memcpy_rate; + + std::cout << "cctor rate = " << rate << " GB/s (ratio = " << ratio << ")\n"; + BOOST_TEST(ratio >= threshold); + } + { // assign + auto tick = std::chrono::high_resolution_clock::now(); + + dst = src; + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / time.count(); + double ratio = rate / memcpy_rate; + + std::cout << "assign rate = " << rate << " GB/s (ratio = " << ratio << ")\n"; + BOOST_TEST(ratio >= threshold); + } + { // subarray assign + auto tick = std::chrono::high_resolution_clock::now(); + + dst({0, n - 2}, {0, n - 2}) = src({2, n}, {2, n}); + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / time.count(); + double ratio = rate / memcpy_rate; + std::cout << "subasssign rate = " << rate << " GB/s (ratio = " << ratio << ")\n"; + BOOST_TEST(ratio >= threshold); + } +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_universal_speed, T, test_types) { + std::cout << typeid(T).name() << " ******************************************\n"; + + auto const n = 8000; + + using AllocatorT = thrust::cuda::universal_allocator; + + multi::array src({n, n}); + 
multi::array dst(extensions(src)); + + auto const threshold = 0.10; + + auto const size = src.num_elements() * sizeof(T) / 1e9; + + auto const dummy = std::invoke([&] __host__ { + auto start_time = std::chrono::high_resolution_clock::now(); + cudaMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements() * sizeof(T), cudaMemcpyDeviceToDevice); + cudaDeviceSynchronize(); + std::chrono::duration time = std::chrono::high_resolution_clock::now() - start_time; + auto rate = size / time.count(); + std::cout << "memcpy rate = " << rate << " GB/s (warmup)\n"; + return rate; + }); + + auto const memcpy_rate = std::invoke([&] __host__ { + auto start_time = std::chrono::high_resolution_clock::now(); + cudaMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements() * sizeof(T), cudaMemcpyDeviceToDevice); + cudaDeviceSynchronize(); + std::chrono::duration time = std::chrono::high_resolution_clock::now() - start_time; + auto rate = size / time.count(); + std::cout << "memcpy rate = " << rate << " GB/s (ratio = 1)\n"; + return rate; + }); + + { // cctor + auto tick = std::chrono::high_resolution_clock::now(); + + auto dst2 = src; + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / time.count(); + double ratio = rate / memcpy_rate; + + std::cout << "cctor rate = " << rate << " GB/s (ratio = " << ratio << ")\n"; + BOOST_WARN(ratio >= threshold); + } + { // assign + auto tick = std::chrono::high_resolution_clock::now(); + + dst = src; + + std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / time.count(); + double ratio = rate / memcpy_rate; + + std::cout << "assign rate = " << rate << " GB/s (ratio = " << ratio << ")\n"; + BOOST_TEST(ratio >= threshold); + } + { // subarray assign + auto tick = std::chrono::high_resolution_clock::now(); + + dst({0, n - 2}, {0, n - 2}) = src({2, n}, {2, n}); + + 
std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; + double rate = size / time.count(); + double ratio = rate / memcpy_rate; + std::cout << "subasssign rate = " << rate << " GB/s (ratio = " << ratio << ")\n"; + BOOST_TEST(ratio >= threshold); + } +} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/speed_algo.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/speed_algo.cu similarity index 91% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/speed_algo.cu rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/speed_algo.cu index 4eb0e2d3d4..3b47f47e8a 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/speed_algo.cu +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/speed_algo.cu @@ -1,10 +1,14 @@ +// Copyright 2022-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + #define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust universal copy and assignment" #include -#include +#include //#include -#include +#include //#include @@ -101,3 +105,6 @@ BOOST_AUTO_TEST_CASE(thrust_universal_speed_algo) { } } +BOOST_AUTO_TEST_CASE(thrust_run) { + multi::array> A(100); +} \ No newline at end of file diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/universal.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/universal.cu similarity index 62% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/universal.cu rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/universal.cu index 7fee49582a..ad2402752d 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/universal.cu +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/universal.cu @@ -1,9 +1,12 @@ +// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- +// Copyright 2023 Alfredo A. Correa + #define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust universal" #include -#include +#include -#include +#include #include @@ -15,9 +18,8 @@ namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(thrust_universal_ptr) { - multi::array Host({1024, 1024}); - std::iota(Host.data_elements(), Host.data_elements() + Host.num_elements(), 12.); + std::iota(Host.data_elements(), Host.data_elements() + Host.num_elements(), 12.0); multi::array> Univ({1024, 1024}); @@ -26,57 +28,53 @@ BOOST_AUTO_TEST_CASE(thrust_universal_ptr) { multi::array Hos2({1024, 1024}); Hos2({0, 10}, {0, 20}) = Univ({0, 10}, {0, 20}); - BOOST_REQUIRE( Hos2[0][0] == 12. 
); - + BOOST_REQUIRE( Hos2[0][0] == 12.0 ); } -template void what(T const&) = delete; - BOOST_AUTO_TEST_CASE(thrust_universal_ptr_initializer_list) { - - multi::array Host = {1., 2., 3.}; + multi::array Host = {1.0, 2.0, 3.0}; BOOST_REQUIRE( Host.size() == 3 ); { multi::array> Univ(multi::extensions_t<1>{3}); - Univ[0] = 3.; - Univ[1] = 2.; - Univ[2] = 1.; + Univ[0] = 3.0; + Univ[1] = 2.0; + Univ[2] = 1.0; Host() = Univ(); - BOOST_REQUIRE( Host[0] == 3. ); + BOOST_REQUIRE( Host[0] == 3.0 ); } { - multi::array tmp = {3., 2., 1.}; + multi::array tmp = {3.0, 2.0, 1.0,}; multi::array> Univ{multi::extensions_t<1>{3}}; Univ = tmp; Host() = Univ(); - BOOST_REQUIRE( Host[0] == 3. ); + BOOST_REQUIRE( Host[0] == 3.0 ); } { - multi::array tmp = {3., 2., 1.}; + multi::array tmp = {3.0, 2.0, 1.0,}; multi::array> Univ{tmp}; Host() = Univ(); - BOOST_REQUIRE( Host[0] == 3. ); + BOOST_REQUIRE( Host[0] == 3.0 ); } { - multi::array> Univ = {3., 2., 1.}; + multi::array> Univ = {3.0, 2.0, 1.0,}; Host() = Univ(); - BOOST_REQUIRE( Host[0] == 3. ); + BOOST_REQUIRE( Host[0] == 3.0 ); } -// what( thrust::cuda::universal_allocator{} ); -// { -// multi::array> Univ = {3., 2., 1.}; +// what( thrust::cuda::universal_allocator{} ); +// { +// multi::array> Univ = {3., 2., 1.}; -// Host() = Univ(); +// Host() = Univ(); -// BOOST_REQUIRE( Host[0] == 3. ); -// } +// BOOST_REQUIRE( Host[0] == 3. 
); +// } } diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/vector.cu b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/vector.cu similarity index 81% rename from external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/vector.cu rename to external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/vector.cu index 9d3d892755..aab63e4197 100644 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/vector.cu +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/thrust/test/vector.cu @@ -1,5 +1,4 @@ #define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust" -#define BOOST_TEST_DYN_LINK #include #include @@ -29,13 +28,14 @@ BOOST_AUTO_TEST_CASE(vector){ // Copy host_vector H to device_vector D thrust::device_vector D = H; -// f(D.data()); +// f(D.data()); // elements of D can be modified D[0] = 99; D[1] = 88; - thrust::cuda::pointer p = D.data(); + // thurst::device_ptr p = D.data(); // doesn't work with CUDA 11.8 + thrust::cuda::pointer p = D.data(); // this works with thrust from CUDA 12.1 BOOST_REQUIRE( p[0] == 99 ); BOOST_TEST_REQUIRE( D[1] == 88 ); diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/totalview.hpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/totalview.hpp new file mode 100644 index 0000000000..307f7a2c62 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/totalview.hpp @@ -0,0 +1,121 @@ +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ADAPTORS_TOTALVIEW_HPP +#define BOOST_MULTI_ADAPTORS_TOTALVIEW_HPP + +#include +#include // TODO(correaa) remove +#include + +#include +#include + +#include "../adaptors/../array.hpp" + +#include "../src/tv_data_display.c" // you have to find the directory with the totalview include files +#include "../src/sys/totalview/tv_data_display.h" // you have to find the directory with the totalview include files +// ^^^^^^^^^^^ this can produce problemas later with linking +// https://docs.roguewave.com/totalview/2018.1/html/index.html#page/Reference_Guide%2FCompilingAndLinkingTV_data_display.html%23ww1738654 + +template constexpr char const* pretty_name = "unknown"; + +template<> constexpr char const* pretty_name = "double"; +template<> constexpr char const* pretty_name = "float"; + +template<> constexpr char const* pretty_name> = "std::complex"; +template<> constexpr char const* pretty_name> = "std::complex"; + +template<> constexpr char const* pretty_name = "long"; +template<> constexpr char const* pretty_name = "int"; + +template +#ifdef __GCC__ +__attribute__((used)) +#endif +int +TV_ttf_display_type(boost::multi::array const* mad1P) { + if(! mad1P->is_empty()) { + std::array tname; + snprintf(tname.data(), tname.size(), "%s[%ld]", pretty_name, (long)mad1P->size()); //, (long)mad1P->stride()); + int result = TV_ttf_add_row("elements", tname.data(), mad1P->origin()); + if(result != 0) { + int res = fprintf(stderr, "TV_ttf_add_row returned error %d\n", result); + assert(res > -1); + return TV_ttf_format_failed; + } + } + return TV_ttf_format_ok_elide; +} + +template +#ifdef __GCC__ +__attribute__((used)) +#endif +int +TV_ttf_display_type(boost::multi::array const* mad2P) { + if(! 
mad2P->is_empty()) { + std::arra tname; + using std::get; + snprintf(tname.data(), tname.size(), "%s[%ld][%ld]", pretty_name, (long)get<0>(mad2P->sizes()), (long)get<1>(mad2P->sizes())); //, (long)mad1P->stride()); + int result = TV_ttf_add_row("elements", tname.data(), mad2P->origin()); + + if(result != 0) { + int res = fprintf(stderr, "TV_ttf_add_row returned error %d\n", result); + assert(res >= 0); + return TV_ttf_format_failed; + } + } + return TV_ttf_format_ok_elide; +} + +template +#ifdef __GCC__ +__attribute__((used)) +#endif +int +TV_ttf_display_type(boost::multi::subarray const* mad2P) { + boost::multi::array const value = *mad2P; + return TV_ttf_display_type(std::addressof(value)); +} + +template +#ifdef __GCC__ +__attribute__((used)) +#endif +int +TV_ttf_display_type(boost::multi::subarray const* mad2P) { + boost::multi::array const value = *mad2P; + return TV_ttf_display_type(std::addressof(value)); +} + +template int TV_ttf_display_type(boost::multi::array const*); +template int TV_ttf_display_type(boost::multi::array const*); +template int TV_ttf_display_type>(boost::multi::array, 1> const*); +template int TV_ttf_display_type>(boost::multi::array, 1> const*); +template int TV_ttf_display_type(boost::multi::array const*); +template int TV_ttf_display_type(boost::multi::array const*); + +template int TV_ttf_display_type(boost::multi::array const*); +template int TV_ttf_display_type(boost::multi::array const*); +template int TV_ttf_display_type>(boost::multi::array, 2> const*); +template int TV_ttf_display_type>(boost::multi::array, 2> const*); +template int TV_ttf_display_type(boost::multi::array const*); +template int TV_ttf_display_type(boost::multi::array const*); + +template int TV_ttf_display_type(boost::multi::subarray const*); +template int TV_ttf_display_type(boost::multi::subarray const*); +template int TV_ttf_display_type>(boost::multi::subarray, 1> const*); +template int TV_ttf_display_type>(boost::multi::subarray, 1> const*); +template int 
TV_ttf_display_type(boost::multi::subarray const*); +template int TV_ttf_display_type(boost::multi::subarray const*); + +template int TV_ttf_display_type(boost::multi::subarray const*); +template int TV_ttf_display_type(boost::multi::subarray const*); +template int TV_ttf_display_type>(boost::multi::subarray, 2> const*); +template int TV_ttf_display_type>(boost::multi::subarray, 2> const*); +template int TV_ttf_display_type(boost::multi::subarray const*); +template int TV_ttf_display_type(boost::multi::subarray const*); + +#endif // BOOST_MULTI_ADAPTORS_TOTALVIEW_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/totalview/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/boost/multi/adaptors/totalview/test/CMakeLists.txt new file mode 100644 index 0000000000..1960d41fcb --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/totalview/test/CMakeLists.txt @@ -0,0 +1,47 @@ +# -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;autowrap:nil;-*- +#[=[Multi Test suite can be run like this: + mkdir -p build + cd build + cmake .. 
[-DENABLE_CUDA=1] + make -j + ctest -j --output-on-error [-T memcheck] + exit +#]=] +cmake_minimum_required(VERSION 3.11) + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") + +set(CMAKE_VERBOSE_MAKEFILE ON) + +project( + boost-multi-adaptors-blis-test + VERSION 0.1 + LANGUAGES CXX +) + +find_package(Boost REQUIRED COMPONENTS unit_test_framework) + +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +enable_testing() + +include(CTest) + +include_directories(${CMAKE_BINARY_DIR}) + +include_directories(../../../../../include) + +set(TEST_SRCS matrix.cpp) + +add_executable(totalview.x totalview.cpp) + +target_compile_features(totalview.x PUBLIC cxx_std_17) + +target_compile_definitions(totalview.x PRIVATE "BOOST_PP_VARIADICS") +target_compile_definitions(totalview.x PRIVATE ${Boost_DEFINITIONS}) +target_include_directories(totalview.x SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) +target_link_libraries(totalview.x PRIVATE ${Boost_LIBRARIES}) +target_link_directories(totalview.x PRIVATE ${Boost_LIBRARY_DIRS}) + +add_test(NAME totalview.x COMMAND $) diff --git a/external_codes/boost_multi/multi/include/boost/multi/adaptors/totalview/test/totalview.cpp b/external_codes/boost_multi/multi/include/boost/multi/adaptors/totalview/test/totalview.cpp new file mode 100644 index 0000000000..a99f7868d3 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/adaptors/totalview/test/totalview.cpp @@ -0,0 +1,37 @@ +#define BOOST_TEST_MODULE "C++ Unit Tests for Multi TotalView adaptor" +#define BOOST_TEST_DYN_LINK + +#include + +#include "multi/array.hpp" +#include "multi/utility.hpp" + +#include "../../../adaptors/totalview.hpp" + +#include // transform +#include +#include +#include // iota + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(multi_1d) { + + std::vector V = {10, 20, 30}; + + multi::array const A = {1.0, 2.0, 3.0, 4.0, 5.0}; + auto&& Apart = A({1, 3}); + + multi::array const B = { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 
6.0}, + }; + + double sum = 0.0; + for(auto i : A.extension()) { + sum += A[i]; + } + + BOOST_REQUIRE( sum == 15.0 ); + BOOST_REQUIRE( B[1][0] == 4.0 ); +} diff --git a/external_codes/boost_multi/multi/include/multi/algorithms/gemm.hpp b/external_codes/boost_multi/multi/include/boost/multi/algorithms/gemm.hpp similarity index 93% rename from external_codes/boost_multi/multi/include/multi/algorithms/gemm.hpp rename to external_codes/boost_multi/multi/include/boost/multi/algorithms/gemm.hpp index b562243963..460ca467e9 100644 --- a/external_codes/boost_multi/multi/include/multi/algorithms/gemm.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/algorithms/gemm.hpp @@ -1,11 +1,13 @@ -// Copyright 2021 Alfredo A. Correa +// Copyright 2021-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt // this header contains a generic gemm algorithm (not the blas one) // it is ~3 times slower than blas::gemm but it is more generic in the type and in the operations // when compiled using -DCMAKE_CXX_FLAGS_RELEASE="-Ofast -DNDEBUG -mfpmath=sse -march=native -funroll-loops -fargument-noalias" -#ifndef MULTI_ALGORITHM_GEMM_HPP -#define MULTI_ALGORITHM_GEMM_HPP +#ifndef BOOST_MULTI_ALGORITHM_GEMM_HPP +#define BOOST_MULTI_ALGORITHM_GEMM_HPP #include // for par // needs linking to TBB library #include // for inner_product and transform_reduce @@ -86,4 +88,4 @@ auto gemm(Talpha const& alpha, MatrixA const& A, MatrixB const& B, Tbeta const& } // end namespace multi } // end namespace boost -#endif +#endif // BOOST_MULTI_ALGORITHM_GEMM_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/array.hpp b/external_codes/boost_multi/multi/include/boost/multi/array.hpp new file mode 100644 index 0000000000..626f3bc1a5 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/array.hpp @@ -0,0 +1,1461 @@ +// Copyright 2018-2024 Alfredo A. 
Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ARRAY_HPP_ +#define BOOST_MULTI_ARRAY_HPP_ + +#include + +#include + +#include +#include +#include + +#include // for std::allocator_traits +#include // needed by a deprecated function +#include // for std::common_reference +#include // for std::move + +#if __has_include() +# include +// Apple clang provides the header but not the compiled library prior to version 16 +# if (defined(__cpp_lib_memory_resource) && (__cpp_lib_memory_resource >= 201603)) && !(defined(__APPLE__) && defined(__clang_major__) && __clang_major__ <= 15) && (!defined(_LIBCPP_VERSION) || !(_LIBCPP_VERSION <= 160001) ) +# define BOOST_MULTI_HAS_MEMORY_RESOURCE +# endif +#endif + +// TODO(correaa) or should be (__CUDA__) or CUDA__ || HIP__ +#if defined(__NVCC__) +#define BOOST_MULTI_HD __host__ __device__ +#else +#define BOOST_MULTI_HD +#endif + +namespace boost::multi { + +namespace detail { + +template +struct array_allocator { + using allocator_type = Allocator; + array_allocator() = default; + + private: + BOOST_MULTI_NO_UNIQUE_ADDRESS allocator_type alloc_; + + using allocator_traits = typename multi::allocator_traits; + using size_type_ = typename allocator_traits::size_type; + using pointer_ = typename allocator_traits::pointer; + + protected: + constexpr auto alloc() & -> allocator_type& { return alloc_; } + constexpr auto alloc() const& -> allocator_type const& { return alloc_; } + + constexpr explicit array_allocator(allocator_type const& alloc) : alloc_{alloc} {} // NOLINT(modernize-pass-by-value) + + constexpr auto allocate(size_type_ n) -> pointer_ { + return n ? allocator_traits::allocate(alloc_, n) : pointer_{nullptr}; + } + constexpr auto allocate(size_type_ n, typename allocator_traits::const_void_pointer hint) -> pointer_ { + return n ? 
allocator_traits::allocate(alloc_, n, hint) : pointer_{nullptr}; + } + + constexpr auto uninitialized_fill_n(pointer_ first, size_type_ count, typename allocator_traits::value_type const& value) { + return adl_alloc_uninitialized_fill_n(alloc_, first, count, value); + } + template + auto uninitialized_copy_n(It first, size_type count, pointer_ d_first) { + #if defined(__clang__) && defined(__CUDACC__) + if constexpr(! std::is_trivially_default_constructible_v::element_type> && ! multi::force_element_trivial_default_construction::element_type> ) { + adl_alloc_uninitialized_default_construct_n(alloc_, d_first, count); + } + return adl_copy_n ( first, count, d_first); + #else + return adl_alloc_uninitialized_copy_n(alloc_, first, count, d_first); + #endif + } + + template + auto uninitialized_copy_n(EP&& ep, It first, size_type count, pointer_ d_first) { + // #if defined(__clang__) && defined(__CUDACC__) + // if constexpr(! std::is_trivially_default_constructible_v::element_type> && ! multi::force_element_trivial_default_construction::element_type> ) { + // adl_alloc_uninitialized_default_construct_n(alloc_, d_first, count); + // } + // return adl_copy_n ( first, count, d_first); + // #else + return adl_uninitialized_copy_n(std::forward(ep), first, count, d_first); + // return adl_alloc_uninitialized_copy_n(std::forward(ep), alloc_, first, count, d_first); + // #endif + } + + template + auto destroy_n(It first, size_type n) { return adl_alloc_destroy_n(this->alloc(), first, n); } + + public: + constexpr auto get_allocator() const -> allocator_type { return alloc_; } +}; +} // end namespace detail + +template> // DummyAlloc mechanism allows using the convention array>, is an_allocator supports void template argument +struct static_array // NOLINT(fuchsia-multiple-inheritance) : multiple inheritance used for composition +: protected detail::array_allocator< + // Alloc + typename allocator_traits::template rebind_alloc> +, public array_ref::template 
rebind_alloc>::pointer> +, boost::multi::random_iterable::template rebind_alloc>> { + static_assert( + std::is_same_v< + std::remove_const_t::value_type>, + typename static_array::element + > + || + std::is_same_v< + std::remove_const_t::value_type>, + void + >, // allocator template can be redundant or void (which can be a default for the allocator) + "allocator value type must match array value type" + ); + + private: + // using Alloc = typename allocator_traits::template rebind_alloc; + + protected: + using array_alloc = detail::array_allocator::template rebind_alloc >; + + public: + // constexpr auto get_allocator() const -> allocator_type { return alloc_; } + using detail::array_allocator::template rebind_alloc >::get_allocator; + + using allocator_type = typename detail::array_allocator::template rebind_alloc>::allocator_type; + using decay_type = array; + using layout_type = typename array_ref::pointer>::layout_type; + + using ref = array_ref< + T, D, + typename multi::allocator_traits::template rebind_alloc>::pointer + >; + + auto operator new(std::size_t count) -> void* { return ::operator new(count); } + auto operator new(std::size_t count, void* ptr) -> void* { return ::operator new(count, ptr); } + void operator delete(void* ptr) noexcept { ::operator delete(ptr); } // this overrides the deleted delete operator in reference (base) class subarray + + protected: + using alloc_traits = typename multi::allocator_traits; + + auto uninitialized_value_construct() { + return adl_alloc_uninitialized_value_construct_n(static_array::alloc(), this->base_, this->num_elements()); + } + + auto uninitialized_default_construct() { + if constexpr(!std::is_trivially_default_constructible_v && ! 
multi::force_element_trivial_default_construction) { + return adl_alloc_uninitialized_default_construct_n(static_array::alloc(), this->base_, this->num_elements()); + } + } + + template auto uninitialized_copy_elements(It first) { + return array_alloc::uninitialized_copy_n(first, this->num_elements(), this->data_elements()); + } + + template auto uninitialized_copy_elements(EP&& ep, It first) { + return array_alloc::uninitialized_copy_n(std::forward(ep), first, this->num_elements(), this->data_elements()); + } + + constexpr void destroy() { + if constexpr(!(std::is_trivially_destructible_v || multi::force_element_trivial_destruction)) { + array_alloc::destroy_n(this->data_elements(), this->num_elements()); + } + } + + void allocate() { + this->base_ = array_alloc::allocate(static_cast::size_type>(this->static_array::num_elements())); + } + + public: + using value_type = typename std::conditional_t< + (D > 1), // this parenthesis is needed + array, + typename static_array::element>; + + using typename ref::difference_type; + using typename ref::size_type; + explicit static_array(allocator_type const& alloc) : array_alloc{alloc}, ref(nullptr, {}) {} + + using ref:: operator(); + BOOST_MULTI_HD constexpr auto operator()() && -> decltype(auto) { return ref::element_moved(); } + + using ref::taked; + constexpr auto taked(difference_type n) && -> decltype(auto) { return ref::taked(n).element_moved(); } + + using ref::dropped; + constexpr auto dropped(difference_type n) && -> decltype(auto) { return ref::dropped(n).element_moved(); } + + static_array(static_array&& other) noexcept : static_array{other.element_moved()} {} + + constexpr static_array(decay_type&& other, allocator_type const& alloc) noexcept + : array_alloc{alloc}, ref{std::exchange(other.base_, nullptr), other.extensions()} { + std::move(other).layout_mutable() = {}; + } + + constexpr explicit static_array(decay_type&& other) noexcept + : static_array(std::move(other), allocator_type{}) {} // 6b + + 
template>::difference_type> + constexpr explicit static_array(It first, It last, allocator_type const& alloc) + : array_alloc{alloc}, ref{ + array_alloc::allocate(static_cast::size_type>(layout_type{index_extension{adl_distance(first, last)} * multi::extensions(*first)}.num_elements())), + index_extension{adl_distance(first, last)} * multi::extensions(*first)} { + if(adl_distance(first, last) == 0) {return;} + #if defined(__clang__) && defined(__CUDACC__) + // TODO(correaa) add workaround for non-default constructible type and use adl_alloc_uninitialized_default_construct_n + if constexpr(! std::is_trivially_default_constructible_v && ! multi::force_element_trivial_default_construction ) { + adl_alloc_uninitialized_default_construct_n(static_array::alloc(), ref::data_elements(), ref::num_elements()); + } + adl_copy_n(first, last - first, ref::begin()); + #else + adl_alloc_uninitialized_copy(static_array::alloc(), first, last, ref::begin()); + #endif + } + + template>::difference_type> + constexpr explicit static_array(It first, It last) : static_array(first, last, allocator_type{}) {} + + template< + class Range, class = std::enable_if_t>{}>, + class = decltype(/*static_array*/ (std::declval().begin() - std::declval().end())), // instantiation of static_array here gives a compiler error in 11.0, partially defined type? + class = std::enable_if_t::value> + > + // cppcheck-suppress noExplicitConstructor ; because I want to use equal for lazy assigments form range-expressions // NOLINTNEXTLINE(runtime/explicit) + static_array(Range const& rng) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax + : static_array{std::begin(rng), std::end(rng)} {} // Sonar: Prefer free functions over member functions when handling objects of generic type "Range". + // : static_array{rng.begin(), rng.end()} {} // Sonar: Prefer free functions over member functions when handling objects of generic type "Range". 
+ + template + auto uninitialized_fill_elements(TT const& value) { + return array_alloc::uninitialized_fill_n(this->data_elements(), this->num_elements(), value); + } + + template + static_array(array_ref const& other, allocator_type const& alloc) + : array_alloc{alloc}, ref{ + array_alloc::allocate(static_cast::size_type>(other.num_elements())), + other.extensions()} { + #if defined(__clang__) && defined(__CUDACC__) + if constexpr(! std::is_trivially_default_constructible_v && ! multi::force_element_trivial_default_construction ) { + adl_alloc_uninitialized_default_construct_n(static_array::alloc(), this->data_elements(), this->num_elements()); + } + adl_copy_n(other.data_elements(), other.num_elements(), this->data_elements()); + #else + adl_alloc_uninitialized_copy_n(static_array::alloc(), other.data_elements(), other.num_elements(), this->data_elements()); + #endif + } + + static_array(typename static_array::extensions_type extensions, typename static_array::element const& elem, allocator_type const& alloc) // 2 + : array_alloc{alloc}, ref{array_alloc::allocate(static_cast::size_type>(typename static_array::layout_t{extensions}.num_elements()), nullptr), extensions} { + array_alloc::uninitialized_fill_n(this->data_elements(), static_cast::size_type>(this->num_elements()), elem); + } + + // template + // explicit static_array(std::tuple extensions, Ts&&... args) // this is important to pass arguments to boost::interprocess::construct + // : static_array{ + // std::apply([](auto... exts) {return typename static_array::extensions_type{exts...};}, extensions), + // std::forward(args)... + // } {} + + template + explicit static_array( + Element const& elem, allocator_type const& alloc, + std::enable_if_t && (D == 0), int> /*dummy*/ = 0 // NOLINT(fuchsia-default-arguments-declarations) for classic sfinae, needed by MSVC? 
+ ) + : static_array(typename static_array::extensions_type{}, elem, alloc) {} + + constexpr static_array(typename static_array::extensions_type exts, typename static_array::element const& elem) + : + array_alloc{}, + array_ref::template rebind_alloc>::pointer>( + exts, + array_alloc::allocate( + static_cast::size_type>(typename static_array::layout_t(exts).num_elements()) , + nullptr + ) + ) + { + array_alloc::uninitialized_fill_n(this->base(), static_cast::size_type>(this->num_elements()), elem); + } + + template().extensions()), std::enable_if_t, int> =0> + explicit static_array(typename static_array::index_extension const& extension, ValueType const& value, allocator_type const& alloc) // fill constructor + : array_alloc{alloc}, ref(array_alloc::allocate(static_cast::size_type>(typename static_array::layout_t(extension*value.extensions()).num_elements())), extension*value.extensions()) + { + static_assert(std::is_trivially_default_constructible_v || multi::force_element_trivial_default_construction ); // TODO(correaa) not implemented for non-trivial types, + adl_fill_n(this->begin(), this->size(), value); // TODO(correaa) implement via .elements()? substitute with uninitialized version of fill, uninitialized_fill_n? 
+ } + + template().extensions()), std::enable_if_t, int> =0> + explicit static_array(typename static_array::index_extension const& extension, ValueType const& value) // fill constructor + : static_array(extension, value, allocator_type{}) {} + + explicit static_array(typename static_array::extensions_type extensions, allocator_type const& alloc) + : array_alloc{alloc}, ref(array_alloc::allocate(static_cast::size_type>(typename static_array::layout_t{extensions}.num_elements())), extensions) { + uninitialized_default_construct(); + } + + explicit static_array(typename static_array::extensions_type extensions) + : static_array(extensions, allocator_type{}) {} + + template::element>{}>, + class = decltype(adl_copy(std::declval const&>().begin(), std::declval const&>().end(), std::declval()))> + constexpr static_array(multi::subarray const& other, allocator_type const& alloc) + : array_alloc{alloc}, ref(array_alloc::allocate(static_cast::size_type>(typename static_array::layout_t{other.extensions()}.num_elements())), other.extensions()) { + #if defined(__clang__) && defined(__CUDACC__) + if constexpr(! std::is_trivially_default_constructible_v && ! 
multi::force_element_trivial_default_construction ) { + adl_alloc_uninitialized_default_construct_n(static_array::alloc(), this->data_elements(), this->num_elements()); + } + adl_copy (other.begin(), other.end(), this->begin()); // TODO(correaa) implement via .elements() + #else + adl_uninitialized_copy(/*static_array::alloc()*/ other.begin(), other.end(), this->begin()); // TODO(correaa) implement via .elements() + #endif + } + + template const&>().base()), T>, int> = 0, + class = decltype(adl_copy(std::declval const&>().begin(), std::declval const&>().end(), std::declval()))> + // cppcheck-suppress noExplicitConstructor // NOLINTNEXTLINE(runtime/explicit) + constexpr /*mplct*/ static_array(multi::subarray const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + : static_array(other, allocator_type{}) {} + + template const&>().base()), T>, int> = 0, + class = decltype(adl_copy(std::declval const&>().begin(), std::declval const&>().end(), std::declval()))> + explicit static_array(multi::subarray const& other) + : static_array(other, allocator_type{}) {} + + template&>().base()), T>, int> = 0, + class = decltype(adl_copy(std::declval&>().begin(), std::declval const&>().end(), std::declval()))> + // cppcheck-suppress noExplicitConstructor ; // NOLINTNEXTLINE(runtime/explicit) + /*mplct*/ static_array(multi::subarray& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + : static_array(other, allocator_type{}) {} + + template&>().base()), T>, int> = 0, + class = decltype(adl_copy(std::declval&>().begin(), std::declval const&>().end(), std::declval()))> + explicit static_array(multi::subarray& other) + : static_array(other, allocator_type{}) {} + + template&&>().base()), T>, int> = 0, + class = decltype(adl_copy(std::declval&&>().begin(), std::declval const&>().end(), std::declval()))> + // cppcheck-suppress noExplicitConstructor ; // NOLINTNEXTLINE(runtime/explicit) + /*mplct*/ static_array(multi::subarray&& other) // 
NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + : static_array(std::move(other), allocator_type{}) {} + + template&&>().base()), T>, int> = 0, + class = decltype(adl_copy(std::declval&&>().begin(), std::declval const&>().end(), std::declval()))> + explicit static_array(multi::subarray&& other) + : static_array(std::move(other), allocator_type{}) {} + + template&>().base()), T>, int> = 0> + // cppcheck-suppress noExplicitConstructor ; to allow terse syntax + /*mplct*/ static_array(array_ref& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + : array_alloc{}, ref{array_alloc::allocate(static_cast::size_type>(other.num_elements())), other.extensions()} { + static_array::uninitialized_copy_elements(other.data_elements()); + } + + template&>().base()), T>, int> = 0> + explicit static_array(array_ref& other) // NOLINT(fuchsia-default-arguments-declarations) + : array_alloc{}, ref{array_alloc::allocate(static_cast::size_type>(other.num_elements())), other.extensions()} { + static_array::uninitialized_copy_elements(other.data_elements()); + } + + template&&>().base()), T>, int> = 0> + // cppcheck-suppress noExplicitConstructor ; to allow terse syntax + /*mplct*/ static_array(array_ref&& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + : array_alloc{}, ref{array_alloc::allocate(static_cast::size_type>(other.num_elements())), other.extensions()} { + static_array::uninitialized_copy_elements(std::move(other).data_elements()); + } + + template&&>().base()), T>, int> = 0> + explicit static_array(array_ref&& other) // NOLINT(fuchsia-default-arguments-declarations) + : array_alloc{}, ref{array_alloc::allocate(static_cast::size_type>(other.num_elements())), other.extensions()} { + static_array::uninitialized_copy_elements(std::move(other).data_elements()); + } + + template const&>().base()), T>, int> = 0> + // cppcheck-suppress noExplicitConstructor ; to allow terse syntax + /*mplct*/ static_array(array_ref const& 
other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + : array_alloc{}, ref{array_alloc::allocate(static_cast::size_type>(other.num_elements())), other.extensions()} { + static_array::uninitialized_copy_elements(other.data_elements()); + } + + template const&>().base()), T>, int> = 0> + explicit static_array(array_ref const& other) // NOLINT(fuchsia-default-arguments-declarations) + : + array_alloc{}, + ref{ + array_alloc::allocate(static_cast::size_type>(other.num_elements())), + other.extensions() + } { + static_array::uninitialized_copy_elements(std::move(other).data_elements()); + } + + static_array(static_array const& other) // 5b + : + array_alloc{ + multi::allocator_traits::select_on_container_copy_construction(other.alloc()) + }, + ref{ + array_alloc::allocate( + static_cast::size_type>(other.num_elements()) //, + // other.data_elements() + ), + other.extensions() + } + { + uninitialized_copy_elements(other.data_elements()); + } + + template, int> =0> + static_array(ExecutionPolicy&& policy, static_array const& other) + : array_alloc{multi::allocator_traits::select_on_container_copy_construction(other.alloc())}, ref{array_alloc::allocate(static_cast::size_type>(other.num_elements()), other.data_elements()), extensions(other)} { + uninitialized_copy_elements(std::forward(policy), other.data_elements()); + } + + // cppcheck-suppress noExplicitConstructor ; to allow assignment-like construction of nested arrays + static_array(std::initializer_list::value_type> values) + : static_array{array(values.begin(), values.end())} {} // construct all with default constructor and copy to special memory at the end + + static_array( + std::initializer_list::value_type> values, + allocator_type const& alloc + ) + : static_array{static_array(values.begin(), values.end()), alloc} {} + + template + constexpr explicit static_array(TT (&array)[N]) // @SuppressWarnings(cpp:S5945) 
NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backward compatibility // NOSONAR + : static_array(std::begin(array), std::end(array)) {} + + constexpr auto begin() const& -> typename static_array::const_iterator { return ref::begin(); } + constexpr auto end() const& -> typename static_array::const_iterator { return ref::end(); } + + constexpr auto begin() && -> typename static_array::iterator { return ref::begin(); } + constexpr auto end() && -> typename static_array::iterator { return ref::end(); } + + constexpr auto begin() & -> typename static_array::iterator { return ref::begin(); } + constexpr auto end() & -> typename static_array::iterator { return ref::end(); } + + using ref::operator[]; + + BOOST_MULTI_HD constexpr auto operator[](index idx) const& -> typename static_array::const_reference { return ref::operator[](idx); } + BOOST_MULTI_HD constexpr auto operator[](index idx) && -> decltype(auto) { + if constexpr(D == 1) { + return std::move(ref::operator[](idx)); + } else { + return ref::operator[](idx).moved(); + } // NOLINT(readability/braces) + } + BOOST_MULTI_HD constexpr auto operator[](index idx) & -> typename static_array::reference { return ref::operator[](idx); } + + constexpr auto max_size() const noexcept { return static_cast(multi::allocator_traits::max_size(this->alloc())); } // TODO(correaa) divide by nelements in under-dimensions? + + protected: + constexpr void deallocate() { + if(this->num_elements()) { + multi::allocator_traits::deallocate(this->alloc(), this->base_, static_cast::size_type>(this->num_elements())); + } + } + void clear() noexcept { + this->destroy(); + deallocate(); + this->layout_mutable() = {}; + } + template + constexpr auto reindex(Indices... idxs) & -> static_array& { + static_array::layout_t::reindex(idxs...); + return *this; + } + template + constexpr auto reindex(Indices... 
idxs) && -> static_array&& { + reindex(idxs...); + return std::move(*this); + } + + public: + constexpr static_array() noexcept // decay_type&& other, allocator_type const& alloc) noexcept + : array_alloc{}, ref{nullptr, {}} { // other.extensions()} { + // std::move(other).layout_mutable() = {}; + } + + // static_array() = default; +#if __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) + constexpr +#endif + ~static_array() /*noexcept*/ { + destroy(); + deallocate(); + } + + using element_const_ptr = typename std::pointer_traits::template rebind; + using element_move_ptr = multi::move_ptr; + + using reference = std::conditional_t< + (D > 1), + subarray, + std::conditional_t< + D == 1, + typename std::iterator_traits::reference, + void>>; + using const_reference = std::conditional_t< + (D > 1), + subarray, // TODO(correaa) should be const_reference, but doesn't work witn rangev3? + std::conditional_t< + D == 1, + decltype(*std::declval()), + void>>; + + using iterator = multi::array_iterator; + using const_iterator = multi::array_iterator; + + friend auto get_allocator(static_array const& self) -> allocator_type { return self.get_allocator(); } + + BOOST_MULTI_HD constexpr auto data_elements() const& -> element_const_ptr { return this->base_; } + BOOST_MULTI_HD constexpr auto data_elements() & -> typename static_array::element_ptr { return this->base_; } + BOOST_MULTI_HD constexpr auto data_elements() && -> typename static_array::element_move_ptr { return std::make_move_iterator(this->base_); } + + BOOST_MULTI_FRIEND_CONSTEXPR auto data_elements(static_array const& self) { return self.data_elements(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto data_elements(static_array& self) { return self.data_elements(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto data_elements(static_array&& self) { return std::move(self).data_elements(); } + + constexpr auto base() & -> typename static_array::element_ptr { return ref::base(); } + constexpr auto base() const& -> 
typename static_array::element_const_ptr { return typename static_array::element_const_ptr{ref::base()}; } + + BOOST_MULTI_FRIEND_CONSTEXPR auto base(static_array& self) -> typename static_array::element_ptr { return self.base(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto base(static_array const& self) -> typename static_array::element_const_ptr { return self.base(); } + + constexpr auto origin() & -> typename static_array::element_ptr { return ref::origin(); } + constexpr auto origin() const& -> typename static_array::element_const_ptr { return ref::origin(); } + + BOOST_MULTI_FRIEND_CONSTEXPR auto origin(static_array& self) -> typename static_array::element_ptr { return self.origin(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto origin(static_array const& self) -> typename static_array::element_const_ptr { return self.origin(); } + + // private: + // constexpr auto rotated_aux() const { + // typename static_array::layout_t new_layout = this->layout(); + // new_layout.rotate(); + // return subarray{new_layout, this->base_}; + // } + + // constexpr auto rotated() const& {return std::move(*this).rotated_aux();} + // constexpr auto rotated() & {return std::move(*this).rotated_aux();} + // constexpr auto rotated() && {return std::move(*this).rotated_aux();} + + // friend constexpr auto rotated(static_array& self) -> decltype(auto) {return self.rotated();} + // friend constexpr auto rotated(static_array const& self) -> decltype(auto) {return self.rotated();} + + // constexpr auto unrotated() const& -> subarray const { + // typename static_array::layout_t new_layout = this->layout(); + // new_layout.unrotate(); + // return subarray{new_layout, this->base_}; + // } + // constexpr auto unrotated() & { + // typename static_array::layout_t new_layout = this->layout(); + // new_layout.unrotate(); + // return subarray{new_layout, this->base_}; + // } + + // friend constexpr auto unrotated(static_array & self) -> decltype(auto) {return self.unrotated();} + // friend constexpr auto 
unrotated(static_array const& self) -> decltype(auto) {return self.unrotated();} + + template + auto operator=(multi::subarray const& other) -> static_array& { + ref::operator=(other); // TODO(correaa) : protect for self assigment + return *this; + } + auto operator=(static_array const& other) & -> static_array& { + if(std::addressof(other) == this) { + return *this; + } // cert-oop54-cpp + assert(other.extensions() == this->extensions()); + adl_copy_n(other.data_elements(), other.num_elements(), this->data_elements()); + return *this; + } + constexpr auto operator=(static_array&& other) noexcept -> static_array& { // lints (cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + assert(extensions(other) == static_array::extensions()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : allow a constexpr-friendly assert + adl_move(other.data_elements(), other.data_elements() + other.num_elements(), this->data_elements()); // there is no std::move_n algorithm + return *this; + } + template + auto operator=(static_array const& other) & -> static_array& { + assert(extensions(other) == static_array::extensions()); + adl_copy_n(other.data_elements(), other.num_elements(), this->data_elements()); + return *this; + } + + // constexpr explicit operator subarray()& { + // return this->template static_array_cast(*this); + // } + + template + void serialize(Archive& arxiv, unsigned int const version) { + ref::serialize(arxiv, version); + } + + private: + void swap_(static_array& other) noexcept { operator()().swap(other()); } + + public: + friend void swap(static_array& lhs, static_array& rhs) noexcept { + lhs.swap_(rhs); + } +}; + +template +struct static_array // NOLINT(fuchsia-multiple-inheritance) : design +: protected detail::array_allocator +, public array_ref::allocator_type>::pointer> { + static_assert(std::is_same_v::value_type, typename static_array::element>, + "allocator value type must match array value 
type"); + + private: + using array_alloc = detail::array_allocator; + + public: + // NOLINTNEXTLINE(runtime/operator) + constexpr auto operator&() && -> static_array* = delete; // NOSONAR(cpp:S877) NOLINT(google-runtime-operator) : delete to avoid taking address of temporary + // NOLINTNEXTLINE(runtime/operator) + constexpr auto operator&() & -> static_array* { return this; } // NOSONAR(cpp:S877) NOLINT(google-runtime-operator) : override from base + // NOLINTNEXTLINE(runtime/operator) + constexpr auto operator&() const& -> static_array const* { return this; } // NOSONAR(cpp:S877) NOLINT(google-runtime-operator) : override from base + + using array_alloc::get_allocator; + using allocator_type = typename static_array::allocator_type; + using decay_type = array; + + template + void assign(Ptr data) & { + if(data) { + assert(this->num_elements() == 1); + adl_copy_n(data, this->num_elements(), this->base()); + } + } + + template && !std::is_same_v, int> = 0, + class = decltype(adl_copy_n(&std::declval(), 1, typename static_array::element_ptr{}))> + auto operator=(Singleton const& single) -> static_array& { + assign(&single); + return *this; + } + + protected: + using alloc_traits = typename multi::allocator_traits; + using ref = array_ref::template rebind_alloc>::pointer>; + + auto uninitialized_value_construct() { + if constexpr(! std::is_trivially_default_constructible_v && ! multi::force_element_trivial_default_construction) { + return adl_alloc_uninitialized_value_construct_n(static_array::alloc(), this->base_, this->num_elements()); + } + } + + template auto uninitialized_copy(It first) { + #if defined(__clang__) && defined(__CUDACC__) + if constexpr(! std::is_trivially_default_constructible_v && ! 
multi::force_element_trivial_default_construction ) { + adl_alloc_uninitialized_default_construct_n(this->alloc(), this->data_elements(), this->num_elements()); + } + return adl_copy ( first, this->num_elements(), this->data_elements()); + #else + return adl_alloc_uninitialized_copy_n(this->alloc(), first, this->num_elements(), this->data_elements()); + #endif + } + template + auto uninitialized_move(It first) { + return adl_alloc_uninitialized_move_n(this->alloc(), first, this->num_elements(), this->data_elements()); + } + + constexpr void destroy() { + if constexpr(!(std::is_trivially_destructible_v || multi::force_element_trivial_destruction)) { + array_alloc::destroy_n(this->data_elements(), this->num_elements()); + } + } + // auto destroy() { + // return adl_alloc_destroy_n(this->alloc(), this->data_elements(), this->num_elements()); + // // array_alloc::destroy_n(this->data_elements(), this->num_elements()); + // } + + public: + using typename ref::difference_type; + using typename ref::size_type; + using typename ref::value_type; + constexpr explicit static_array(allocator_type const& alloc) : array_alloc{alloc} {} + + constexpr static_array(decay_type&& other, allocator_type const& alloc) // 6b + : array_alloc{alloc}, ref{other.base_, other.extensions()} { + std::move(other).ref::layout_t::operator=({}); + } + + using ref::operator==; + using ref::operator!=; + + static_array(typename static_array::extensions_type extensions, typename static_array::element const& elem, allocator_type const& alloc) // 2 + : array_alloc{alloc}, ref(static_array::allocate(typename static_array::layout_t{extensions}.num_elements()), extensions) { + uninitialized_fill(elem); + } + + static_array(typename static_array::element_type const& elem, allocator_type const& alloc) + : static_array(typename static_array::extensions_type{}, elem, alloc) {} + + template + explicit static_array(multi::subarray const& other, allocator_type const& alloc) + : array_alloc{alloc}, 
ref(static_array::allocate(other.num_elements()), extensions(other)) { + assert(other.num_elements() <= 1); + if(other.num_elements()) { + #if defined(__clang__) && defined(__CUDACC__) + if constexpr(! std::is_trivially_default_constructible_v && ! multi::force_element_trivial_default_construction ) { + adl_alloc_uninitialized_default_construct_n(static_array::alloc(), this->data_elements(), this->num_elements()); + } + adl_copy ( other.base(), other.base() + other.num_elements(), this->base()); + #else + adl_alloc_uninitialized_copy(static_array::alloc(), other.base(), other.base() + other.num_elements(), this->base()); + #endif + } + } + + template + explicit static_array(multi::static_array const& other, allocator_type const& alloc) // TODO(correaa) : call other constructor (above) + : array_alloc{alloc}, ref(static_array::allocate(other.num_elements()), extensions(other)) { + #if defined(__clang__) && defined(__CUDACC__) + if constexpr(! std::is_trivially_default_constructible_v && ! 
multi::force_element_trivial_default_construction ) { + adl_alloc_uninitialized_default_construct_n(static_array::alloc(), this->data_elements(), this->num_elements()); + } + adl_copy_n ( other.data_elements(), other.num_elements(), this->data_elements()); + #else + adl_alloc_uninitialized_copy_n(static_array::alloc(), other.data_elements(), other.num_elements(), this->data_elements()); + #endif + } + + template + explicit static_array(multi::static_array const& other) + : static_array(other, allocator_type{}) {} + + auto uninitialized_fill(typename static_array::element const& elem) { + array_alloc::uninitialized_fill_n( + this->base_, + static_cast::size_type>(this->num_elements()), + elem + ); + } + + template + auto operator=(multi::subarray const& other) -> static_array& { + adl_copy_n(other.base(), 1, this->base()); + return *this; + } + + static_array( + typename static_array::extensions_type const& extensions, + typename static_array::element const& elem + ) // 2 + : array_alloc{}, ref(static_array::allocate(static_cast::size_type>(typename static_array::layout_t{extensions}.num_elements()), nullptr), extensions) { + uninitialized_fill(elem); + } + + static_array() : static_array(multi::iextensions<0>{}) {} + + explicit static_array(typename static_array::element const& elem) + : static_array(multi::iextensions<0>{}, elem) {} + + template && !std::is_same_v, int> = 0, + class = decltype(adl_copy_n(&std::declval(), 1, typename static_array::element_ptr{}))> + // cppcheck-suppress noExplicitConstructor ; to allow terse syntax // NOLINTNEXTLINE(runtime/explicit) + static_array(Singleton const& single) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + : ref(static_array::allocate(1), typename static_array::extensions_type{}) { + #if defined(__clang__) && defined(__CUDACC__) + if constexpr(! std::is_trivially_default_constructible_v && ! 
multi::force_element_trivial_default_construction ) { + adl_alloc_uninitialized_default_construct_n(static_array::alloc(), this->data_elements(), this->num_elements()); + } + adl_copy_n ( &single, 1, this->data_elements()); + #else + adl_alloc_uninitialized_copy_n(static_array::alloc(), &single, 1, this->data_elements()); + #endif + } + + template{}>> + explicit static_array(typename static_array::index_extension const& extension, ValueType const& value, allocator_type const& alloc) // 3 + : static_array(extension * extensions(value), alloc) { + using std::fill; + fill(this->begin(), this->end(), value); + } + template{}>> + explicit static_array(typename static_array::index_extension const& extension, ValueType const& value) // 3 // TODO(correaa) : call other constructor (above) + : static_array(extension * extensions(value)) { + using std::fill; + fill(this->begin(), this->end(), value); + } + + explicit static_array(typename static_array::extensions_type const& extensions, allocator_type const& alloc) // 3 + : array_alloc{alloc}, ref{static_array::allocate(typename static_array::layout_t{extensions}.num_elements()), extensions} { + uninitialized_value_construct(); + } + explicit static_array(typename static_array::extensions_type const& extensions) // 3 + : static_array(extensions, allocator_type{}) {} + + static_array(static_array const& other, allocator_type const& alloc) // 5b + : array_alloc{alloc}, ref{static_array::allocate(other.num_elements()), extensions(other)} { + uninitialized_copy_(other.data_elements()); + } + + static_array(static_array const& other) // 5b + : array_alloc{other.get_allocator()}, ref{static_array::allocate(other.num_elements(), other.data_elements()), {}} { + uninitialized_copy(other.data_elements()); + } + + static_array(static_array&& other) noexcept // it is private because it is a valid operation for derived classes //5b + : array_alloc{other.get_allocator()}, 
ref{static_array::allocate(static_cast::size_type>(other.num_elements()), other.data_elements()), other.extensions()} { + uninitialized_move(std::move(other).data_elements()); + } + // template static auto distance(It a, It b) {using std::distance; return distance(a, b);} + + protected: + void deallocate() { // TODO(correaa) : move this to detail::array_allocator + if(this->num_elements()) { + multi::allocator_traits::deallocate(this->alloc(), this->base_, static_cast::size_type>(this->num_elements())); + } + } + void clear() noexcept { + this->destroy(); + deallocate(); + layout_t<0>::operator=({}); + } + + public: + ~static_array() noexcept { + this->destroy(); + deallocate(); + } + using element_const_ptr = typename std::pointer_traits::template rebind; + + BOOST_MULTI_FRIEND_CONSTEXPR auto get_allocator(static_array const& self) -> allocator_type { return self.get_allocator(); } + + constexpr auto base() & -> typename static_array::element_ptr { return ref::base(); } + constexpr auto base() const& -> typename static_array::element_const_ptr { return ref::base(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto base(static_array& self) -> typename static_array::element_ptr { return self.base(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto base(static_array const& self) -> typename static_array::element_const_ptr { return self.base(); } + + constexpr auto origin() & -> typename static_array::element_ptr { return ref::origin(); } + constexpr auto origin() const& -> typename static_array::element_const_ptr { return ref::origin(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto origin(static_array& self) -> typename static_array::element_ptr { return self.origin(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto origin(static_array const& self) -> typename static_array::element_const_ptr { return self.origin(); } + + constexpr operator typename std::iterator_traits::reference() const& { // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + return *(this->base_); + } + constexpr operator 
std::add_rvalue_reference_t::reference>() && { // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + return std::move(*(this->base_)); + } + constexpr operator typename std::iterator_traits::reference() & { // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + return *(this->base_); + } + + constexpr explicit operator typename static_array::element_type() const { + return *(this->base_); + } + + constexpr auto rotated() const& { + typename static_array::layout_t new_layout = *this; + new_layout.rotate(); + return subarray{new_layout, this->base_}; + } + + constexpr auto rotated() & { + typename static_array::layout_t new_layout = *this; + new_layout.rotate(); + return subarray{new_layout, this->base_}; + } + + constexpr auto rotated() && { + typename static_array::layout_t new_layout = *this; + new_layout.rotate(); + return subarray{new_layout, this->base_}; + } + + friend constexpr auto rotated(static_array& self) -> decltype(auto) { return self.rotated(); } + friend constexpr auto rotated(static_array const& self) -> decltype(auto) { return self.rotated(); } + + private: + constexpr auto unrotated_aux_() { + typename static_array::layout_t new_layout = *this; + new_layout.unrotate(); + return subarray{new_layout, this->base_}; + } + + public: + constexpr auto unrotated() & { return unrotated_aux_(); } + constexpr auto unrotated() const& { return unrotated_aux_().as_const(); } + + friend constexpr auto unrotated(static_array& self) -> decltype(auto) { return self.unrotated(); } + friend constexpr auto unrotated(static_array const& self) -> decltype(auto) { return self.unrotated(); } + + // TODO(correaa) find a symbolic way to express rotations, A << 1, A >> 1, A <>o; ~A; !A; ++A; A++; --A; A--; -A; +A; e<>e; e>>A; < decltype(auto) {return rotated(d);} + // constexpr auto operator>>(dimensionality_type d) -> decltype(auto) {return unrotated(d);} + // constexpr auto operator<<(dimensionality_type d) const -> decltype(auto) {return 
rotated(d);} + // constexpr auto operator>>(dimensionality_type d) const -> decltype(auto) {return unrotated(d);} + + constexpr auto operator=(static_array const& other) -> static_array& { + assert(extensions(other) == static_array::extensions()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : allow a constexpr-friendly assert + if(this == &other) { + return *this; + } // lints (cert-oop54-cpp) : handle self-assignment properly + adl_copy_n(other.data_elements(), other.num_elements(), this->data_elements()); + return *this; + } + + private: + constexpr auto equal_extensions_if_(std::true_type /*true */, static_array const& other) { return this->extensions() == extensions(other); } + constexpr auto equal_extensions_if_(std::false_type /*false*/, static_array const& /*other*/) { return true; } + + public: + constexpr auto operator=(static_array&& other) noexcept -> static_array& { + assert(equal_extensions_if_(std::integral_constant{}, other)); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : allow a constexpr-friendly assert + adl_move(other.data_elements(), other.data_elements() + other.num_elements(), this->data_elements()); // there is no std::move_n algorithm + return *this; + } + + template{}>> + auto operator=(static_array const& other) & -> static_array& { + assert(extensions(other) == static_array::extensions()); + adl_copy_n(other.data_elements(), other.num_elements(), this->data_elements()); + return *this; + } + + constexpr explicit operator subarray() & { + return this->template static_array_cast(); + // return static_array_cast(*this); + } + + template + void serialize(Archive& arxiv, unsigned int const version) { + ref::serialize(arxiv, version); + } +}; + +template +struct array : static_array { + // using static_ = static_array; + using static_array::static_array; + + + using static_array::operator=; + + #if !defined(__NVCOMPILER) || (__NVCOMPILER_MAJOR__ > 22 || 
(__NVCOMPILER_MAJOR__ == 22 && __NVCOMPILER_MINOR__ > 5)) // bug in nvcc 22.5: error: "operator=" has already been declared in the current scope + template + auto operator=(multi::array const& other) & -> array& { + if(other.base()) { + adl_copy_n(other.base(), other.num_elements(), this->base()); + } + return *this; + } + + template + auto operator=(multi::array const& other) && -> array&& { // NOLINT(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) should assigment return auto& ? + if(other.base()) { + adl_copy_n(other.base(), other.num_elements(), this->base()); + } + return std::move(*this); + } + #endif + + template>{}, int> /*dummy*/ = 0> + auto operator=(Other const& other) -> array& { + this->assign(&other); + return *this; + } // NOLINT(google-runtime-operator) allow assigment from other ranges + + auto reextent(typename array::extensions_type const& /*empty_extensions*/) -> array& { + return *this; + } + + // NOLINTNEXTLINE(runtime/operator) + constexpr auto operator&() && -> array* = delete; // NOLINT(google-runtime-operator) //NOSONAR delete operator&& defined in base class to avoid taking address of temporary + // auto operator&() & -> array *{return this;} + // auto operator&() const& -> array const*{return this;} +}; + +template +struct array : static_array { + using static_ = static_array; + static_assert( + std::is_same_v< + typename multi::allocator_traits::value_type, std::remove_const_t + // typename array::alloc_traits::value_type, std::remove_const_t + > + || + std::is_same_v< + typename multi::allocator_traits::value_type, void + // typename array::alloc_traits::value_type, void + >, + "only exact type of array element or void (default?) 
is allowed as allocator value type" + ); + + // NOLINTNEXTLINE(runtime/operator) + BOOST_MULTI_HD constexpr auto operator&() && -> array* = delete; // NOLINT(google-runtime-operator) //NOSONAR delete operator&& defined in base class to avoid taking address of temporary + // NOLINTNEXTLINE(runtime/operator) + BOOST_MULTI_HD constexpr auto operator&() & -> array* { return this; } // NOLINT(google-runtime-operator) //NOSONAR delete operator&& defined in base class to avoid taking address of temporary + // NOLINTNEXTLINE(runtime/operator) + BOOST_MULTI_HD constexpr auto operator&() const& -> array const* { return this; } // NOLINT(google-runtime-operator) //NOSONAR delete operator&& defined in base class to avoid taking address of temporary + + friend auto sizes(array const& self) -> typename array::sizes_type { return self.sizes(); } + + template> + void serialize(Archive& arxiv, unsigned int const version) { + auto extensions_ = this->extensions(); + arxiv& ArTraits::make_nvp("extensions", extensions_); // don't try `using ArTraits::make_nvp`, make_nvp is a static member + if(this->extensions() != extensions_) { + clear(); + this->reextent(extensions_); + } + static_::serialize(arxiv, version); + } + + // vvv workaround for MSVC 14.3 and ranges, TODO(correaa) good solution would be to inherit from const_subarray + BOOST_MULTI_HD operator subarray const&() const { // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + return reinterpret_cast const&>(*this); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + } + + // move this to static_array + template< + class Range, + std::enable_if_t>::value, int> =0, + class = decltype(Range{std::declval(), std::declval()}) + > + constexpr explicit operator Range() const { + // vvv Range{...} needed by Windows GCC? + return Range{this->begin(), this->end()}; // NOLINT(fuchsia-default-arguments-calls) e.g. 
std::vector(it, it, alloc = {}) + } + + // move this to static_array + template, int> = 0> + constexpr explicit operator TTN const&() const& { return this->template to_carray_(); } + template, int> = 0> + constexpr explicit operator TTN&() && { return this->template to_carray_(); } + template, int> = 0> + constexpr explicit operator TTN&() & { return this->template to_carray_(); } + + // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved) false positive in clang-tidy 17 ? + using static_array::static_array; // MSVC wants fullname here? // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) passing c-arrays to base + using typename static_array::value_type; // MSVC wants fullname here? + +#ifdef _MSC_VER + array(typename array::extensions_type exts, typename array::allocator_type const& alloc) + : static_array(exts, alloc) {} + array(typename array::extensions_type exts) + : static_array(exts) {} +#endif + + // cppcheck-suppress noExplicitConstructor ; to allow assignment-like construction of nested arrays + constexpr array(std::initializer_list::value_type> ilv) + : static_{array(ilv.begin(), ilv.end())} {} + + template::value_type, OtherT> && !std::is_convertible_v::value_type> && (D == 1)>> + constexpr explicit array(std::initializer_list ilv) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) inherit explicitness of conversion from the elements + : static_{array(ilv.begin(), ilv.end()).element_transformed([](auto const& elem) noexcept { return static_cast(elem); })} {} // TODO(correaa) investigate why noexcept is necessary + + array() = default; + array(array const&) = default; + + auto reshape(typename array::extensions_type extensions) & -> array& { + typename array::layout_t const new_layout{extensions}; // TODO(correaa) implement move-reextent in terms of reshape + assert(new_layout.num_elements() == this->num_elements()); + this->layout_mutable() = new_layout; + return *this; + } + + auto 
clear() noexcept -> array& { + static_::clear(); + return *this; + } + friend auto clear(array& self) noexcept -> array& { return self.clear(); } + + BOOST_MULTI_FRIEND_CONSTEXPR auto data_elements(array const& self) { return self.data_elements(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto data_elements(array& self) { return self.data_elements(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto data_elements(array&& self) { return std::move(self).data_elements(); } + + auto move() & -> subarray> { + subarray> + ret = multi::static_array_cast>(*this); + layout_t:: operator=({}); + return ret; + } + friend auto move(array& self) -> subarray> { + return self.move(); + } + + array(array&& other, typename array::allocator_type const& alloc) noexcept : static_array{std::move(other), alloc} {} + array(array&& other) noexcept : array{std::move(other), other.get_allocator()} {} + + friend auto get_allocator(array const& self) -> typename array::allocator_type { return self.get_allocator(); } + + void swap(array& other) noexcept { + using std::swap; + if constexpr(multi::allocator_traits::propagate_on_container_swap::value) { + swap(this->alloc(), other.alloc()); + } + swap(this->base_, other.base_); + swap( + this->layout_mutable(), + other.layout_mutable() + ); + } + +#ifndef NOEXCEPT_ASSIGNMENT + auto operator=(array&& other) noexcept -> array& { + if(this == std::addressof(other)) { + return *this; + } + clear(); + this->base_ = other.base_; + if constexpr(multi::allocator_traits::propagate_on_container_move_assignment::value) { + this->alloc() = std::move(other.alloc()); + } + this->layout_mutable() = std::exchange(other.layout_mutable(), {}); + return *this; + } + + auto operator=(array const& other) -> array& { + if(array::extensions() == other.extensions()) { + if(this == &other) { + return *this; + } // required by cert-oop54-cpp + if constexpr(multi::allocator_traits::propagate_on_container_copy_assignment::value) { + this->alloc() = other.alloc(); + } + static_::operator=(other); 
+ } else { + clear(); + if constexpr(multi::allocator_traits::propagate_on_container_copy_assignment::value) { + this->alloc() = other.alloc(); + } + this->layout_mutable() = other.layout(); + array::allocate(); + array::uninitialized_copy_elements(other.data_elements()); + } + return *this; + } +#else + auto operator=(array o) noexcept -> array& { return swap(o), *this; } +#endif + + template + auto operator=(multi::subarray const& other) -> array& { + if(array::extensions() == other.extensions()) { + static_::operator=(other); // TODO(correaa) : protect for self assigment + } else { + operator=(array{other}); + } + return *this; + } + + template // , std::enable_if_t> , int> =0> + auto operator=(multi::array const& other) -> array& { // TODO(correaa) : check that LHS is not read-only? + if(array::extensions() == other.extensions()) { + // this->operator()() = other; + static_::operator=(other); + } else if(this->num_elements() == other.extensions().num_elements()) { + reshape(other.extensions()); + static_::operator=(other); + // this->operator()() = other; + } else { + operator=(static_cast(other)); + } + return *this; + } + + template< + class Range, + class = decltype(std::declval().operator=(std::declval())), + std::enable_if_t>::value, int> = 0, + std::enable_if_t>{}, int> = 0> + auto operator=(Range&& other) -> array& { // TODO(correaa) : check that LHS is not read-only? + if(array::extensions() == other.extensions()) { + this->operator()() = other; + // static_::operator=(other); + } else if(this->num_elements() == other.extensions().num_elements()) { + reshape(other.extensions()); + // static_::operator=(other); + this->operator()() = other; + } else { + operator=(static_cast(std::forward(other))); + } + return *this; + } + + template< + class Range, + class = decltype(std::declval().operator=(std::declval())), + std::enable_if_t>{}, int> = 0> + auto from(Range&& other) -> array& { // TODO(correaa) : check that LHS is not read-only? 
+ if(array::extensions() == other.extensions()) { + this->operator()() = other; + // static_::operator=(other); + } else if(this->num_elements() == other.extensions().num_elements()) { + reshape(other.extensions()); + this->operator()() = other; + // static_::operator=(other); + } else { + operator=(static_cast(std::forward(other))); + } + return *this; + } + + friend void swap(array& self, array& other) noexcept(true /*noexcept(self.swap(other))*/) { self.swap(other); } + + void assign(typename array::extensions_type extensions, typename array::element const& elem) { + if(array::extensions() == extensions) { + adl_fill_n(this->base_, this->num_elements(), elem); + } else { + this->clear(); + (*this).array::layout_t::operator=(layout_t{extensions}); + this->base_ = this->static_::array_alloc::allocate(this->num_elements(), nullptr); + adl_alloc_uninitialized_fill_n(this->alloc(), this->base_, this->num_elements(), elem); + } + } + + template + auto assign(It first, It last) -> array& { + using std::all_of; + using std::next; + if(adl_distance(first, last) == this->size()) { + static_::ref::assign(first); + } else { + this->operator=(array(first, last)); + } + return *this; + } + void assign(std::initializer_list values) { assign(values.begin(), values.end()); } + + template auto assign(Range&& other) & -> decltype(assign(adl_begin(std::forward(other)), adl_end(std::forward(other)))) { // TODO(correaa) use forward + return assign(adl_begin(std::forward(other)), adl_end(std::forward(other))); + } + + auto operator=(std::initializer_list values) -> array& { + assign(values.begin(), values.end()); + return *this; + } + + // template + // [[deprecated("use extensions for reextents, not tuples")]] + // auto reextent(std::tuple const& other) -> array& { + // return reextent( + // std::apply([](auto const&... extensions) {return typename array::extensions_type(extensions...);}, other) + // ); // paren is important here ext_type(...) 
for allow narrowing casts ^^^ + // } + + auto reextent(typename array::extensions_type const& extensions) && -> array&& { + if(extensions == this->extensions()) { + return std::move(*this); + } + this->destroy(); + this->deallocate(); + this->layout_mutable() = typename array::layout_t{extensions}; + this->base_ = this->static_::array_alloc::allocate( + static_cast::size_type>( + typename array::layout_t{extensions}.num_elements() + ), + this->data_elements() // used as hint + ); + if constexpr(!(std::is_trivially_default_constructible_v || multi::force_element_trivial_default_construction)) { + adl_alloc_uninitialized_value_construct_n(this->alloc(), this->base_, this->num_elements()); + } + return std::move(*this); + } + + auto reextent(typename array::extensions_type const& extensions) & -> array& { + if(extensions == this->extensions()) { + return *this; + } + auto&& tmp = typename array::ref( + this->static_::array_alloc::allocate( + static_cast::size_type>( + typename array::layout_t{extensions}.num_elements() + ), + this->data_elements() // used as hint + ), + extensions + ); + if constexpr(!(std::is_trivially_default_constructible_v || multi::force_element_trivial_default_construction)) { + adl_alloc_uninitialized_value_construct_n(this->alloc(), tmp.data_elements(), tmp.num_elements()); + } + auto const is = intersection(this->extensions(), extensions); + tmp.apply(is) = this->apply(is); // TODO(correaa) : use (and implement) `.move();` + this->destroy(); + this->deallocate(); + this->base_ = tmp.base(); + this->layout_mutable() = tmp.layout(); + return *this; + } + + constexpr auto operator+() const& { return array{*this}; } + constexpr auto operator+() && { return array{std::move(*this)}; } + + // auto reextent(typename array::extensions_type const& extensions, typename array::element const& elem) && -> array&& { + // if(extensions == this->extensions()) {return std::move(*this);} + // this->destroy(); + // this->deallocate(); + // this->layout_mutable() 
= typename array::layout_t{extensions}; + // this->base_ = this->static_::array_alloc::allocate( + // static_cast::size_type>( + // typename array::layout_t{extensions}.num_elements() + // ), + // this->data_elements() // used as hint + // ); + // this->uninitialized_fill_n(this->base_, static_cast::size_type>(this->num_elements()), elem); + + // return std::move(*this); + // } + + auto reextent(typename array::extensions_type const& exs, typename array::element const& elem) & -> array& { + if(exs == this->extensions()) { + return *this; + } + + // array tmp(x, e, this->get_allocator()); // TODO(correaa) opportunity missed to use hint allocation + // auto const is = intersection(this->extensions(), x); + // tmp.apply(is) = this->apply(is); + // swap(tmp); + + // implementation with hint + auto&& tmp = typename array::ref( + this->static_::array_alloc::allocate( + static_cast::size_type>(typename array::layout_t{exs}.num_elements()), + this->data_elements() // use as hint + ), + exs + ); + this->uninitialized_fill_n(tmp.data_elements(), static_cast::size_type>(tmp.num_elements()), elem); + auto const is = intersection(this->extensions(), exs); + tmp.apply(is) = this->apply(is); + this->destroy(); + this->deallocate(); + this->base_ = tmp.base(); // TODO(correaa) : use (and implement) `.move();` + this->layout_mutable() = tmp.layout(); + // (*this).array::layout_t::operator=(tmp.layout()); + + return *this; + } + template constexpr auto reindex(Indices... idxs) && -> array&& { + this->layout_mutable().reindex(idxs...); + return std::move(*this); + } + template constexpr auto reindex(Indices... 
idxs) & -> array& { + this->layout_mutable().reindex(idxs...); + return *this; + } + + ~array() = default; +}; + +#if defined(__cpp_deduction_guides) + +#define BOOST_MULTI_IL std::initializer_list // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing TODO(correaa) remove + +// vvv MSVC 14.3 in c++17 mode needs paranthesis in dimensionality_type(d) +template static_array(BOOST_MULTI_IL) -> static_array(1U), std::allocator>; // MSVC needs the allocator argument error C2955: 'boost::multi::static_array': use of class template requires template argument list +template static_array(BOOST_MULTI_IL>) -> static_array(2U), std::allocator>; +template static_array(BOOST_MULTI_IL>>) -> static_array(3U), std::allocator>; +template static_array(BOOST_MULTI_IL>>>) -> static_array(4U), std::allocator>; +template static_array(BOOST_MULTI_IL>>>>) -> static_array(5U), std::allocator>; + +// TODO(correaa) add zero dimensional case? +template array(BOOST_MULTI_IL) -> array(1U)>; +template array(BOOST_MULTI_IL>) -> array(2U)>; +template array(BOOST_MULTI_IL>>) -> array(3U)>; +template array(BOOST_MULTI_IL>>>) -> array(4U)>; +template array(BOOST_MULTI_IL>>>>) -> array(5U)>; + +#undef BOOST_MULTI_IL + +template array(T[]) -> array(1U)>; // NOSONAR(cpp:S5945) NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + +// vvv these are necessary to catch {n, m, ...} notation (or single integer notation) +template>> array(iextensions<0>, T) -> array(0U)>; // TODO(correaa) use some std::allocator_traits instead of is_allocator +template>> array(iextensions<1>, T) -> array(1U)>; +template>> array(iextensions<2>, T) -> array(2U)>; +template>> array(iextensions<3>, T) -> array(3U)>; +template>> array(iextensions<4>, T) -> array(4U)>; +template>> array(iextensions<5>, T) -> array(5U)>; + +// generalization, will not work with naked {n, m, ...} notation (or single integer notation) +template>> +array(iextensions, T) -> array; + +template +array(MatrixRef) 
-> array; + +template array(subarray) -> array; + +#endif // ends defined(__cpp_deduction_guides) + +template +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backwards compatibility +auto decay(const T (&arr)[N]) noexcept -> multi::array, std::rank_v> { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backwards compatibility + return multi::array_cref, std::rank_v>(data_elements(arr), extensions(arr)); +} + +template +struct array_traits { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backwards compatibility + using reference = T&; + using element = std::remove_all_extents_t; // NOSONAR(cpp:S5945) NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backwards compatibility + using decay_type = multi::array; +}; + +} // end namespace boost::multi + +namespace boost::multi::pmr { + +#ifdef BOOST_MULTI_HAS_MEMORY_RESOURCE +template +using array = boost::multi::array>; +#else +template +struct [[deprecated("no PMR allocator")]] array; // your version of C++ doesn't provide polymorphic_allocators +#endif + +} // end namespace boost::multi::pmr + +// common_reference for compatibility with ranges +#if defined(__cpp_lib_common_reference) || defined(__cpp_lib_ranges) +// TODO(correaa) achieve this by normal inheritance +template struct std::common_reference::basic_const_array &&, ::boost::multi::array &> { using type = typename ::boost::multi::array::basic_const_array &&; }; +template struct std::common_reference::basic_const_array &&, ::boost::multi::array const&> { using type = typename ::boost::multi::array::basic_const_array &&; }; +template struct std::common_reference< ::boost::multi::array &, typename ::boost::multi::array::basic_const_array &&> { using type = typename ::boost::multi::array::basic_const_array &&; }; +template struct std::common_reference< 
::boost::multi::array const&, typename ::boost::multi::array::basic_const_array &&> { using type = typename ::boost::multi::array::basic_const_array &&; }; +template struct std::common_reference::basic_const_array , ::boost::multi::array &> { using type = typename ::boost::multi::array::basic_const_array ; }; +template struct std::common_reference< ::boost::multi::array const&, typename ::boost::multi::array::basic_const_array const&> { using type = typename ::boost::multi::array::basic_const_array const&; }; +template struct std::common_reference::basic_const_array const&, ::boost::multi::array const&> { using type = typename ::boost::multi::array::basic_const_array const&; }; + +template struct std::common_reference::basic_const_array &, ::boost::multi::array &> { using type = typename ::boost::multi::array::basic_const_array &; }; +template struct std::common_reference< ::boost::multi::array &, typename ::boost::multi::array::basic_const_array &> { using type = typename ::boost::multi::array::basic_const_array &; }; +#endif + +namespace boost::serialization { + +template +struct version> { + using type = std::integral_constant; // TODO(correaa) use constexpr variable here, not macro + enum /*class value_t*/ { value = type::value }; // NOSONAR(cpp:S3642) // https://community.sonarsource.com/t/suppress-issue-in-c-source-file/43154/24 +}; + +} // end namespace boost::serialization + +#undef BOOST_MULTI_HD + +#endif // BOOST_MULTI_ARRAY_HPP_ diff --git a/external_codes/boost_multi/multi/include/boost/multi/array_ref.hpp b/external_codes/boost_multi/multi/include/boost/multi/array_ref.hpp new file mode 100644 index 0000000000..6806b94442 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/array_ref.hpp @@ -0,0 +1,3153 @@ +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_ARRAY_REF_HPP_ +#define BOOST_MULTI_ARRAY_REF_HPP_ +#pragma once + +#include +#include + +#include +#include +#include // for pointer_traits +#include // for random_iterable +#include +#include // for dimensionality_type + +#include // fpr copy_n +#include +#include // for std::memset in reinterpret_cast +#include // for std::invoke +#include // for std::next +#include // for std::pointer_traits +#include // for std::launder + +#if __has_include() +#if !defined(_MSVC_LANG) || (_MSVC_LANG > 202002L) +#include +#endif +#if defined(__cpp_lib_span) && __cpp_lib_span >= 202002L && !defined(_MSVC_LANG) +#define BOOST_MULTI_HAS_SPAN +#endif +#endif + +#include // for forward + +#if !defined(__NVCC__) + #define BOOST_MULTI_FRIEND_CONSTEXPR friend constexpr // this generates a problem with intel compiler 19 and v2021 "a constexpr function cannot have a nonliteral return type" +#else + #define BOOST_MULTI_FRIEND_CONSTEXPR friend /*constexpr*/ +#endif + +#if defined(__NVCC__) + #define BOOST_MULTI_HD __host__ __device__ +#else + #define BOOST_MULTI_HD +#endif + +namespace boost::multi { + +template> +struct subarray; + +template +constexpr auto is_subarray_aux(subarray const&) -> std::true_type; +constexpr auto is_subarray_aux(... ) -> std::false_type; + +template struct is_subarray: decltype(is_subarray_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + +template +struct of_dim { +template +static constexpr auto is_subarray_of_dim_aux(subarray const&) -> std::true_type; +static constexpr auto is_subarray_of_dim_aux(... 
) -> std::false_type; + +template struct is_subarray_of_dim: decltype(is_subarray_of_dim_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) +}; + +template +constexpr auto home(Array&& arr) +->decltype(std::forward(arr).home()) { + return std::forward(arr).home(); } + +template> struct array; + +template> +struct array_types : private Layout { // cppcheck-suppress syntaxError ; false positive in cppcheck + using element = T; + using element_type = element; // this follows more closely https://en.cppreference.com/w/cpp/memory/pointer_traits + + using element_ptr = ElementPtr; + using element_const_ptr = typename std::pointer_traits::template rebind; + using element_move_ptr = multi::move_ptr; + + using element_ref = typename std::iterator_traits::reference; + + using layout_t = Layout; + + using rank = typename layout_t::rank; + + using layout_t::rank_v; + + // using typename layout_t::dimensionality_type; // needed by MSVC + using dimensionality_type = typename layout_t::dimensionality_type; // needed by MSVC + + using Layout::dimensionality; +// #else +// static constexpr auto dimensionality = layout_t::dimensionality; +// #endif + + // using layout_t::num_dimensions; + + [[deprecated("this is from BMA")]] static constexpr auto num_dimensions() {return dimensionality;} + + using typename layout_t::stride_type; + using layout_t::stride ; + + using layout_t::num_elements; + using layout_t::offset; + + using layout_t::offsets; + + using typename layout_t::index; + using typename layout_t::index_range; + using typename layout_t::index_extension; + + using typename layout_t::strides_type; + // using layout_t::strides ; + + auto strides() const { return convertible_tuple(layout_t::strides()); } + [[deprecated("BMA backward compatible")]] auto index_bases() const -> std::ptrdiff_t const*; // = delete; + + using typename layout_t::difference_type; + + using typename layout_t::size_type; + using layout_t::size ; + + using layout_t::nelems; + 
+ using typename layout_t::extension_type; + using layout_t::extension; + + using typename layout_t::extensions_type; + using layout_t::extensions; + + constexpr auto extensions() const -> extensions_type {return static_cast(*this).extensions();} + + using layout_t::is_empty; + using layout_t:: empty; + + using layout_t::sub; + + using typename layout_t::sizes_type; + using layout_t::sizes; + + [[deprecated("from BMA")]] constexpr auto shape() const {return convertible_tuple(this->sizes());} + + using layout_t::is_compact; + + friend constexpr auto size (array_types const& self) noexcept -> size_type {return self.size ();} + friend constexpr auto extension (array_types const& self) noexcept -> extension_type {return self.extension ();} + friend constexpr auto is_empty (array_types const& self) noexcept -> bool {return self.is_empty ();} + friend constexpr auto num_elements(array_types const& self) noexcept -> size_type {return self.num_elements();} + + friend constexpr auto extensions (array_types const& self) noexcept -> extensions_type {return self.extensions ();} + friend constexpr auto sizes (array_types const& self) noexcept -> sizes_type {return self.sizes ();} + + // TODO(correaa) [[deprecated("use member syntax for non-salient properties")]] + friend + constexpr auto stride (array_types const& self) noexcept -> stride_type {return self.stride ();} + + // TODO(correaa) [[deprecated("use member syntax for non-salient properties")]] + friend + constexpr auto strides (array_types const& self) noexcept -> strides_type {return self.strides ();} + + protected: + constexpr auto layout_mutable() -> layout_t& {return static_cast(*this);} + + public: + using value_type = typename std::conditional_t< + (D > 1), + array::default_allocator_type>, + element + >; + + using reference = typename std::conditional_t< + (D > 1), + subarray, + typename std::iterator_traits::reference + >; + + using const_reference = typename std::conditional_t< + (D > 1), + subarray, + typename 
std::iterator_traits::reference + >; + + BOOST_MULTI_HD constexpr auto base() & -> element_ptr {return base_;} + BOOST_MULTI_HD constexpr auto base() && -> element_ptr {return base_;} + BOOST_MULTI_HD constexpr auto base() const& -> element_const_ptr {return base_;} + + BOOST_MULTI_HD constexpr auto cbase() const -> element_const_ptr {return base_;} + BOOST_MULTI_HD constexpr auto mbase() const& -> element_ptr& {return base_;} + + friend /*constexpr*/ auto base(array_types & self) -> element_ptr {return self.base();} + friend /*constexpr*/ auto base(array_types && self) -> element_ptr {return std::move(self).base();} + friend /*constexpr*/ auto base(array_types const& self) -> element_const_ptr {return self.base();} + + BOOST_MULTI_HD constexpr auto layout() const -> layout_t const& {return *this;} + friend constexpr auto layout(array_types const& self) -> layout_t const& {return self.layout();} + + constexpr auto origin() const& -> decltype(auto) {return base_ + Layout::origin();} + friend constexpr auto origin(array_types const& self) -> decltype(auto) {return self.origin();} + + protected: + element_ptr base_; // NOLINT(cppcoreguidelines-non-private-member-variables-in-classes,misc-non-private-member-variables-in-classes) : TODO(correaa) try to make it private, [static_]array needs mutation + template friend struct array_iterator; + + using derived = subarray; + BOOST_MULTI_HD constexpr explicit array_types(std::nullptr_t) : Layout{}, base_(nullptr) {} + + public: + array_types() = default; + + BOOST_MULTI_HD constexpr array_types(layout_t const& lyt, element_ptr const& data) + : Layout{lyt}, base_{data} {} + + protected: + template< + class ArrayTypes, + typename = std::enable_if_t>{}> + , decltype(multi::detail::explicit_cast(std::declval().base_))* = nullptr + > + // underlying pointers are explicitly convertible + BOOST_MULTI_HD constexpr explicit array_types(ArrayTypes const& other) + : Layout{other.layout()}, base_{other.base_} {} + + template< + class 
ArrayTypes, + typename = std::enable_if_t>{}>, + decltype(multi::detail::implicit_cast(std::declval().base_))* = nullptr + > + // cppcheck-suppress noExplicitConstructor ; because underlying pointers are implicitly convertible + BOOST_MULTI_HD constexpr /*implt*/ array_types(ArrayTypes const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : inherit behavior of underlying pointer + : Layout{other.layout()}, base_{other.base_} {} + + template< + typename ElementPtr2, + typename = decltype(Layout{std::declval const&>().layout()}), + typename = decltype(element_ptr{std::declval const&>().base_}) + > + BOOST_MULTI_HD constexpr explicit array_types(array_types const& other) + : Layout{other.layout()}, base_{other.base_} {} + + template friend struct array_types; +}; + +template +struct subarray_ptr // NOLINT(fuchsia-multiple-inheritance) : to allow mixin CRTP +//: private Ref // TODO(correaa) : remove inheritance from Ref?? +: boost::multi::iterator_facade< + subarray_ptr, void, std::random_access_iterator_tag, + Ref const&, typename Layout::difference_type +> { //, boost::multi::totally_ordered2, void> +private: + mutable Ref ref_; + +public: + ~subarray_ptr() = default; // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + + using pointer = Ref const*; + using element_type = typename Ref::decay_type; + using difference_type = typename Layout::difference_type; + + using value_type = element_type; + using reference = Ref; + using iterator_category = std::random_access_iterator_tag; + + // cppcheck-suppress noExplicitConstructor + BOOST_MULTI_HD constexpr subarray_ptr(std::nullptr_t nil) : ref_{nil} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) terse syntax and functionality by default + BOOST_MULTI_HD constexpr subarray_ptr() : subarray_ptr{nullptr} {} // TODO(correaa) consider uninitialized ptr + + template friend struct subarray_ptr; + + BOOST_MULTI_HD constexpr subarray_ptr(typename 
Ref::element_ptr base, layout_t lyt) : ref_{lyt, base} {} + BOOST_MULTI_HD constexpr subarray_ptr(typename Ref::element_ptr base, index_extensions exts) : ref_{base, exts} {} + + template(std::declval()))* = nullptr + > + // cppcheck-suppress noExplicitConstructor ; because underlying pointer is implicitly convertible + BOOST_MULTI_HD constexpr/*mplct*/ subarray_ptr(subarray_ptr const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : propagate implicitness of pointer + : ref_(other->layout(), other->base()) {} + + template + // cppcheck-suppress noExplicitConstructor ; no information loss, allows comparisons + BOOST_MULTI_HD constexpr subarray_ptr(Array* other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + : subarray_ptr(other->data_elements(), other->layout()) {} + + subarray_ptr(subarray_ptr const&) noexcept = default; + subarray_ptr(subarray_ptr &&) noexcept = default; // TODO(correaa) remove inheritnace from reference to remove this move ctor + + BOOST_MULTI_HD constexpr auto operator=(subarray_ptr const& other) noexcept -> subarray_ptr& { + if(this == std::addressof(other)) { // lints(cert-oop54-cpp) + return *this; + } + this->ref_.base_ = other.ref_.base_; + this->ref_.layout_mutable() = other.ref_.layout(); + return *this; + } + + BOOST_MULTI_HD constexpr auto operator=(subarray_ptr&& other) noexcept // TODO(correaa) remove move constructor to remove this move assignment + -> subarray_ptr& { + if(this == std::addressof(other)) { // lints(cert-oop54-cpp) + return *this; + } + operator=(other); + return *this; + } + + BOOST_MULTI_HD constexpr explicit operator bool() const {return base();} + + BOOST_MULTI_HD constexpr auto dereference() const -> Ref {return Ref{this->layout(), this->base_};} + + BOOST_MULTI_HD constexpr auto operator* () const -> Ref {return Ref{ref_};} + + BOOST_MULTI_HD constexpr auto operator->() const -> Ref* {return std::addressof(ref_);} + // BOOST_MULTI_HD constexpr auto operator->() 
const -> Ref* {return const_cast(this);} // NOLINT(cppcoreguidelines-pro-type-const-cast) : TODO(correaa) find a better way without const_cast + // BOOST_MULTI_HD constexpr auto operator->() -> Ref* {return this;} + + BOOST_MULTI_HD constexpr auto operator[](difference_type n) const -> Ref {return *(*this + n);} + + BOOST_MULTI_HD constexpr auto operator<(subarray_ptr const& other) const -> bool {return distance_to(other) > 0;} + + BOOST_MULTI_HD constexpr subarray_ptr(typename Ref::element_ptr base, Layout const& lyt) : ref_{lyt, base} {} + + template + friend struct subarray; + + BOOST_MULTI_HD constexpr auto base() const -> typename Ref::element_ptr {return ref_.base();} + + friend BOOST_MULTI_HD constexpr auto base(subarray_ptr const& self) {return self.base();} + + constexpr auto operator==(subarray_ptr const& other) const -> bool { + return (this->ref_.base_ == other.ref_.base_) && (this->ref_.layout() == other.ref_.layout()); + } + + template>, int> =0> // TODO(correaa) improve this + friend BOOST_MULTI_HD constexpr auto operator==(subarray_ptr const& self, subarray_ptr const& other) -> bool { + return self.base() == other->base() && self->layout() == other->layout(); + } + template>, int> =0> + friend BOOST_MULTI_HD constexpr auto operator!=(subarray_ptr const& self, subarray_ptr const& other) -> bool { + return self.base() == other->base() && self->layout() == other->layout(); + } + + protected: + BOOST_MULTI_HD constexpr void increment() {ref_.base_ += Ref::nelems();} + BOOST_MULTI_HD constexpr void decrement() {ref_.base_ -= Ref::nelems();} + + BOOST_MULTI_HD constexpr void advance(difference_type n) {ref_.base_ += ref_.nelems()*n;} + BOOST_MULTI_HD constexpr auto distance_to(subarray_ptr const& other) const -> difference_type { + assert( Ref::nelems() == other.Ref::nelems() && Ref::nelems() != 0 ); + assert( (other.base() - base())%Ref::nelems() == 0); + assert( ref_.layout() == other.ref_.layout() ); + return (other.base() - base())/Ref::nelems(); + } 
+ + public: + BOOST_MULTI_HD constexpr auto operator+=(difference_type n) -> subarray_ptr& {advance(n); return *this;} +}; + +template +struct array_iterator; + +template +struct array_iterator // NOLINT(fuchsia-multiple-inheritance) +: boost::multi::iterator_facade< + array_iterator, void, std::random_access_iterator_tag, + subarray const&, typename layout_t::difference_type +> +, multi::decrementable> +, multi::incrementable> +, multi::affine, multi::difference_type> +, multi::totally_ordered2, void> { + ~array_iterator() = default; // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + + constexpr auto operator=(array_iterator&&) // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + noexcept // lints(hicpp-noexcept-move,performance-noexcept-move-constructor) + -> array_iterator& = default; + + array_iterator(array_iterator&&) noexcept // lints(hicpp-noexcept-move,performance-noexcept-move-constructor) + = default; // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + + using difference_type = typename layout_t::difference_type; + using element = Element; + using element_ptr = ElementPtr; + using element_const_ptr = typename std::pointer_traits::template rebind; + using value_type = typename subarray::decay_type; + + using pointer = subarray*; + using reference = subarray; + using const_reference [[deprecated("not friendly with std::ranges concepts")]] = subarray; // TODO(correaa) should be const_subarray (base of subarray) + + using iterator_category = std::random_access_iterator_tag; + + constexpr static dimensionality_type rank_v = D; + using rank = std::integral_constant; // TODO(correaa) make rank a function for compat with mdspan? 
+ + using ptr_type = subarray_ptr, layout_t>; + + using stride_type = index; + using layout_type = typename reference::layout_type; + + BOOST_MULTI_HD constexpr explicit array_iterator(std::nullptr_t nil) : ptr_{nil} {} + BOOST_MULTI_HD constexpr array_iterator() : array_iterator{nullptr} {} + + template friend struct array_iterator; + + template< + class EElement, typename PPtr, + decltype(multi::detail::explicit_cast(std::declval>().base()))* = nullptr + > + BOOST_MULTI_HD constexpr explicit array_iterator(array_iterator const& other) + : ptr_{element_ptr{other.base()}, other.ptr_->layout()}, stride_{other.stride_} {} + + template(std::declval>().base()))* = nullptr + > + // cppcheck-suppress noExplicitConstructor ; because underlying pointer is implicitly convertible + BOOST_MULTI_HD constexpr/*mplct*/ array_iterator(array_iterator const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : propagate implicitness of pointer + : ptr_{element_ptr{other.ptr_->base()}, other.ptr_->layout()}, stride_{other.stride_} {} + + array_iterator(array_iterator const&) = default; + auto operator=(array_iterator const&) -> array_iterator& = default; + + BOOST_MULTI_HD constexpr explicit operator bool() const {return ptr_->base();} // TODO(correaa) implement bool conversion for subarray_ptr + BOOST_MULTI_HD constexpr auto operator*() const -> subarray {return {*ptr_};} + + BOOST_MULTI_HD constexpr auto operator->() const -> decltype(auto) {return ptr_;} + + BOOST_MULTI_HD constexpr auto operator+ (difference_type n) const -> array_iterator {array_iterator ret{*this}; ret += n; return ret;} + BOOST_MULTI_HD constexpr auto operator[](difference_type n) const -> subarray {return *((*this) + n);} + + friend BOOST_MULTI_HD constexpr auto operator==(array_iterator const& self, array_iterator const& other) -> bool { + return self.ptr_ == other.ptr_ && self.stride_== other.stride_ && self.ptr_->layout() == other.ptr_->layout(); + } + + BOOST_MULTI_HD constexpr auto 
operator< (array_iterator const& other) const -> bool { + assert(ptr_->layout() == other.ptr_->layout()); + assert(stride_ != 0); + return + ((0 < stride_) && (ptr_.base() - other.ptr_.base() < 0)) + || ((stride_ < 0) && (0 < ptr_.base() - other.ptr_.base())); // TODO(correaa) consider the case where stride_ is negative + } + + BOOST_MULTI_HD constexpr explicit array_iterator(typename subarray::element_ptr base, layout_t lyt, index stride) + : ptr_{base, lyt}, stride_{stride} {} + + template friend struct subarray; + + template + BOOST_MULTI_HD constexpr auto operator()(index idx, As... args) const -> decltype(auto) {return this->operator[](idx)(args...); } + BOOST_MULTI_HD constexpr auto operator()(index idx) const -> decltype(auto) {return this->operator[](idx) ; } + + private: + template + static BOOST_MULTI_HD constexpr auto apply_impl_(Self&& self, Tuple const& tuple, std::index_sequence/*012*/) -> decltype(auto) { + return std::forward(self)(std::get(tuple)...); + } + + public: + template BOOST_MULTI_HD constexpr auto apply(Tuple const& tpl) const& -> decltype(auto) { return apply_impl_( *this , tpl, std::make_index_sequence::value>()); } + template BOOST_MULTI_HD constexpr auto apply(Tuple const& tpl) && -> decltype(auto) { return apply_impl_(std::move(*this), tpl, std::make_index_sequence::value>()); } + template BOOST_MULTI_HD constexpr auto apply(Tuple const& tpl) & -> decltype(auto) { return apply_impl_( *this , tpl, std::make_index_sequence::value>()); } + + private: + ptr_type ptr_; + stride_type stride_ = {1}; // nice non-zero default // TODO(correaa) use INT_MAX? 
+ + BOOST_MULTI_HD constexpr void decrement_() {ptr_->base_ -= stride_;} + BOOST_MULTI_HD constexpr void advance_(difference_type n) {ptr_->base_ += stride_*n;} + + public: + BOOST_MULTI_HD constexpr auto base() const& -> element_ptr {return ptr_.base();} + friend /*constexpr*/ auto base(array_iterator const& self) -> element_ptr {return self.base();} + + BOOST_MULTI_HD constexpr auto stride() const& -> stride_type {return stride_;} + friend constexpr auto stride(array_iterator const& self) -> stride_type {return self.stride_;} + + + #if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wunknown-warning-option" + #pragma clang diagnostic ignored "-Wunsafe-buffer-usage" // TODO(correaa) use checked span + #endif + + constexpr auto operator++() -> array_iterator& {ptr_->base_ += stride_; return *this;} + constexpr auto operator--() -> array_iterator& {decrement_(); return *this;} + + #if defined(__clang__) + #pragma clang diagnostic pop + #endif + + friend constexpr auto operator-(array_iterator const& self, array_iterator const& other) -> difference_type { + assert(self.stride_ == other.stride_); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) normal in a constexpr function + assert(self.stride_ != 0); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) normal in a constexpr function + return (self.ptr_.base() - other.ptr_.base())/self.stride_; + } + + constexpr auto operator+=(difference_type n) -> array_iterator& {advance_(+n); return *this;} + constexpr auto operator-=(difference_type n) -> array_iterator& {advance_(-n); return *this;} +}; + +template +struct cursor_t { + using difference_type = typename std::iterator_traits::difference_type; + using strides_type = StridesType; + + using element_ptr = ElementPtr; + using element_ref = typename std::iterator_traits::reference; + using element_type = typename std::iterator_traits::value_type; + + using pointer 
= element_ptr; + using reference = element_ref; + + using indices_type = typename extensions_t::indices_type; + + private: + strides_type strides_; + element_ptr base_; + + template friend struct subarray; + template friend struct cursor_t; + + BOOST_MULTI_HD constexpr cursor_t(element_ptr base, strides_type const& strides) : strides_{strides}, base_{base} {} + + public: + BOOST_MULTI_HD constexpr auto operator[](difference_type n) const -> decltype(auto) { + if constexpr(D != 1) { + return cursor_t< + ElementPtr, + D-1, + std::decay_t + >{ + base_ + + std::get<0>(strides_)*n, + strides_.tail() + }; + } else { + return base_[std::get<0>(strides_)*n]; + } + } + BOOST_MULTI_HD constexpr auto operator()(difference_type n) const -> decltype(auto) { + return operator[](n); + } + template + BOOST_MULTI_HD constexpr auto operator()(difference_type n, Ns... rest) const -> decltype(auto) { + return operator[](n)(rest...); + } + + private: + template + BOOST_MULTI_HD constexpr auto apply_impl_(Tuple const& tup, std::index_sequence /*012*/) const -> decltype(auto) { + return ((std::get(tup)*std::get(strides_)) + ...); + } + + public: + template + BOOST_MULTI_HD constexpr auto operator+=(Tuple const& tup) -> cursor_t& { + base_ += apply_impl_(tup, std::make_index_sequence::value>{}); + return *this; + } + BOOST_MULTI_HD constexpr auto operator* () const -> reference {return *base_;} + BOOST_MULTI_HD constexpr auto operator->() const -> pointer {return base_;} +}; + +template +struct elements_iterator_t // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) +: boost::multi::random_accessable, typename std::iterator_traits::difference_type, typename std::iterator_traits::reference> +{ + using difference_type = typename std::iterator_traits::difference_type; + using value_type = typename std::iterator_traits::value_type; + using pointer = Pointer; + using reference = typename std::iterator_traits::reference; + using iterator_category = 
std::random_access_iterator_tag; + + using const_pointer = typename std::pointer_traits::template rebind; + + using layout_type = LayoutType; + + private: + pointer base_; + layout_type l_; + difference_type n_ = 0; + extensions_t xs_; + + using indices_type = typename extensions_t::indices_type; + indices_type ns_ = {}; + + template friend struct elements_iterator_t; + template friend struct elements_range_t; + + constexpr elements_iterator_t(pointer base, layout_type lyt, difference_type n) + : base_{base}, l_{lyt}, n_{n}, xs_{l_.extensions()}, ns_{lyt.is_empty()?indices_type{}:xs_.from_linear(n)} {} + + public: + constexpr auto base() -> pointer {return base_;} + constexpr auto base() const -> const_pointer {return base_;} + BOOST_MULTI_HD constexpr auto layout() const -> layout_type {return l_;} + + template(std::declval().base_))* = nullptr> + // cppcheck-suppress noExplicitConstructor + BOOST_MULTI_HD constexpr /*impl*/ elements_iterator_t(Other const& other) : elements_iterator_t{other.base_, other.l_, other.n_} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + template + BOOST_MULTI_HD constexpr explicit elements_iterator_t(Other const& other) : elements_iterator_t{other.base_, other.l_, other.n_} {} + + elements_iterator_t(elements_iterator_t const&) = default; + + BOOST_MULTI_HD constexpr auto operator=(elements_iterator_t const& other) -> elements_iterator_t& { // fixes (?) warning: definition of implicit copy assignment operator for 'elements_iterator_t *, boost::multi::layout_t<1>>' is deprecated because it has a user-declared copy constructor [-Wdeprecated-copy] + if(&other == this) {return *this;} // for cert-oop54-cpp + base_ = other.base_; + xs_ = other.xs_; + n_ = other.n_; + return *this; + } + + BOOST_MULTI_HD constexpr auto operator++() -> elements_iterator_t& { + std::apply([&xs = this->xs_](auto&... 
idxs) { return xs.next_canonical(idxs...); }, ns_); + ++n_; + return *this; + } + BOOST_MULTI_HD constexpr auto operator--() -> elements_iterator_t& { + std::apply([&xs = this->xs_](auto&... idxs) { return xs.prev_canonical(idxs...); }, ns_); + --n_; + return *this; + } + + BOOST_MULTI_HD constexpr auto operator+=(difference_type n) -> elements_iterator_t& { + auto const nn = std::apply(xs_, ns_); + ns_ = xs_.from_linear(nn + n); + n_ += n; + return *this; + } + BOOST_MULTI_HD constexpr auto operator-=(difference_type n) -> elements_iterator_t& { + auto const nn = std::apply(xs_, ns_); + ns_ = xs_.from_linear(nn - n); + n_ -= n; + return *this; + } + + #if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wunknown-warning-option" + #pragma clang diagnostic ignored "-Wunsafe-buffer-usage" // TODO(correaa) use checked span + #endif + + BOOST_MULTI_HD /*[[gnu::pure]]*/ constexpr auto operator-(elements_iterator_t const& other) const -> difference_type { + assert(base_ == other.base_ && l_ == other.l_); + return n_ - other.n_; + } + BOOST_MULTI_HD constexpr auto operator<(elements_iterator_t const& other) const -> difference_type { + assert(base_ == other.base_ && l_ == other.l_); + return n_ < other.n_; + } + + constexpr auto current() const -> pointer {return base_ + std::apply(l_, ns_);} + + BOOST_MULTI_HD constexpr auto operator->() const -> pointer {return base_ + std::apply(l_, ns_) ;} + BOOST_MULTI_HD constexpr auto operator*() const -> reference {return base_ [std::apply(l_, ns_)];} + BOOST_MULTI_HD constexpr auto operator[](difference_type const& n) const -> reference { + auto const nn = std::apply(xs_, ns_); + return base_[std::apply(l_, xs_.from_linear(nn + n))]; + } // explicit here is necessary for nvcc/thrust + + #if defined(__clang__) + #pragma clang diagnostic pop + #endif + + BOOST_MULTI_HD constexpr auto operator+(difference_type n) const -> elements_iterator_t {auto ret{*this}; ret += n; return ret;} // 
explicitly necessary for nvcc/thrust + BOOST_MULTI_HD constexpr auto operator-(difference_type n) const -> elements_iterator_t {auto ret{*this}; ret -= n; return ret;} // explicitly necessary for nvcc/thrust + + BOOST_MULTI_HD constexpr auto operator==(elements_iterator_t const& other) const -> bool { + assert(base_ == other.base_ && l_ == other.l_); // TODO(correaa) calling host function from host device + return n_ == other.n_; // and base_ == other.base_ and l_ == other.l_; + } + BOOST_MULTI_HD constexpr auto operator!=(elements_iterator_t const& other) const -> bool { + assert(base_ == other.base_ && l_ == other.l_); // TODO(correaa) calling host function from host device + return n_ != other.n_; + } +}; + +template +struct elements_range_t { + using pointer = Pointer; + using layout_type = LayoutType; + + using value_type = typename std::iterator_traits::value_type; + using const_pointer = typename std::pointer_traits::template rebind; + + using reference = typename std::iterator_traits< pointer>::reference; + using const_reference = typename std::iterator_traits::reference; + + using size_type = typename std::iterator_traits::difference_type; + using difference_type = typename std::iterator_traits::difference_type; + + using iterator = elements_iterator_t; + using const_iterator = elements_iterator_t; + + private: + pointer base_; + layout_type l_; + + public: + template(std::declval().base_))* = nullptr> + // cppcheck-suppress noExplicitConstructor ; because underlying pointer is implicitly convertible // NOLINTNEXTLINE(runtime/explicit) + constexpr /*impl*/ elements_range_t(OtherRange const& other) : base_{other.base}, l_{other.l_} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) to reproduce the implicitness of the argument + template(std::declval().base_))* = nullptr> + constexpr explicit elements_range_t(OtherRange const& other) : elements_range_t{other} {} + + constexpr elements_range_t(pointer base, layout_type lyt) : base_{base}, 
l_{lyt} {} + + private: + constexpr auto at_aux_(difference_type n) const -> reference { + assert( ! is_empty() ); + return base_[std::apply(l_, l_.extensions().from_linear(n))]; + } + + public: + BOOST_MULTI_HD constexpr auto operator[](difference_type n) const& -> const_reference {return at_aux_(n);} + BOOST_MULTI_HD constexpr auto operator[](difference_type n) && -> reference {return at_aux_(n);} + BOOST_MULTI_HD constexpr auto operator[](difference_type n) & -> reference {return at_aux_(n);} + + constexpr auto size() const -> size_type {return l_.num_elements();} + + [[nodiscard]] + constexpr auto empty() const -> bool {return l_. empty();} + constexpr auto is_empty() const -> bool {return l_.is_empty();} + + elements_range_t(elements_range_t const&) = delete; + elements_range_t(elements_range_t &&) = delete; + + template auto operator==(elements_range_t const& other) const -> bool { + if( is_empty() && other.is_empty()) {return true;} + return size() == other.size() && adl_equal(other.begin(), other.end(), begin()); + } + template auto operator!=(elements_range_t const& other) const -> bool { + if(is_empty() && other.is_empty()) {return false;} + return size() != other.size() || ! 
adl_equal(other.begin(), other.end(), begin()); + } + + template void swap(elements_range_t& other) & noexcept {assert(size() == other.size()); adl_swap_ranges(begin(), end(), other.begin());} + template void swap(elements_range_t& other) && noexcept {assert(size() == other.size()); adl_swap_ranges(begin(), end(), other.begin());} + template void swap(elements_range_t&& other) & noexcept {assert(size() == other.size()); adl_swap_ranges(begin(), end(), std::move(other).begin());} + template void swap(elements_range_t&& other) && noexcept {assert(size() == other.size()); adl_swap_ranges(begin(), end(), std::move(other).begin());} + + ~elements_range_t() = default; + + private: + constexpr auto begin_aux_() const {return iterator{base_, l_, 0 };} + constexpr auto end_aux_ () const {return iterator{base_, l_, l_.num_elements()};} + + public: + constexpr auto begin() const& -> const_iterator {return begin_aux_();} + constexpr auto end () const& -> const_iterator {return end_aux_ ();} + + constexpr auto begin() && -> iterator {return begin_aux_();} + constexpr auto end () && -> iterator {return end_aux_ ();} + + constexpr auto begin() & -> iterator {return begin_aux_();} + constexpr auto end () & -> iterator {return end_aux_ ();} + + constexpr auto front() const& -> const_reference {return *begin();} + constexpr auto back () const& -> const_reference {return *std::prev(end(), 1);} + + constexpr auto front() && -> reference {return *begin();} + constexpr auto back () && -> reference {return *std::prev(end(), 1);} + + constexpr auto front() & -> reference {return *begin();} + constexpr auto back () & -> reference {return *std::prev(end(), 1);} + + auto operator=(elements_range_t const&) -> elements_range_t& = delete; + + auto operator=(elements_range_t && other) noexcept -> elements_range_t& { // cannot be =delete in NVCC? + if(! 
is_empty()) {adl_copy(std::begin(std::move(other)), std::end(std::move(other)), begin());} + return *this; + } + + template()), std::end(std::declval()), std::declval()))> + auto operator=(OtherElementRange&& other) & -> elements_range_t& {assert(size() == other.size()); // NOLINT(cppcoreguidelines-missing-std-forward) std::forward(other) creates a problem with move-only elements + if(! is_empty()) {adl_copy(std::begin(other), std::end(other), begin());} + return *this; + } + + template()), std::end(std::declval()), std::declval()))> + constexpr auto operator=(OtherElementRange&& other) && -> elements_range_t& {assert(size() == other.size()); // NOLINT(cppcoreguidelines-missing-std-forward) std::forward(other) creates a problem with move-only elements + if(! is_empty()) {adl_copy(std::begin(other), std::end(other), begin());} + return *this; + } + + auto operator=(std::initializer_list values) && -> elements_range_t& {operator=(values); return *this;} + auto operator=(std::initializer_list values) & -> elements_range_t& { + assert(static_cast(values.size()) == size()); + adl_copy_n(values.begin(), values.size(), begin()); + return *this; + } +}; + +template +BOOST_MULTI_HD constexpr auto ref(It begin, It end) +->multi::subarray { + return multi::subarray{begin, end}; +} + +template struct static_array; // this might be needed by MSVC 14.3 in c++17 mode + +template +struct subarray : array_types { + using types = array_types; + using ref_ = subarray; + + using array_types::rank_v; + + friend struct subarray; + + using types::layout; + using typename types::element_type; + + using layout_type = Layout; + + BOOST_MULTI_HD constexpr auto layout() const -> layout_type {return array_types::layout();} + + using basic_const_array = subarray::template rebind, Layout>; + + subarray() = default; + + BOOST_MULTI_HD constexpr subarray(layout_type const& layout, ElementPtr const& base) + : array_types{layout, base} {} + + auto operator=(subarray&& other) & 
noexcept(std::is_nothrow_copy_assignable_v) -> subarray& { // allows assigment in temporaries // NOLINT(cppcoreguidelines-noexcept-move-operations,hicpp-noexcept-move,performance-noexcept-move-constructor) //NOSONAR + operator=(other); return *this; + } + + auto operator=(subarray&& other) && noexcept(std::is_nothrow_copy_assignable_v) -> subarray& { // allows assigment in temporaries // NOLINT(cppcoreguidelines-noexcept-move-operations,hicpp-noexcept-move,performance-noexcept-move-constructor) //NOSONAR + operator=(std::move(other)); return *this; + } + +#if defined(__NVCOMPILER) +#pragma diagnostic push +#pragma diag_suppress = conversion_function_not_usable +#elif defined(__NVCC__) +#pragma nv_diagnostic push +#pragma nv_diag_suppress = conversion_function_not_usable +#endif + BOOST_MULTI_HD constexpr operator subarray const& () const { // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) this is needed by std::ranges, TODO(correaa) think if this can be solved by inheritance from subarray + return reinterpret_cast const&>(*this); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-pro-type-reinterpret-cast) think if this can be solved by inheritance from subarray + } +#ifdef __NVCOMPILER +#pragma diagnostic pop +#elif defined(__NVCC__) +#pragma nv_diagnostic pop +#endif + + protected: + // using types::types; + BOOST_MULTI_HD constexpr explicit subarray(std::nullptr_t nil) : types{nil} {} + + template friend struct static_array; + + // TODO(correaa) vvv consider making it explicit (seems that in C++23 it can prevent auto s = a[0];) + subarray(subarray const&) = default; // NOTE: reference type cannot be copied. 
perhaps you want to return by std::move or std::forward if you got the object from a universal reference argument + + template friend struct subarray_ptr; + + public: + using element = typename types::element; + using element_ptr = typename types::element_ptr; + using element_const_ptr = typename types::element_const_ptr; + using element_move_ptr = multi::move_ptr; + using element_ref = typename types::element_ref; + using element_cref = typename std::iterator_traits::reference; + + using elements_iterator = elements_iterator_t; + using celements_iterator = elements_iterator_t; + + using elements_range = elements_range_t; + using const_elements_range = elements_range_t; + + using index_gen [[deprecated("here to fulfill MultiArray concept")]] = char*; + using extent_gen [[deprecated("here to fulfill MultiArray concept")]] = void ; + using extent_range [[deprecated("here to fulfill MultiArray concept")]] = void; + + private: + constexpr auto elements_aux_() const {return elements_range(this->base_, this->layout());} + + public: + subarray(subarray&&) noexcept = default; // lints(readability-redundant-access-specifiers) + + constexpr auto elements() & -> elements_range { return elements_aux_(); } + constexpr auto elements() && -> elements_range { return elements_aux_(); } + constexpr auto elements() const& { return const_elements_range(this->base(), this->layout()); } + constexpr auto const_elements() const -> const_elements_range { return elements_aux_(); } + + constexpr auto hull() const -> std::pair { + return {this->base(), std::abs(this->hull_size())}; + } + + ~subarray() = default; // this lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + + BOOST_MULTI_FRIEND_CONSTEXPR auto sizes(subarray const& self) noexcept -> typename subarray::sizes_type {return self.sizes();} // needed by nvcc + BOOST_MULTI_FRIEND_CONSTEXPR auto size (subarray const& self) noexcept -> typename subarray::size_type {return self.size ();} // needed by nvcc + + 
template friend constexpr auto reinterpret_array_cast(subarray && self) {return std::move(self).template reinterpret_array_cast::template rebind>();} + template friend constexpr auto reinterpret_array_cast(subarray const& self) {return self .template reinterpret_array_cast::template rebind>();} + + friend constexpr auto dimensionality(subarray const& /*self*/) {return D;} + + using typename types::reference; + + using default_allocator_type = typename multi::pointer_traits::default_allocator_type; + + constexpr auto get_allocator() const -> default_allocator_type { + using multi::get_allocator; + return get_allocator(this->base()); + } + + BOOST_MULTI_FRIEND_CONSTEXPR auto get_allocator(subarray const& self) -> default_allocator_type {return self.get_allocator();} + + using decay_type = array::default_allocator_type>; + + friend constexpr auto decay(subarray const& self) -> decay_type {return self.decay();} + constexpr auto decay() const& -> decay_type { + decay_type ret{*this}; + return ret; + } + + constexpr auto operator+() const -> decay_type {return decay();} + using typename types::const_reference; + + private: + BOOST_MULTI_HD constexpr auto at_aux_(index idx) const { + #if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wunknown-warning-option" + #pragma clang diagnostic ignored "-Wunsafe-buffer-usage" // TODO(correaa) use checked span + #endif + return reference { + this->layout().sub(), + this->base_ + (idx*this->layout().stride() - this->layout().offset()) + }; // cppcheck-suppress syntaxError ; bug in cppcheck 2.5 + #if defined(__clang__) + #pragma clang diagnostic pop + #endif + } + + public: + BOOST_MULTI_HD constexpr auto operator[](index idx) const& { return static_cast(at_aux_(idx)); } // TODO(correaa) use return type to cast + BOOST_MULTI_HD constexpr auto operator[](index idx) && -> reference { return at_aux_(idx) ; } + BOOST_MULTI_HD constexpr auto operator[](index idx) & -> reference { return at_aux_(idx) 
; } + + template(D)>, typename = std::enable_if_t<(std::tuple_size::value > 1)> > + BOOST_MULTI_HD constexpr auto operator[](Tuple const& tup) const + ->decltype(operator[](std::get<0>(tup))[detail::tuple_tail(tup)]) { + return operator[](std::get<0>(tup))[detail::tuple_tail(tup)]; } + + template::value == 1)> > + BOOST_MULTI_HD constexpr auto operator[](Tuple const& tup) const + ->decltype(operator[](std::get<0>(tup))) { + return operator[](std::get<0>(tup)); } + + constexpr auto front() const& -> const_reference {return *begin();} + constexpr auto back() const& -> const_reference {return *std::prev(end(), 1);} + + constexpr auto front() && -> reference {return *begin();} + constexpr auto back() && -> reference {return *std::prev(end(), 1);} + + constexpr auto front() & -> reference {return *begin();} + constexpr auto back() & -> reference {return *std::prev(end(), 1);} + + using typename types::index; + + constexpr auto reindexed(index first) const& { + typename types::layout_t new_layout = this->layout(); + new_layout.reindex(first); + return basic_const_array(new_layout, types::base_); + } + constexpr auto reindexed(index first)& { + typename types::layout_t new_layout = this->layout(); + new_layout.reindex(first); + return subarray(new_layout, types::base_); + } + constexpr auto reindexed(index first)&& -> subarray { + typename types::layout_t new_layout = this->layout(); + new_layout.reindex(first); + return {new_layout, types::base_}; + } + + // TODO(correaa) : implement reindexed_aux + template + constexpr auto reindexed(index first, Indexes... idxs) const& -> basic_const_array { + return ((reindexed(first).rotated()).reindexed(idxs...)).unrotated(); + } + template + constexpr auto reindexed(index first, Indexes... idxs) & -> subarray { + return ((reindexed(first).rotated()).reindexed(idxs...)).unrotated(); + } + template + constexpr auto reindexed(index first, Indexes... 
idxs)&& -> subarray { + return ((std::move(*this).reindexed(first).rotated()).reindexed(idxs...)).unrotated(); + } + + private: + constexpr auto taked_aux_(difference_type n) const { + assert( n <= this->size() ); + typename types::layout_t const new_layout( + this->layout().sub(), + this->layout().stride(), + this->layout().offset(), + this->stride()*n + ); + return subarray(new_layout, this->base_); + } + + public: + constexpr auto taked(difference_type n) const& -> basic_const_array {return taked_aux_(n);} + constexpr auto taked(difference_type n) && -> subarray {return taked_aux_(n);} + constexpr auto taked(difference_type n) & -> subarray {return taked_aux_(n);} + + private: + constexpr auto dropped_aux_(difference_type n) const { + assert( n <= this->size() ); + typename types::layout_t const new_layout{ + this->layout().sub(), + this->layout().stride(), + this->layout().offset(), + this->stride()*(this->size() - n) + }; + return subarray(new_layout, this->base_ + n*this->layout().stride() - this->layout().offset()); + } + + public: + constexpr auto dropped(difference_type n) const& -> basic_const_array { return dropped_aux_(n); } + constexpr auto dropped(difference_type n) && -> subarray { return dropped_aux_(n); } + constexpr auto dropped(difference_type n) & -> subarray { return dropped_aux_(n); } + + private: + BOOST_MULTI_HD constexpr auto sliced_aux_(index first, index last) const { + // TODO(correaa) remove first == last condition + BOOST_MULTI_ACCESS_ASSERT(((first==last) || this->extension().contains(first ))&&"sliced first out of bounds"); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + BOOST_MULTI_ACCESS_ASSERT(((first==last) || this->extension().contains(last - 1))&&"sliced last out of bounds"); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + typename types::layout_t new_layout = this->layout(); + 
new_layout.nelems() = this->stride()*(last - first); // TODO(correaa) : reconstruct layout instead of mutating it + BOOST_MULTI_ACCESS_ASSERT(this->base_ || ((first*this->layout().stride() - this->layout().offset()) == 0) ); // it is UB to offset a nullptr + return subarray{new_layout, this->base_ + (first*this->layout().stride() - this->layout().offset())}; + } + + public: + BOOST_MULTI_HD constexpr auto sliced(index first, index last) const& -> basic_const_array { return sliced_aux_(first, last); } + BOOST_MULTI_HD constexpr auto sliced(index first, index last) & -> subarray { return sliced_aux_(first, last); } + BOOST_MULTI_HD constexpr auto sliced(index first, index last) && -> subarray { return sliced_aux_(first, last); } + + constexpr auto blocked(index first, index last) const& -> basic_const_array { return sliced(first, last).reindexed(first); } + constexpr auto blocked(index first, index last) & -> subarray { return sliced(first, last).reindexed(first); } + + using iextension = typename subarray::index_extension; + + constexpr auto stenciled(iextension iex) & -> subarray { return blocked(iex.first(), iex.last()); } + constexpr auto stenciled(iextension iex, iextension iex1) & -> subarray { return ((stenciled(iex).rotated()).stenciled(iex1)).unrotated(); } + constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2) & -> subarray { return ((stenciled(iex).rotated()).stenciled(iex1, iex2)).unrotated(); } + constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3) & -> subarray { return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3)).unrotated(); } + template + constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3, Xs... 
iexs) & -> subarray{return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3, iexs...)).unrotated(); } + + constexpr auto stenciled(iextension iex) && -> subarray { return blocked(iex.first(), iex.last()); } + constexpr auto stenciled(iextension iex, iextension iex1) && -> subarray { return ((stenciled(iex).rotated()).stenciled(iex1)).unrotated(); } + constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2) && -> subarray { return ((stenciled(iex).rotated()).stenciled(iex1, iex2)).unrotated(); } + constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3) && -> subarray { return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3)).unrotated(); } + template + constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3, Xs... iexs) && -> subarray{ return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3, iexs...)).unrotated(); } + + constexpr auto stenciled(iextension iex) const& -> basic_const_array { return blocked(iex.first(), iex.last()); } + constexpr auto stenciled(iextension iex, iextension iex1) const& -> basic_const_array { return ((stenciled(iex).rotated()).stenciled(iex1)).unrotated(); } + constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2) const& -> basic_const_array { return ((stenciled(iex).rotated()).stenciled(iex1, iex2)).unrotated(); } + constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3) const& -> basic_const_array { return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3)).unrotated(); } + + template + constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3, Xs... 
iexs) const& -> basic_const_array { + return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3, iexs...)).unrotated(); + } + + constexpr auto elements_at(size_type idx) const& -> decltype(auto) { + assert(idx < this->num_elements()); + auto const sub_num_elements = this->begin()->num_elements(); + return operator[](idx / sub_num_elements).elements_at(idx % sub_num_elements); + } + constexpr auto elements_at(size_type idx) && -> decltype(auto) { + assert(idx < this->num_elements()); + auto const sub_num_elements = this->begin()->num_elements(); + return operator[](idx / sub_num_elements).elements_at(idx % sub_num_elements); + } + constexpr auto elements_at(size_type idx) & -> decltype(auto) { + assert(idx < this->num_elements()); + auto const sub_num_elements = this->begin()->num_elements(); + return operator[](idx / sub_num_elements).elements_at(idx % sub_num_elements); + } + + private: + constexpr auto strided_aux_(difference_type diff) const { + typename types::layout_t const new_layout{this->layout().sub(), this->layout().stride()*diff, this->layout().offset(), this->layout().nelems()}; + return subarray(new_layout, types::base_); + } + + public: + constexpr auto strided(difference_type diff) const& -> basic_const_array { return strided_aux_(diff); } + constexpr auto strided(difference_type diff) && -> subarray { return strided_aux_(diff); } + constexpr auto strided(difference_type diff) & -> subarray { return strided_aux_(diff); } + + constexpr auto sliced( + typename types::index first, typename types::index last, typename types::index stride_ + ) const -> subarray { + return sliced(first, last).strided(stride_); + } + + using index_range = typename subarray::index_range; + + constexpr auto range(index_range irng) const& -> decltype(auto) {return sliced(irng.front(), irng.front() + irng.size());} + constexpr auto range(index_range irng) && -> decltype(auto) {return std::move(*this).sliced(irng.front(), irng.front() + irng.size());} + constexpr auto 
range(index_range irng) & -> decltype(auto) {return sliced(irng.front(), irng.front() + irng.size());} + + [[deprecated("is_flattable will be a property of the layout soon")]] + constexpr auto is_flattable() const -> bool{ + return + (this->size() <= 1) + || (this->stride() == this->layout().sub().nelems()) + ; + } + + friend constexpr auto flatted(subarray const& self) {return self.flatted();} + constexpr auto flatted() const& { + // assert(is_flattable() && "flatted doesn't work for all layouts!"); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + multi::layout_t new_layout{this->layout().sub()}; + new_layout.nelems() *= this->size(); // TODO(correaa) : use immutable layout + return subarray{new_layout, types::base_}; + } + + void flattened() const = delete; + // { + // multi::biiteratorbegin())>> biit{this->begin(), 0, size(*(this->begin()))}; + // return basic_array(this->layout().sub, biit); + // } + + constexpr auto broadcasted() const& { + multi::layout_t const new_layout{layout(), 0, 0, std::numeric_limits::max()}; + return subarray{new_layout, types::base_}; + } + + // TODO(correaa) : define a diagonal_aux + constexpr auto diagonal() && {return this->diagonal();} + + constexpr auto diagonal() & -> subarray { + using boost::multi::detail::get; + auto square_size = std::min(get<0>(this->sizes()), get<1>(this->sizes())); + multi::layout_t new_layout{(*this)({0, square_size}, {0, square_size}).layout().sub()}; + new_layout.nelems() += (*this)({0, square_size}, {0, square_size}).layout().nelems(); // TODO(correaa) : don't use mutation + new_layout.stride() += (*this)({0, square_size}, {0, square_size}).layout().stride(); // TODO(correaa) : don't use mutation + return {new_layout, types::base_}; + } + + template 1) && sizeof(Dummy*), int> =0> + constexpr auto diagonal() const& -> subarray { + auto square_size = std::min(std::get<0>(this->sizes()), std::get<1>(this->sizes())); + multi::layout_t 
new_layout{(*this)({0, square_size}, {0, square_size}).layout().sub()}; + new_layout.nelems() += (*this)({0, square_size}, {0, square_size}).layout().nelems(); + new_layout.stride() += (*this)({0, square_size}, {0, square_size}).layout().stride(); // cppcheck-suppress arithOperationsOnVoidPointer ; false positive D == 1 doesn't happen here + return {new_layout, types::base_}; + } + + friend constexpr auto diagonal(subarray const& self) {return self .diagonal();} + friend constexpr auto diagonal(subarray& self) {return self .diagonal();} + friend constexpr auto diagonal(subarray&& self) {return std::move(self).diagonal();} + + using partitioned_type = subarray; + using partitioned_const_type = subarray; + + private: + BOOST_MULTI_HD constexpr auto partitioned_aux_(size_type n) const -> partitioned_type { + assert(n != 0); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + // vvv TODO(correaa) should be size() here? + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) normal in a constexpr function + assert( (this->layout().nelems() % n) == 0); // if you get an assertion here it means that you are partitioning an array with an incommunsurate partition + multi::layout_t new_layout{this->layout(), this->layout().nelems()/n, 0, this->layout().nelems()}; + new_layout.sub().nelems() /= n; + return {new_layout, types::base_}; + } + + public: + BOOST_MULTI_HD constexpr auto partitioned(size_type n) const& -> partitioned_const_type {return partitioned_aux_(n);} + BOOST_MULTI_HD constexpr auto partitioned(size_type n) & -> partitioned_type {return partitioned_aux_(n);} + BOOST_MULTI_HD constexpr auto partitioned(size_type n) && -> partitioned_type {return partitioned_aux_(n);} + + friend BOOST_MULTI_HD constexpr auto partitioned(subarray const& self, size_type n) -> partitioned_const_type {return self .partitioned(n);} + friend BOOST_MULTI_HD constexpr auto 
partitioned(subarray & self, size_type n) -> partitioned_type {return self .partitioned(n);} + friend BOOST_MULTI_HD constexpr auto partitioned(subarray && self, size_type n) -> partitioned_type {return std::move(self).partitioned(n);} + + private: + BOOST_MULTI_HD constexpr auto chunked_aux_(size_type count) const -> partitioned_type { + assert( this->size() % count == 0 ); + return partitioned_aux_(this->size()/count); + } + + public: // in Mathematica this is called Partition https://reference.wolfram.com/language/ref/Partition.html in RangesV3 it is called chunk + BOOST_MULTI_HD constexpr auto chunked(size_type count) const& -> partitioned_const_type {return chunked_aux_(count);} + BOOST_MULTI_HD constexpr auto chunked(size_type count) & -> partitioned_type {return chunked_aux_(count);} + BOOST_MULTI_HD constexpr auto chunked(size_type count) && -> partitioned_type {return chunked_aux_(count);} + + private: + constexpr auto reversed_aux_() const -> subarray { + auto new_layout = this->layout(); + new_layout.reverse(); + return {new_layout, types::base_}; + } + + public: + constexpr auto reversed() const& -> basic_const_array { return reversed_aux_(); } + constexpr auto reversed() & -> subarray { return reversed_aux_(); } + constexpr auto reversed() && -> subarray { return reversed_aux_(); } + friend constexpr auto reversed(subarray const& self) -> basic_const_array { return self .reversed(); } + friend constexpr auto reversed(subarray & self) -> subarray { return self .reversed(); } + friend constexpr auto reversed(subarray && self) -> subarray { return std::move(self).reversed(); } + + private: + BOOST_MULTI_HD constexpr auto transposed_aux_() const { + auto new_layout = this->layout(); + new_layout.transpose(); + return subarray(new_layout, types::base_); + } + + public: + BOOST_MULTI_HD constexpr auto transposed() const& -> basic_const_array { return transposed_aux_(); } + BOOST_MULTI_HD constexpr auto transposed() & -> subarray { return transposed_aux_(); } 
+ BOOST_MULTI_HD constexpr auto transposed() && -> subarray { return transposed_aux_(); } + + friend BOOST_MULTI_HD /*constexpr*/ auto transposed(subarray const& self) -> basic_const_array { return self .transposed(); } + friend BOOST_MULTI_HD /*constexpr*/ auto transposed(subarray & self) -> subarray { return self .transposed(); } + friend BOOST_MULTI_HD /*constexpr*/ auto transposed(subarray && self) -> subarray { return std::move(self).transposed(); } + + BOOST_MULTI_FRIEND_CONSTEXPR BOOST_MULTI_HD + auto operator~ (subarray const& self) -> basic_const_array {return self.transposed();} + BOOST_MULTI_FRIEND_CONSTEXPR BOOST_MULTI_HD + auto operator~ (subarray& self) -> subarray {return self.transposed();} + BOOST_MULTI_FRIEND_CONSTEXPR BOOST_MULTI_HD + auto operator~ (subarray&& self) -> subarray {return std::move(self).transposed();} + + private: + BOOST_MULTI_HD constexpr auto rotated_aux_() const { + typename types::layout_t new_layout = this->layout(); + new_layout.rotate(); + return subarray(new_layout, types::base_); + } + + public: + BOOST_MULTI_HD constexpr auto rotated() && -> subarray { return rotated_aux_(); } + BOOST_MULTI_HD constexpr auto rotated() & -> subarray { return rotated_aux_(); } + BOOST_MULTI_HD constexpr auto rotated() const& -> basic_const_array { return rotated_aux_(); } + + BOOST_MULTI_FRIEND_CONSTEXPR auto rotated(subarray const& self) { return self .rotated(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto rotated(subarray & self) { return self .rotated(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto rotated(subarray && self) { return std::move(self).rotated(); } + + private: + BOOST_MULTI_HD constexpr auto unrotated_aux_() const { + typename types::layout_t new_layout = this->layout(); + new_layout.unrotate(); + return subarray(new_layout, types::base_); + } + + public: + BOOST_MULTI_HD constexpr auto unrotated() & -> subarray { return unrotated_aux_(); } + BOOST_MULTI_HD constexpr auto unrotated() && -> subarray { return unrotated_aux_(); } + 
BOOST_MULTI_HD constexpr auto unrotated() const& -> basic_const_array /*const*/ { return unrotated_aux_(); } // NOLINT(readability-const-return-type) + + BOOST_MULTI_FRIEND_CONSTEXPR auto unrotated(subarray const& self) { return self .unrotated(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto unrotated(subarray & self) { return self .unrotated(); } + BOOST_MULTI_FRIEND_CONSTEXPR auto unrotated(subarray && self) { return std::move(self).unrotated(); } + + constexpr auto operator|(typename subarray::size_type n) & -> decltype(auto) { return partitioned(n); } + constexpr auto operator|(typename subarray::size_type n) && -> decltype(auto) { return std::move(*this).partitioned(n); } + constexpr auto operator|(typename subarray::size_type n) const& -> decltype(auto) { return partitioned(n); } + + private: + template friend struct subarray; + + // BOOST_MULTI_HD constexpr auto paren_aux() & -> subarray {return *this;} + // BOOST_MULTI_HD constexpr auto paren_aux() && -> subarray {return this->operator()();} + BOOST_MULTI_HD constexpr auto paren_aux_() const {return subarray{this->layout(), this->base_};} + + public: + BOOST_MULTI_HD constexpr auto operator()() & -> subarray {return paren_aux_();} + BOOST_MULTI_HD constexpr auto operator()() && -> subarray {return paren_aux_();} + BOOST_MULTI_HD constexpr auto operator()() const& -> basic_const_array /*const*/ {return paren_aux_();} // NOLINT(readability-redundant-access-specifiers,readability-const-return-type) + + private: + template + constexpr auto paren_aux_(index_range irng, As... args) & { + // TODO(correaa) investigate how to make it BOOST_MULTI_HD + // return range(a).rotated().paren_aux(as...).unrotated(); // TODO(correaa) compact + // auto&& tmp = range(irng); + // auto&& tmp2 = + // std::move(tmp). 
+ // rotated(); + // auto&& tmp3 = std::move(tmp2).paren_aux(args...); + // auto&& ret = std::move(tmp3).unrotated(); + // return std::move(tmp3).unrotated(); // std::move(ret); + return range(irng).rotated().paren_aux_(args...).unrotated(); + } + template + constexpr auto paren_aux_(index_range irng, As... args) && { + // TODO(correaa) investigate how to make it BOOST_MULTI_HD + // auto&& tmp = std::move(*this).range(irng); + // auto&& tmp2 = std::move(tmp).rotated().paren_aux(args...); + // return std::move(tmp2).unrotated(); + return std::move(*this).range(irng).rotated().paren_aux_(args...).unrotated(); + } + template constexpr auto paren_aux_(index_range rng, As... args) const& {return range(rng).rotated().paren_aux_(args...).unrotated();} + + template constexpr auto paren_aux_(intersecting_range inr, As... args) & -> decltype(auto) {return paren_aux_(intersection(this->extension(), inr), args...);} + template constexpr auto paren_aux_(intersecting_range inr, As... args) && -> decltype(auto) {return paren_aux_(intersection(this->extension(), inr), args...);} + template constexpr auto paren_aux_(intersecting_range inr, As... args) const& -> decltype(auto) {return paren_aux_(intersection(this->extension(), inr), args...);} + + template BOOST_MULTI_HD constexpr auto paren_aux_(index idx, As... args) & -> decltype(auto) {return operator[](idx).paren_aux_(args...);} + template BOOST_MULTI_HD constexpr auto paren_aux_(index idx, As... args) && -> decltype(auto) {return operator[](idx).paren_aux_(args...);} + template BOOST_MULTI_HD constexpr auto paren_aux_(index idx, As... 
args) const& -> decltype(auto) {return operator[](idx).paren_aux_(args...);} + + public: + // vvv DO NOT remove default parameter `= irange` : the default template parameters below help interpret the expression `{first, last}` syntax as index ranges + template constexpr auto operator()(A1 arg1) const& -> decltype(auto) {return paren_aux_(arg1);} // NOLINT(whitespace/line_length) pattern line + template constexpr auto operator()(A1 arg1, A2 arg2) const& -> decltype(auto) {return paren_aux_(arg1, arg2);} // NOLINT(whitespace/line_length) pattern line + template constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3) const& -> decltype(auto) {return paren_aux_(arg1, arg2, arg3);} // NOLINT(whitespace/line_length) pattern line + template constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3, A4 arg4, As... args) const& -> decltype(auto) {return paren_aux_(arg1, arg2, arg3, arg4, args...);} // NOLINT(whitespace/line_length) pattern line + + template constexpr auto operator()(A1 arg1) & -> decltype(auto) {return paren_aux_(arg1);} // NOLINT(whitespace/line_length) pattern line + template constexpr auto operator()(A1 arg1, A2 arg2) & -> decltype(auto) {return paren_aux_(arg1, arg2);} // NOLINT(whitespace/line_length) pattern line + template /*[[gnu::pure]]*/ constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3) & -> decltype(auto) {return paren_aux_(arg1, arg2, arg3);} // NOLINT(whitespace/line_length) pattern line + template constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3, A4 arg4, As... 
args) & -> decltype(auto) {return paren_aux_(arg1, arg2, arg3, arg4, args...);} // NOLINT(whitespace/line_length) pattern line + + template constexpr auto operator()(A1 arg1) && -> decltype(auto) {return std::move(*this).paren_aux_(arg1);} // NOLINT(whitespace/line_length) pattern line + template BOOST_MULTI_HD constexpr auto operator()(A1 arg1, A2 arg2) && -> decltype(auto) {return std::move(*this).paren_aux_(arg1, arg2);} // NOLINT(whitespace/line_length) pattern line + template constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3) && -> decltype(auto) {return std::move(*this).paren_aux_(arg1, arg2, arg3);} // NOLINT(whitespace/line_length) pattern line + template constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3, A4 arg4, As... args) && -> decltype(auto) {return std::move(*this).paren_aux_(arg1, arg2, arg3, arg4, args...);} // NOLINT(whitespace/line_length) pattern line + + private: + template constexpr auto apply_impl_(Tuple const& tuple, std::index_sequence/*012*/) const& -> decltype(auto) {return this->operator()(std::get(tuple)...);} + template constexpr auto apply_impl_(Tuple const& tuple, std::index_sequence/*012*/) & -> decltype(auto) {return this->operator()(std::get(tuple)...);} + template constexpr auto apply_impl_(Tuple const& tuple, std::index_sequence/*012*/) && -> decltype(auto) {return std::move(*this).operator()(std::get(tuple)...);} + + public: + template constexpr auto apply(Tuple const& tuple) const& -> decltype(auto) {return apply_impl_(tuple, std::make_index_sequence::value>());} + template constexpr auto apply(Tuple const& tuple) && -> decltype(auto) {return apply_impl_(tuple, std::make_index_sequence::value>());} + template constexpr auto apply(Tuple const& tuple) & -> decltype(auto) {return apply_impl_(tuple, std::make_index_sequence::value>());} + + using iterator = array_iterator; + using const_iterator = array_iterator; + using move_iterator = array_iterator; + + // using reverse_iterator [[deprecated]] = std::reverse_iterator< 
iterator>; + // using const_reverse_iterator [[deprecated]] = std::reverse_iterator; + + private: + BOOST_MULTI_HD constexpr explicit subarray(iterator begin, iterator end) + : subarray( + layout_type{begin->layout(), begin.stride(), 0, begin.stride() * (end - begin)}, + begin.base() + ) { + assert(begin.stride() == end.stride()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + assert(begin->layout() == end->layout()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + } + + friend BOOST_MULTI_HD constexpr auto ref(iterator begin, iterator end) -> multi::subarray; + + public: + using ptr = subarray_ptr; + using const_ptr = subarray_ptr; + + using pointer = ptr; + using const_pointer = const_ptr; + + private: + constexpr auto addressof_aux_() const {return ptr(this->base_, this->layout());} + + public: + constexpr auto addressof() && -> ptr { return addressof_aux_(); } + constexpr auto addressof() & -> ptr { return addressof_aux_(); } + constexpr auto addressof() const& -> const_ptr { return addressof_aux_(); } + + // NOLINTNEXTLINE(runtime/operator) //NOSONAR + constexpr auto operator&() && {return addressof();} // NOLINT(google-runtime-operator) //NOSONAR + // NOLINTNEXTLINE(runtime/operator) //NOSONAR + [[deprecated("controversial")]] constexpr auto operator&() & {return addressof();} // NOLINT(google-runtime-operator) //NOSONAR + // NOLINTNEXTLINE(runtime/operator) //NOSONAR + [[deprecated("controversial")]] constexpr auto operator&() const& {return addressof();} // NOLINT(google-runtime-operator) //NOSONAR + + private: + BOOST_MULTI_HD constexpr auto begin_aux_() const {return iterator(types::base_ , this->sub(), this->stride());} + constexpr auto end_aux_ () const {return iterator(types::base_ + this->nelems(), this->sub(), this->stride());} + + public: + BOOST_MULTI_HD constexpr auto begin() & {return begin_aux_();} + 
constexpr auto end () & {return end_aux_() ;} + friend BOOST_MULTI_HD /*constexpr*/ auto begin(subarray& self) {return self.begin();} + friend constexpr auto end (subarray& self) {return self.end ();} + + constexpr auto begin() && {return begin();} + constexpr auto end () && {return end() ;} + friend /*constexpr*/ auto begin(subarray&& self) {return std::move(self).begin();} + friend /*constexpr*/ auto end (subarray&& self) {return std::move(self).end() ;} + + constexpr auto begin() const& -> const_iterator { return begin_aux_(); } + constexpr auto end () const& -> const_iterator { return end_aux_() ; } + friend /*constexpr*/ auto begin(subarray const& self) -> const_iterator { return self.begin(); } // NOLINT(whitespace/indent) constexpr doesn't work with nvcc friend + friend /*constexpr*/ auto end (subarray const& self) -> const_iterator { return self.end() ; } // NOLINT(whitespace/indent) constexpr doesn't work with nvcc friend + + BOOST_MULTI_HD constexpr auto cbegin() const& {return begin();} + /*fd*/ constexpr auto cend() const& {return end() ;} + friend constexpr auto cbegin(subarray const& self) {return self.cbegin();} + friend constexpr auto cend (subarray const& self) {return self.cend() ;} + + constexpr auto mbegin() & { return move_iterator{begin()}; } + constexpr auto mend() & { return move_iterator{end() }; } + friend constexpr auto mbegin(subarray & self) { return self.mbegin(); } + friend constexpr auto mend (subarray & self) { return self.mend() ; } + + constexpr auto mbegin() && {return mbegin();} + constexpr auto mend() && {return mend() ;} + friend constexpr auto mbegin(subarray && self) {return std::move(self).mbegin();} + friend constexpr auto mend (subarray && self) {return std::move(self).mend() ;} + + constexpr auto mbegin() const& -> const_iterator {return begin();} + constexpr auto mend() const& -> const_iterator {return end() ;} + friend constexpr auto mbegin(subarray const& self) {return self.mbegin();} + friend constexpr auto mend 
(subarray const& self) {return self.mend() ;} + + using cursor = cursor_t; + + private: + constexpr auto home_aux_() const {return cursor(this->base_, this->strides());} + + public: + using const_cursor = cursor_t; + + constexpr auto home() const& -> const_cursor { return home_aux_(); } + constexpr auto home() && -> cursor { return home_aux_(); } + constexpr auto home() & -> cursor { return home_aux_(); } + + template constexpr auto assign(It first) & -> It { adl_copy_n(first, this->size(), begin()); std::advance(first, this->size()); return first; } + template constexpr auto assign(It first)&& -> It { return assign(first);} + + template< + class Range, + class = std::enable_if_t>, + class = std::enable_if_t::value>, + class = decltype(adl_copy_n(adl_begin(std::declval()), std::declval(), std::declval())) + > + constexpr auto operator=(Range const& rng) & // check that you LHS is not read-only + -> subarray& { // lints(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) + assert(this->size() == rng.size()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + // MULTI_MARK_SCOPE(std::string{"multi::operator= D="}+std::to_string(D)+" from range to "+typeid(T).name() ); + // adl_copy_n(adl_begin(r), this->size(), begin()); + adl_copy(adl_begin(rng), adl_end(rng), begin()); + return *this; + } + template>> + constexpr auto operator=(Range const& rng) && -> subarray& {operator=(rng); return *this;} + + template + constexpr auto operator=(subarray const& other) && -> subarray& {operator=(other); return *this;} + + template + constexpr + auto operator=(subarray const& other) & -> subarray& { + assert(this->extension() == other.extension()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + // MULTI_MARK_SCOPE( std::string{"multi::operator= (D="}+std::to_string(D)+") from "+typeid(TT).name()+" to 
"+typeid(T).name() ); + this->elements() = other.elements(); +// if(this->is_empty()) {return *this;} +// if(this->num_elements() == this->nelems() and o.num_elements() == this->nelems() and this->layout() == o.layout()) { +// this->elements() = o.elements(); +//// adl_copy_n(o.base(), o.num_elements(), this->base()); +// } else if(o.stride() < (~o).stride()) { +// (~(*this)).elements() = o.elements(); +//// adl_copy_n( (~o).begin(), (~o).size(), (~(*this)).begin() ); +// } else { +// assign(o.begin()); +// } + return *this; + } + + constexpr + auto operator=(subarray const& other) & -> subarray& { + if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) + assert(this->extension() == other.extension()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + // MULTI_MARK_SCOPE("multi::operator= [D="+std::to_string(D)+"] from "+typeid(T).name()+" to "+typeid(T).name() ); + elements() = other.elements(); +// if(this->num_elements() == this->nelems() and o.num_elements() == this->nelems() and this->layout() == o.layout()) { +// adl_copy_n(o.base(), o.num_elements(), this->base()); +// } else if(o.stride() < (~o).stride()) { +// adl_copy_n( (~o).begin(), (~o).size(), (~(*this)).begin() ); +// } else { +// assign(o.begin()); +// } + return *this; + } + + constexpr auto operator=(subarray const& other) && + -> subarray& { // lints(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) + if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) + operator=(other); + return *this; // lints(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) + } + + template< + class Range, + std::enable_if_t>::value, int> = 0, + // std::enable_if_t, int> =0, + class = decltype(Range(std::declval(), std::declval()))> + constexpr explicit operator Range() const { return Range(begin(), end()); } // 
NOLINT(fuchsia-default-arguments-calls) for example std::vector(it, ti, alloc = {}) + + template constexpr void swap(Array&& other) && noexcept { + assert( std::move(*this).extension() == std::forward(other).extension() ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + elements().swap(other.elements()); + // adl_swap_ranges(this->begin(), this->end(), adl_begin(std::forward(o))); + } + template constexpr void swap(A&& other) & noexcept {return swap(std::forward(other));} + + friend constexpr void swap(subarray&& self, subarray&& other) noexcept {std::move(self).swap(std::move(other));} + + template constexpr void swap(subarray const& self, Array&& other) noexcept {self.swap(std::forward(other));} // TODO(correaa) remove + template constexpr void swap(Array&& other, subarray const& self) noexcept {self.swap(std::forward(other));} + + template + friend constexpr auto operator==(subarray const& self, subarray const& other) -> bool { + return (self.extension() == other.extension()) && (self.elements() == other.elements()); + } + template + friend constexpr auto operator!=(subarray const& self, subarray const& other) -> bool { + return (self.extension() != other.extension()) || (self.elements() != other.elements()); + } + + constexpr auto operator==(subarray const& other) const -> bool { + return (this->extension() == other.extension()) && (this->elements() == other.elements()); + } + constexpr auto operator!=(subarray const& other) const -> bool { + return (this->extension() != other.extension()) || (this->elements() != other.elements()); + } + + friend constexpr auto lexicographical_compare(subarray const& self, subarray const& other) -> bool { + if(self.extension().first() > other.extension().first()) {return true ;} + if(self.extension().first() < other.extension().first()) {return false;} + return adl_lexicographical_compare( + self.begin(), self.end(), + other.begin(), other.end() + ); + } + + 
constexpr auto operator< (subarray const& other) const& -> bool {return lexicographical_compare(*this, other);} + constexpr auto operator<=(subarray const& other) const& -> bool {return *this == other || lexicographical_compare(*this, other);} + constexpr auto operator> (subarray const& other) const& -> bool {return other < *this;} + + template::template rebind, std::enable_if_t< std::is_const_v::element_type>,int> =0> + constexpr auto static_array_cast() const & { // name taken from std::static_pointer_cast + return subarray(this->layout(), static_cast(this->base_)); // TODO(correaa) might violate constness + } + + template::template rebind, std::enable_if_t::element_type>,int> =0> + [[deprecated("violates constness")]] + constexpr auto static_array_cast() const & { // name taken from std::static_pointer_cast + return subarray(this->layout(), static_cast(this->base_)); // TODO(correaa) might violate constness + } + + template::template rebind> + constexpr auto static_array_cast() && { // name taken from std::static_pointer_cast + return subarray(this->layout(), static_cast(this->base())); + } + + template::template rebind> + constexpr auto static_array_cast() & { // name taken from std::static_pointer_cast + return subarray(this->layout(), static_cast(this->base())); + } + + private: + template::template rebind, class... Args> + constexpr auto static_array_cast_(Args&&... 
args) const & { // name taken from std::static_pointer_cast + return subarray(this->layout(), P2{this->base_, std::forward(args)...}); + } + + public: + template + constexpr auto element_transformed(UF&& fun) const& { + return static_array_cast_< + // std::remove_cv_t>>, + std::decay_t>, + transform_ptr< + // std::remove_cv_t>>, + std::decay_t>, + UF, element_const_ptr, std::invoke_result_t + > + >(std::forward(fun)); + } + template + constexpr auto element_transformed(UF&& fun) & { + return static_array_cast_< + std::decay_t>, + transform_ptr< + std::decay_t>, + UF, element_ptr , std::invoke_result_t + > + >(std::forward(fun)); + } + template + constexpr auto element_transformed(UF&& fun) && {return element_transformed(std::forward(fun));} + + template< + class T2, class P2 = typename std::pointer_traits::template rebind, + class Element = typename subarray::element, + class PM = T2 Element::* + > + constexpr auto member_cast(PM member) const& -> subarray { + static_assert(sizeof(T)%sizeof(T2) == 0, + "array_member_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. " + "Use custom alignas structures (to the interesting member(s) sizes) or custom pointers to allow reintrepreation of array elements." + ); + + return subarray{this->layout().scale(sizeof(T), sizeof(T2)), static_cast(&(this->base_->*member))}; + } + + template< + class T2, class P2 = typename std::pointer_traits::template rebind, + class Element = typename subarray::element, + class PM = T2 Element::* + > + constexpr auto member_cast(PM member) & -> subarray { + static_assert(sizeof(T)%sizeof(T2) == 0, + "array_member_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. 
" + "Use custom alignas structures (to the interesting member(s) sizes) or custom pointers to allow reintrepreation of array elements" + ); + + return subarray{this->layout().scale(sizeof(T), sizeof(T2)), static_cast(&(this->base_->*member))}; + } + + template< + class T2, class P2 = typename std::pointer_traits::template rebind, + class Element = typename subarray::element, + class PM = T2 Element::* + > + constexpr auto member_cast(PM member) && -> subarray { + return this->member_cast(member); + } + + template::template rebind> + using rebind = subarray, D, P2>; + + template< + class T2 = std::remove_const_t, + class P2 = typename std::pointer_traits::template rebind, + std::enable_if_t< + std::is_same_v< // check that pointer family is not changed + typename std::pointer_traits::template rebind, + typename std::pointer_traits::template rebind + > + && + std::is_same_v< // check that only constness is changed + std::remove_const_t::element_type>, + std::remove_const_t + > + , int> =0 + > + constexpr auto const_array_cast() const { + if constexpr(std::is_pointer_v) { + return rebind(this->layout(), const_cast (this->base_)); // NOLINT(cppcoreguidelines-pro-type-const-cast) + } else { + return rebind(this->layout(), reinterpret_cast(this->base_)); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) //NOSONAR + } + } + + constexpr auto as_const() const { + return rebind{this->layout(), this->base()}; + } + + constexpr auto element_moved() & {return rebind{this->layout(), element_move_ptr{this->base()}};} + constexpr auto element_moved() && {return element_moved();} + + private: + template + constexpr auto reinterpret_array_cast_aux_() const -> rebind { + // static_assert( sizeof(T)%sizeof(T2) == 0, + // "error: reinterpret_array_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. 
Use custom pointers to allow reintrepreation of array elements in other cases" ); + + return { + this->layout().scale(sizeof(T), sizeof(T2)), // NOLINT(bugprone-sizeof-expression) : sizes are compatible according to static assert above + reinterpret_pointer_cast(this->base_) // if ADL gets confused here (e.g. multi:: and thrust::) then adl_reinterpret_pointer_cast will be necessary + }; + } + + public: + template::template rebind> + constexpr auto reinterpret_array_cast() const& {return reinterpret_array_cast_aux_().as_const();} + + template::template rebind> + constexpr auto reinterpret_array_cast() & {return reinterpret_array_cast_aux_();} + + template::template rebind> + constexpr auto reinterpret_array_cast() && {return reinterpret_array_cast_aux_();} + + template::template rebind > + constexpr auto reinterpret_array_cast(multi::size_type count) & -> subarray, D + 1, P2> { + // static_assert( sizeof(T)%sizeof(T2) == 0, + // "error: reinterpret_array_cast is limited to integral stride values"); + + assert( count > 0 ); + assert( sizeof(T) == sizeof(T2)*static_cast(count) ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + return subarray, D + 1, P2>( + layout_t(this->layout().scale(sizeof(T), sizeof(T2)), 1, 0, count).rotate(), // NOLINT(bugprone-sizeof-expression) T and T2 are size compatible (see static_assert above) + reinterpret_pointer_cast(this->base()) // if ADL gets confused here (e.g. 
multi:: and thrust::) then adl_reinterpret_pointer_cast will be necessary + ); + } + + template::template rebind > + constexpr auto reinterpret_array_cast(multi::size_type count) && -> subarray, D + 1, P2> {return reinterpret_array_cast(count);} + + template::template rebind > + constexpr auto reinterpret_array_cast(size_type count) const& -> subarray, D + 1, P2> { + static_assert( sizeof(T)%sizeof(T2) == 0, + "error: reinterpret_array_cast is limited to integral stride values"); + + assert( sizeof(T) == sizeof(T2)*static_cast(count) ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : checck implicit size compatibility + return subarray, D + 1, P2>( + layout_t(this->layout().scale(sizeof(T), sizeof(T2)), 1, 0, count).rotate(), + static_cast(static_cast(this->base_)) // NOLINT(bugprone-casting-through-void) direct reinterepret_cast doesn't work here + ); + } + + template + auto serialize(Archive& arxiv, unsigned int version) { + using AT = multi::archive_traits; + if(version == 0) { + std::for_each(this->begin(), this->end(), [&](reference&& item) {arxiv & AT ::make_nvp("item", std::move(item));}); + } else { + std::for_each(this->elements().begin(), this->elements().end(), [&](element& elem) {arxiv & AT ::make_nvp("elem", elem);}); + } + // std::for_each(this->begin(), this->end(), [&](auto&& item) {arxiv & cereal::make_nvp("item", item);}); + // std::for_each(this->begin(), this->end(), [&](auto&& item) {arxiv & item ;}); + } +}; + +template struct array_iterator{}; + +template +struct array_iterator // NOLINT(fuchsia-multiple-inheritance) +: boost::multi::iterator_facade< + array_iterator, + Element, std::random_access_iterator_tag, + typename std::iterator_traits::reference, multi::difference_type +> +, multi::affine , multi::difference_type> +, multi::decrementable > +, multi::incrementable > +, multi::totally_ordered2, void> +{ + using affine = multi::affine, multi::difference_type>; + using difference_type = typename 
affine::difference_type; + + array_iterator() = default; // NOLINT(cppcoreguidelines-pro-type-member-init,hicpp-member-init) + using layout_type = multi::layout_t<0>; + + template< + class Other, + decltype(multi::detail::implicit_cast(typename Other::pointer{}))* = nullptr, + decltype(std::declval().base())* = nullptr + > + // cppcheck-suppress noExplicitConstructor ; because underlying pointer is implicitly convertible + BOOST_MULTI_HD constexpr/*mplct*/ array_iterator(Other const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to reproduce the implicitness of the argument + : data_{other.base()}, stride_{other.stride()} {} + + template< + class Other, + decltype(multi::detail::explicit_cast(typename Other::pointer{}))* = nullptr, + decltype(std::declval().data_)* = nullptr + > + constexpr explicit array_iterator(Other const& other) + : data_{other.data_}, stride_{other.stride_} {} + + template friend struct array_iterator; + + constexpr explicit array_iterator(std::nullptr_t nil) : data_{nil} {} + constexpr explicit array_iterator(Ptr const& ptr) : data_{ptr} {} + + template< + class EElement, typename PPtr, + typename = decltype(multi::detail::implicit_cast(std::declval>().data_)) + > + BOOST_MULTI_HD constexpr /*impl*/ array_iterator(array_iterator const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to reproduce the implicitness of original pointer + : data_{other.data_}, stride_{other.stride_} {} + + constexpr explicit operator bool() const {return static_cast(this->data_);} + + BOOST_MULTI_HD constexpr auto operator[](typename array_iterator::difference_type n) const -> typename std::iterator_traits::reference { + return *((*this) + n); + } + + constexpr auto operator->() const -> Ptr {return data_;} + + using element = Element; + using element_ptr = Ptr; + using pointer = element_ptr; + using stride_type = multi::index; + + static constexpr dimensionality_type rank_v = 1; + using rank = 
std::integral_constant; + + constexpr auto operator<(array_iterator const& other) const -> bool {return distance_to_(other) > 0;} + + BOOST_MULTI_HD explicit constexpr array_iterator(Ptr ptr, typename subarray::index stride) + : data_{ptr}, stride_{stride} {} + + private: + friend struct subarray; + + element_ptr data_; // {nullptr}; // TODO(correaa) : consider uninitialized pointer + stride_type stride_ = {1}; + + constexpr auto distance_to_(array_iterator const& other) const -> difference_type { + assert(stride_==other.stride_ && (other.data_-data_)%stride_ == 0); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + return (other.data_ - data_)/stride_; // with struct-overflow=3 error: assuming signed overflow does not occur when simplifying `X - Y > 0` to `X > Y` [-Werror=strict-overflow] + } + + public: + BOOST_MULTI_HD constexpr auto operator+(difference_type n) const -> array_iterator {array_iterator ret{*this}; ret+=n; return ret;} + BOOST_MULTI_HD constexpr auto operator-(difference_type n) const -> array_iterator {array_iterator ret{*this}; ret-=n; return ret;} + + [[deprecated("use base() for iterator")]] + constexpr auto data() const -> element_ptr {return data_;} + + constexpr auto base() const& -> element_ptr {return data_;} + + BOOST_MULTI_FRIEND_CONSTEXPR + auto base(array_iterator const& self) -> element_ptr {return self.base();} + + BOOST_MULTI_HD constexpr auto stride() const -> stride_type {return stride_;} + friend constexpr auto stride(array_iterator const& self) -> stride_type {return self.stride_;} + + #if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wunknown-warning-option" + #pragma clang diagnostic ignored "-Wunsafe-buffer-usage" // TODO(correaa) use checked span + #endif + + constexpr auto operator++() -> array_iterator& {data_ += stride_; return *this;} + constexpr auto operator--() -> array_iterator& {data_ -= stride_; return 
*this;} + + constexpr auto operator+=(difference_type n) -> array_iterator& {data_ += stride_*n; return *this;} + constexpr auto operator-=(difference_type n) -> array_iterator& {data_ -= stride_*n; return *this;} + + #if defined(__clang__) + #pragma clang diagnostic pop + #endif + + constexpr auto operator-(array_iterator const& other) const -> difference_type {return -distance_to_(other);} + + friend constexpr auto operator==(array_iterator const& self, array_iterator const& other) -> bool {return self.data_ == other.data_;} + + BOOST_MULTI_HD constexpr auto operator*() const -> typename std::iterator_traits::reference { return *data_; } // NOLINT(readability-const-return-type) +}; + +template +using iterator = array_iterator; + +template +struct subarray +: array_types { + using types = array_types; + using types::types; + + using element = typename types::element; + using element_ref = typename std::iterator_traits::reference; + using element_cref = typename std::iterator_traits::reference; + using iterator = array_iterator; + + constexpr auto operator= (element const& elem) & -> subarray& { + // MULTI_MARK_SCOPE(std::string{"multi::operator= D=0 from "}+typeid(T).name()+" to "+typeid(T).name() ); + adl_copy_n(&elem, 1, this->base_); + return *this; + } + constexpr auto operator= (element const& elem) && -> subarray& { + operator=(elem); + return *this; // lints(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) + } + + constexpr auto operator==(element const& elem) const -> bool { + assert(this->num_elements() == 1); + return adl_equal(&elem, std::next(&elem, this->num_elements()), this->base()); + } + constexpr auto operator!=(element const& elem) const {return ! 
operator==(elem);} + + template + constexpr + auto operator=(Range0 const& rng) & -> subarray& { + adl_copy_n(&rng, 1, this->base_); + return *this; + } + + constexpr auto elements_at(size_type idx [[maybe_unused]]) const& -> element_cref {assert(idx < this->num_elements()); return *(this->base_);} + constexpr auto elements_at(size_type idx [[maybe_unused]]) && -> element_ref {assert(idx < this->num_elements()); return *(this->base_);} + constexpr auto elements_at(size_type idx [[maybe_unused]]) & -> element_ref {assert(idx < this->num_elements()); return *(this->base_);} + + constexpr auto operator!=(subarray const& other) const {return ! adl_equal(other.base_, other.base_ + 1, this->base_);} + constexpr auto operator==(subarray const& other) const {return adl_equal(other.base_, other.base_ + 1, this->base_);} + + constexpr auto operator<(subarray const& other) const {return adl_lexicographical_compare(this->base_, this->base_ + this->num_elements(), other.base_);} + + using decay_type = typename types::element; + + constexpr auto operator()() const -> element_ref {return *(this->base_);} + + constexpr operator element_ref () && {return *(this->base_);} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax + constexpr operator element_ref () & {return *(this->base_);} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax + constexpr operator element_cref() const& {return *(this->base_);} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax + + constexpr auto broadcasted() const& { + multi::layout_t<1> const new_layout{this->layout(), 0, 0, std::numeric_limits::max()}; + return subarray{new_layout, types::base_}; + } + + template + auto serialize(Archive& arxiv, unsigned int const /*version*/) { + using AT = multi::archive_traits; + auto& element_ = *(this->base_); + arxiv & AT::make_nvp("element", element_); + // arxiv & cereal::make_nvp("element", 
element_); + // arxiv & element_ ; + } +}; + +template +struct subarray // NOLINT(fuchsia-multiple-inheritance) : to define operators via CRTP +: multi::random_iterable > +, array_types { + ~subarray() = default; // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + + // boost serialization needs `delete`. void boost::serialization::extended_type_info_typeid::destroy(const void*) const [with T = boost::multi::subarray >]’ + // void operator delete(void* ptr) noexcept = delete; + // void operator delete(void* ptr, void* place ) noexcept = delete; // NOLINT(bugprone-easily-swappable-parameters) + + static constexpr dimensionality_type rank_v = 1; + using rank = std::integral_constant; + + using types = array_types; + using types::types; + using layout_type = Layout; + using ref_ = subarray; + + using element_type = T; + + using element_ptr = typename types::element_ptr; + using element_const_ptr = typename std::pointer_traits::template rebind; + using element_move_ptr = multi::move_ptr; + using element_ref = typename types::element_ref; + using element_cref = typename std::iterator_traits::reference; + + using const_pointer = element_const_ptr; + using pointer = element_ptr; + using const_reference = typename array_types::const_reference; + using reference = typename array_types:: reference; + + using default_allocator_type = typename multi::pointer_traits::default_allocator_type; + + constexpr auto get_allocator() const -> default_allocator_type {return default_allocator_of(subarray::base());} + BOOST_MULTI_FRIEND_CONSTEXPR + auto get_allocator(subarray const& self) -> default_allocator_type {return self.get_allocator();} + + using decay_type = array::default_allocator_type>; + + constexpr auto decay() const -> decay_type {return decay_type{*this};} + BOOST_MULTI_FRIEND_CONSTEXPR auto decay(subarray const& self) -> decay_type {return self.decay();} + + using basic_const_array = subarray< + T, 1, + typename 
std::pointer_traits::template rebind, + Layout + >; + + protected: + template constexpr void intersection_assign(A&& other)&& {intersection_assign(std::forward(other));} + template constexpr void intersection_assign(A&& other)& { // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved,cppcoreguidelines-missing-std-forward) false positive clang-tidy 17 + std::for_each( + intersection(types::extension(), extension(other)).begin(), + intersection(types::extension(), extension(other)).end() , + [&](auto const idx) {operator[](idx) = std::forward(other)[idx];} + ); + } + + subarray(subarray const&) = default; + + template friend struct subarray; + template friend struct static_array; // TODO(correaa) check if this is necessary + + template + friend constexpr auto static_array_cast(subarray const&) -> decltype(auto); + + template + friend constexpr auto reinterpret_array_cast(subarray&& self) { + return std::move(self).template reinterpret_array_cast::template rebind>(); + } + template + friend constexpr auto reinterpret_array_cast(subarray const& self) { + return self.template reinterpret_array_cast::template rebind>(); + } + + public: + friend constexpr auto sizes(subarray const& self) noexcept -> typename subarray::sizes_type {return self.sizes();} // needed by nvcc + friend constexpr auto size (subarray const& self) noexcept -> typename subarray::size_type {return self.size ();} // needed by nvcc + + constexpr auto operator+() const -> decay_type {return decay();} + + subarray(subarray&&) noexcept = default; // in C++ 14 this is necessary to return array references from functions +// in c++17 things changed and non-moveable non-copyable types can be returned from functions and captured by auto + + protected: + template friend struct subarray_ptr; + template friend struct array_iterator; + + public: + friend constexpr auto dimensionality(subarray const& /*self*/) -> dimensionality_type {return 1;} + + auto operator=(std::initializer_list values) && -> subarray& 
{operator=(values); return *this;} + auto operator=(std::initializer_list values) & -> subarray& { + assert( static_cast(values.size()) == this->size() ); + adl_copy_n(values.begin(), values.size(), begin()); + return *this; + } + + // NOLINTNEXTLINE(runtime/operator) + BOOST_MULTI_HD constexpr auto operator&() && { return subarray_ptr{this->base_, this->layout()}; } // NOLINT(google-runtime-operator) : taking address of a reference-like object should be allowed //NOSONAR + // NOLINTNEXTLINE(runtime/operator) + BOOST_MULTI_HD constexpr auto operator&() & { return subarray_ptr{this->base_, this->layout()}; } // NOLINT(google-runtime-operator) : taking address of a reference-like object should be allowed //NOSONAR + // NOLINTNEXTLINE(runtime/operator) + BOOST_MULTI_HD constexpr auto operator&() const& {return subarray_ptr{this->base_, this->layout()};} // NOLINT(google-runtime-operator) extend semantics //NOSONAR + + BOOST_MULTI_HD constexpr void assign(std::initializer_list values) const {assert( values.size() == static_cast(this->size()) ); + assign(values.begin(), values.end()); + } + template + constexpr auto assign(It first) & -> It {adl_copy_n(first, this->size(), this->begin()); std::advance(first, this->size()); return first;} + template + constexpr auto assign(It first)&& -> It {return assign(first);} + template + constexpr void assign(It first, It last) & { + assert( std::distance(first, last) == this->size() ); (void)last; // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + assign(first); + } + template + constexpr void assign(It first, It last)&& {assign(first, last);} + + constexpr auto operator=(subarray&& other) & noexcept(std::is_nothrow_copy_assignable_v) // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor,cppcoreguidelines-noexcept-move-operations) //NOSONAR + -> subarray& { // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + 
operator=(other); + return *this; // lints([cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) + } + + constexpr auto operator=(subarray const& other) const& -> subarray const& = delete; + constexpr auto operator=(subarray const& other) & -> subarray& { + static_assert(std::is_copy_assignable_v, "assignment requires element-wise assignment"); // TODO(correaa) : make sfinae friendly + if(this == std::addressof(other)) {return *this;} + assert(this->extension() == other.extension()); + elements() = other.elements(); + return *this; + } + + constexpr auto operator=(subarray const& other) && -> subarray& { + if(this == std::addressof(other)) {return *this;} // lints cert-oop54-cpp + operator=(other); return *this; + } + + [[deprecated("for compatibility with ranges")]] constexpr auto operator=(subarray const& other) const&& -> subarray const&&; // NOLINT(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) //NOSONAR this is needed to satify the std::indirectly_writable concept + // { // something like this will fail + // if(this == std::addressof(other)) {return static_cast(*this);} // lints cert-oop54-cpp + // const_cast(*this).operator=(other); + // return static_cast(*this); + // } + + private: + BOOST_MULTI_HD constexpr auto at_aux_(index idx) const -> typename subarray::reference { // NOLINT(readability-const-return-type) fancy pointers can deref into const values to avoid assignment + // MULTI_ACCESS_ASSERT(this->extension().contains(i)&&"out of bounds"); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + #if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wunknown-warning-option" + #pragma clang diagnostic ignored "-Wunsafe-buffer-usage" // TODO(correaa) use checked span + #endif + auto ba = this->base_; // NOLINT(llvm-qualified-auto,readability-qualified-auto) + auto of = (idx*this->stride() - 
this->offset()); // NOLINT(llvm-qualified-auto,readability-qualified-auto) + auto pt = ba + of; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic,llvm-qualified-auto,readability-qualified-auto) + return *pt; // in C++17 this is allowed even with syntethic references + // return *(this->base() + (idx*this->stride() - this->offset())); // TODO(correaa) use this->base()[(i*this->stride() - this->offset())] + #if defined(__clang__) + #pragma clang diagnostic pop + #endif + } + + public: + constexpr auto broadcasted() const& { + multi::layout_t<2> const new_layout{this->layout(), 0, 0, std::numeric_limits::max()}; + return subarray{new_layout, types::base_}; + } + + BOOST_MULTI_HD constexpr auto operator[](index idx) const& -> typename subarray::const_reference { return at_aux_(idx); } // NOLINT(readability-const-return-type) fancy pointers can deref into const values to avoid assignment + BOOST_MULTI_HD constexpr auto operator[](index idx) & -> typename subarray:: reference { return at_aux_(idx); } // NOLINT(readability-const-return-type) fancy pointers can deref into const values to avoid assignment + BOOST_MULTI_HD constexpr auto operator[](index idx) && -> typename subarray:: reference { return at_aux_(idx); } // NOLINT(readability-const-return-type) fancy pointers can deref into const values to avoid assignment + + constexpr auto front() const& -> const_reference {return *begin();} + constexpr auto back() const& -> const_reference {return *std::prev(end(), 1);} + + constexpr auto front() && -> reference {return *begin();} + constexpr auto back() && -> reference {return *std::prev(end(), 1);} + + constexpr auto front() & -> reference {return *begin();} + constexpr auto back() & -> reference {return *std::prev(end(), 1);} + + template, int> = 0 + > + constexpr operator subarray&& () const & { // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) this is needed by std::ranges, TODO(correaa) think if this can be solved by inheritance from subarray + 
return std::move(reinterpret_cast const&>(*this)); // NOLINT([ppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-pro-type-reinterpret-cast) think if this can be solved by inheritance from subarray + } + + private: + template + static constexpr auto apply_impl_(Self&& self, Tuple const& tuple, std::index_sequence /*012*/) -> decltype(auto) { + return std::forward(self)(std::get(tuple)...); + } + + public: + template BOOST_MULTI_HD constexpr auto apply(Tuple const& tuple) const& -> decltype(auto) {return apply_impl_( *this , tuple, std::make_index_sequence>());} + template BOOST_MULTI_HD constexpr auto apply(Tuple const& tuple) && -> decltype(auto) {return apply_impl_(std::move(*this), tuple, std::make_index_sequence>());} + template constexpr auto apply(Tuple const& tuple) & -> decltype(auto) {return apply_impl_( *this , tuple, std::make_index_sequence>());} + + template::value == 0), int> = 0> BOOST_MULTI_HD constexpr auto operator[](Tuple const& /*empty*/) const& -> decltype(auto) {return *this;} + template::value == 1), int> = 0> BOOST_MULTI_HD constexpr auto operator[](Tuple const& indices ) const& -> decltype(auto) {return operator[](std::get<0>(indices));} + template::value > 1), int> = 0> BOOST_MULTI_HD constexpr auto operator[](Tuple const& indices ) const& + ->decltype(operator[](std::get<0>(indices))[detail::tuple_tail(indices)]) { + return operator[](std::get<0>(indices))[detail::tuple_tail(indices)]; } + + // Warning C4459 comes from boost::multi_array having a namespace indices which collides with the variable name? 
+ #ifdef _MSC_VER + #pragma warning( push ) + #pragma warning( disable : 4459 ) + #endif + + [[deprecated("BMA compat, finish impl")]] BOOST_MULTI_HD constexpr auto operator[](std::tuple const& indices) const& { return (*this)({std::get<0>(indices).front(), std::get<0>(indices).back() + 1}); } + + #ifdef _MSC_VER + #pragma warning( pop ) + #endif + + BOOST_MULTI_HD constexpr auto elements_at(size_type idx) const& -> decltype(auto) {assert(idx < this->num_elements()); return operator[](idx);} + BOOST_MULTI_HD constexpr auto elements_at(size_type idx) && -> decltype(auto) {assert(idx < this->num_elements()); return operator[](idx);} + BOOST_MULTI_HD constexpr auto elements_at(size_type idx) & -> decltype(auto) {assert(idx < this->num_elements()); return operator[](idx);} + + constexpr auto reindexed(index first) && {return reindexed(first);} + constexpr auto reindexed(index first) & { + typename types::layout_t new_layout = this->layout(); + new_layout.reindex(first); + return subarray{new_layout, types::base_}; + } + + private: + BOOST_MULTI_HD constexpr auto taked_aux_(difference_type count) const { + assert( count <= this->size() ); // calculating size is expensive that is why + typename types::layout_t const new_layout{ + this->layout().sub(), + this->layout().stride(), + this->layout().offset(), + this->stride()*count + }; + return subarray{new_layout, this->base_}; + } + + public: + constexpr auto taked(difference_type count) const& -> basic_const_array {return taked_aux_(count);} + constexpr auto taked(difference_type count) && -> subarray {return taked_aux_(count);} + constexpr auto taked(difference_type count) & -> subarray {return taked_aux_(count);} + + private: + BOOST_MULTI_HD constexpr auto dropped_aux_(difference_type count) const -> subarray { + assert( count <= this->size() ); + typename types::layout_t const new_layout{ + this->layout().sub(), + this->layout().stride(), + this->layout().offset(), + this->stride()*(this->size() - count) + }; + return 
subarray{new_layout, this->base_ + (count*this->layout().stride() - this->layout().offset())}; + } + + public: + constexpr auto dropped(difference_type count) const& -> basic_const_array { return dropped_aux_(count); } + constexpr auto dropped(difference_type count) && -> subarray { return dropped_aux_(count); } + constexpr auto dropped(difference_type count) & -> subarray { return dropped_aux_(count); } + + private: + BOOST_MULTI_HD constexpr auto sliced_aux_(index first, index last) const { + typename types::layout_t new_layout = this->layout(); + if(this->is_empty()) { + assert(first == last); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + new_layout.nelems() = 0; // TODO(correaa) : don't use mutation + } else { + (new_layout.nelems() /= this->size())*=(last - first); + } + + return subarray{new_layout, this->base_ + (first*this->layout().stride() - this->layout().offset())}; + } + + public: + BOOST_MULTI_HD constexpr auto sliced(index first, index last) const& -> basic_const_array /*const*/ { return basic_const_array{sliced_aux_(first, last)};} // NOLINT(readability-const-return-type) + BOOST_MULTI_HD constexpr auto sliced(index first, index last) & -> subarray { return sliced_aux_(first, last) ;} + BOOST_MULTI_HD constexpr auto sliced(index first, index last) && -> subarray { return sliced_aux_(first, last) ;} + + using elements_iterator = elements_iterator_t; + using celements_iterator = elements_iterator_t; + + using elements_range = elements_range_t; + using const_elements_range = elements_range_t; + + private: + constexpr auto elements_aux_() const {return elements_range{this->base_, this->layout()};} + + public: + constexpr auto elements() & -> elements_range {return elements_aux_();} + constexpr auto elements() && -> elements_range {return elements_aux_();} + constexpr auto elements() const& -> const_elements_range {return const_elements_range{this->base(), this->layout()};} // 
TODO(correaa) simplify + + constexpr auto celements() const -> const_elements_range {return elements_aux_();} + + constexpr auto hull() const -> std::pair { + return {std::min(this->base(), this->base() + this->hull_size()), std::abs(this->hull_size())}; + } + + /*[[gnu::pure]]*/ constexpr auto blocked(index first, index last)& -> subarray { + return sliced(first, last).reindexed(first); + } + /*[[gnu::pure]]*/ constexpr auto stenciled(typename subarray::index_extension ext) -> subarray { + return blocked(ext.first(), ext.last()); + } + + private: + constexpr auto strided_aux_(difference_type diff) const -> subarray { + auto const new_layout = typename types::layout_t{this->layout().sub(), this->layout().stride()*diff, this->layout().offset(), this->layout().nelems()}; + return {new_layout, types::base_}; + } + + public: + constexpr auto strided(difference_type diff) const& -> basic_const_array { return strided_aux_(diff);} + constexpr auto strided(difference_type diff) && -> subarray { return strided_aux_(diff);} + constexpr auto strided(difference_type diff) & -> subarray { return strided_aux_(diff);} + + BOOST_MULTI_HD constexpr auto sliced(index first, index last, difference_type stride) const& -> basic_const_array { return sliced(first, last).strided(stride); } + BOOST_MULTI_HD constexpr auto sliced(index first, index last, difference_type stride) && -> subarray { return sliced(first, last).strided(stride); } + BOOST_MULTI_HD constexpr auto sliced(index first, index last, difference_type stride) & -> subarray { return sliced(first, last).strided(stride); } + + BOOST_MULTI_HD constexpr auto range(index_range const& rng) & {return sliced(rng.front(), rng.last());} + BOOST_MULTI_HD constexpr auto range(index_range const& rng) && {return std::move(*this).sliced(rng.front(), rng.last());} + BOOST_MULTI_HD constexpr auto range(index_range const& rng) const& {return sliced(rng.front(), rng.last());} + + BOOST_MULTI_HD constexpr auto operator()() const& -> 
basic_const_array {return {this->layout(), this->base()};} + BOOST_MULTI_HD constexpr auto operator()() && -> subarray {return *this;} + BOOST_MULTI_HD constexpr auto operator()() & -> subarray {return *this;} + + BOOST_MULTI_HD constexpr auto operator()(index_range const& rng) & {return range(rng);} + BOOST_MULTI_HD constexpr auto operator()(index_range const& rng) && {return std::move(*this).range(rng);} + BOOST_MULTI_HD constexpr auto operator()(index_range const& rng) const& {return range(rng);} + + BOOST_MULTI_HD constexpr auto operator()(index idx) & -> decltype(auto) {return operator[](idx);} + BOOST_MULTI_HD constexpr auto operator()(index idx) && -> decltype(auto) {return std::move(*this).operator[](idx);} + BOOST_MULTI_HD constexpr auto operator()(index idx) const& -> decltype(auto) {return operator[](idx);} + + private: + BOOST_MULTI_HD constexpr auto paren_aux_() & {return operator()();} + BOOST_MULTI_HD constexpr auto paren_aux_() && {return operator()();} + BOOST_MULTI_HD constexpr auto paren_aux_() const& {return operator()();} + + BOOST_MULTI_HD constexpr auto paren_aux_(index_range const& rng) & {return range(rng);} + BOOST_MULTI_HD constexpr auto paren_aux_(index_range const& rng) && {return range(rng);} + BOOST_MULTI_HD constexpr auto paren_aux_(index_range const& rng) const& {return range(rng);} + + BOOST_MULTI_HD constexpr auto paren_aux_(index idx) & -> decltype(auto) {return operator[](idx);} + BOOST_MULTI_HD constexpr auto paren_aux_(index idx) && -> decltype(auto) {return operator[](idx);} + BOOST_MULTI_HD constexpr auto paren_aux_(index idx) const& -> decltype(auto) {return operator[](idx);} + + constexpr auto paren_aux_(intersecting_range const& rng) & -> decltype(auto) {return paren_aux_(intersection(this->extension(), rng));} + constexpr auto paren_aux_(intersecting_range const& rng) && -> decltype(auto) {return std::move(*this).paren_aux_(intersection(this->extension(), rng));} + constexpr auto paren_aux_(intersecting_range const& rng) 
const& -> decltype(auto) {return paren_aux_(intersection(this->extension(), rng));} + + public: + constexpr auto operator()(intersecting_range const& isrange) & -> decltype(auto) {return paren_aux_(isrange);} + constexpr auto operator()(intersecting_range const& isrange) && -> decltype(auto) {return std::move(*this).paren_aux_(isrange);} + constexpr auto operator()(intersecting_range const& isrange) const& -> decltype(auto) {return paren_aux_(isrange);} + + template + constexpr auto operator()(Args&&... args) & + ->decltype(paren_(*this, std::forward(args)...)) { + return paren_(*this, std::forward(args)...); } + + template + constexpr auto operator()(Args&&... args) && + ->decltype(paren_(std::move(*this), std::forward(args)...)) { + return paren_(std::move(*this), std::forward(args)...); } + + template + constexpr auto operator()(Args&&... args) const& + ->decltype(paren_(*this, std::forward(args)...)) { + return paren_(*this, std::forward(args)...); } + + using partitioned_type = subarray; + using partitioned_const_type = subarray; + + private: + BOOST_MULTI_HD constexpr auto partitioned_aux_(size_type size) const -> partitioned_type { + assert( size != 0 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + assert( (this->layout().nelems() % size) == 0 ); // TODO(correaa) remove assert? truncate left over? 
(like mathematica) // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + multi::layout_t<2> new_layout{this->layout(), this->layout().nelems()/size, 0, this->layout().nelems()}; + new_layout.sub().nelems() /= size; // TODO(correaa) : don't use mutation + return {new_layout, types::base_}; + } + + public: + BOOST_MULTI_HD constexpr auto partitioned(size_type size) const& -> partitioned_const_type {return partitioned_aux_(size);} + BOOST_MULTI_HD constexpr auto partitioned(size_type size) & -> partitioned_type {return partitioned_aux_(size);} + BOOST_MULTI_HD constexpr auto partitioned(size_type size) && -> partitioned_type {return partitioned_aux_(size);} + + private: + BOOST_MULTI_HD constexpr auto chunked_aux_(size_type size) const -> partitioned_type { + assert( this->size() % size == 0 ); + return partitioned_aux_(this->size()/size); + } + + public: // in Mathematica this is called Partition https://reference.wolfram.com/language/ref/Partition.html in RangesV3 it is called chunk + BOOST_MULTI_HD constexpr auto chunked(size_type size) const& -> partitioned_const_type {return chunked_aux_(size);} + BOOST_MULTI_HD constexpr auto chunked(size_type size) & -> partitioned_type {return chunked_aux_(size);} + BOOST_MULTI_HD constexpr auto chunked(size_type size) && -> partitioned_type {return chunked_aux_(size);} + + private: + constexpr auto reversed_aux_() const -> subarray { + auto new_layout = this->layout(); + new_layout.reverse(); + return {new_layout, types::base_}; + } + + public: + constexpr auto reversed() const& -> basic_const_array {return reversed_aux_();} + constexpr auto reversed() & -> subarray {return reversed_aux_();} + constexpr auto reversed() && -> subarray {return reversed_aux_();} + + friend constexpr auto reversed(subarray const& self) -> basic_const_array {return self .reversed();} + friend constexpr auto reversed(subarray & self) -> subarray {return self .reversed();} + friend 
constexpr auto reversed(subarray && self) -> subarray {return std::move(self).reversed();} + + friend constexpr auto rotated(subarray const& self) -> decltype(auto) {return self. rotated();} + friend constexpr auto unrotated(subarray const& self) -> decltype(auto) {return self.unrotated();} + + constexpr auto rotated() & -> decltype(auto) {return operator()();} + constexpr auto rotated() && -> decltype(auto) {return operator()();} + constexpr auto rotated() const& -> decltype(auto) {return operator()();} + + BOOST_MULTI_HD constexpr auto unrotated() const& -> decltype(auto) {return operator()();} + BOOST_MULTI_HD constexpr auto unrotated() && -> decltype(auto) {return operator()();} + BOOST_MULTI_HD constexpr auto unrotated() & -> decltype(auto) {return operator()();} + + using iterator = typename multi::array_iterator; + using const_iterator = typename multi::array_iterator; + using move_iterator = array_iterator; + + using reverse_iterator [[deprecated]] = std::reverse_iterator< iterator>; + using const_reverse_iterator [[deprecated]] = std::reverse_iterator; + + struct [[deprecated("BMA compatibility")]] index_gen {auto operator[](irange const& rng) const {return std::make_tuple(rng);}}; + using extent_gen [[deprecated("BMA compatibility")]] = std::array; + using extent_range [[deprecated("BMA compatibility")]] = irange; + + template< + class Range, + std::enable_if_t>::value, int> =0, + class = decltype(Range{std::declval(), std::declval()}) + > + constexpr explicit operator Range() const { + // vvv Range{...} needed by Windows GCC? + return Range{begin(), end()}; // NOLINT(fuchsia-default-arguments-calls) e.g. 
std::vector(it, it, alloc = {}) + } + + private: + BOOST_MULTI_HD constexpr explicit subarray(iterator begin, iterator end) + : subarray { + layout_type{ {}/*begin->layout()*/, begin.stride(), 0, begin.stride()*(end - begin)}, + begin.base() + } { + assert(begin.stride() == end.stride() ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + // assert(begin->layout() == end->layout()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + } + friend constexpr auto ref(iterator begin, iterator end) -> multi::subarray; + + constexpr BOOST_MULTI_HD auto begin_aux_() const {return iterator{this->base_ , this->stride()};} + constexpr auto end_aux_ () const {return iterator{this->base_ + types::nelems(), this->stride()};} + + public: + BOOST_MULTI_HD constexpr auto begin() const& -> const_iterator {return begin_aux_();} + constexpr auto begin() & -> iterator {return begin_aux_();} + constexpr auto begin() && -> iterator {return begin_aux_();} + + constexpr auto mbegin() & {return move_iterator{begin()};} + constexpr auto mend () & {return move_iterator{end ()};} + + constexpr auto mbegin() && {return move_iterator{begin()};} + constexpr auto mend () && {return move_iterator{end ()};} + + constexpr auto end () const& -> const_iterator {return end_aux_();} + constexpr auto end () & -> iterator {return end_aux_();} + constexpr auto end () && -> iterator {return end_aux_();} + + [[deprecated("implement as negative stride")]] constexpr auto rbegin() const& {return const_reverse_iterator(end ());} // TODO(correaa) implement as negative stride? + [[deprecated("implement as negative stride")]] constexpr auto rend () const& {return const_reverse_iterator(begin());} // TODO(correaa) implement as negative stride? 
+ + BOOST_MULTI_FRIEND_CONSTEXPR auto begin(subarray const& self) -> const_iterator {return self .begin();} + BOOST_MULTI_FRIEND_CONSTEXPR auto begin(subarray & self) -> iterator {return self .begin();} + BOOST_MULTI_FRIEND_CONSTEXPR auto begin(subarray && self) -> iterator {return std::move(self).begin();} + + BOOST_MULTI_FRIEND_CONSTEXPR auto end (subarray const& self) -> const_iterator {return self .end() ;} + BOOST_MULTI_FRIEND_CONSTEXPR auto end (subarray & self) -> iterator {return self .end() ;} + BOOST_MULTI_FRIEND_CONSTEXPR auto end (subarray && self) -> iterator {return std::move(self).end() ;} + + BOOST_MULTI_HD constexpr auto cbegin() const& -> const_iterator {return begin();} + constexpr auto cend () const& -> const_iterator {return end() ;} + + friend BOOST_MULTI_HD /*constexpr*/ auto cbegin(subarray const& self) {return self.cbegin();} + BOOST_MULTI_FRIEND_CONSTEXPR auto cend (subarray const& self) {return self.cend() ;} + + template constexpr auto operator=(subarray const& other) && -> subarray& {operator=( other ); return *this;} + template constexpr auto operator=(subarray const& other) & -> subarray& { + assert(other.extensions() == this->extensions()); + elements() = other.elements(); + return *this; + } + + template constexpr auto operator=(subarray && other) && -> subarray& {operator=(std::move(other)); return *this;} + template constexpr auto operator=(subarray && other) & -> subarray& { + assert(this->extensions() == other.extensions()); + elements() = std::move(other).elements(); + return *this; + } + + template< + class Range, + class = std::enable_if_t >, + class = std::enable_if_t::value> + > + constexpr auto operator=(Range const& rng) & // TODO(correaa) check that you LHS is not read-only? + -> subarray& { // lints(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) + assert(this->size() == static_cast(adl_size(rng))); // TODO(correaa) or use std::cmp_equal? 
+ adl_copy_n(adl_begin(rng), adl_size(rng), begin()); + return *this; + } + template< + class Range, + class = std::enable_if_t>, + class = std::enable_if_t::value> + > + constexpr auto operator=(Range const& rng) && -> subarray& {operator=(rng); return *this;} + + template constexpr auto assign(It first) && + ->decltype(adl_copy_n(first, std::declval(), std::declval()), void()) { + return adl_copy_n(first, this-> size() , std::move(*this).begin()), void(); } + + friend constexpr auto operator==(subarray const& self, subarray const& other) -> bool { + return + self.extension() == other.extension() + && self.elements() == other.elements() + ; + } + + friend constexpr auto operator!=(subarray const& self, subarray const& other) -> bool { + return + self.extension() != other.extension() + || self.elements() != other.elements() + ; + } + + template + friend constexpr auto operator==(subarray const& self, subarray const& other) -> bool { + return + self.extension() == other.extension() + && self.elements() == other.elements() + ; + } + + template + friend constexpr auto operator!=(subarray const& self, subarray const& other) -> bool { + return + self.extension() != other.extension() + || self.elements() != other.elements() + ; + } + + friend constexpr auto operator<(subarray const& self, subarray const& other) -> bool { return lexicographical_compare_(self, other); } + friend constexpr auto operator>(subarray const& self, subarray const& other) -> bool { return lexicographical_compare_(other, self); } // NOLINT(readability-suspicious-call-argument) + + friend constexpr auto operator<=(subarray const& self, subarray const& other) -> bool { return lexicographical_compare_(self, other) || self == other; } + friend constexpr auto operator>=(subarray const& self, subarray const& other) -> bool { return lexicographical_compare_(other, self) || self == other; } // NOLINT(readability-suspicious-call-argument) + + constexpr void swap(subarray&& other) && noexcept { + 
assert(this->extension() == other.extension()); + adl_swap_ranges(this->elements().begin(), this->elements().end(), std::move(other).elements().begin()); + } + friend constexpr void swap(subarray&& self, subarray&& other) noexcept { std::move(self).swap(std::move(other)); } + + template>>> friend constexpr void swap(subarray&& self, A&& other) noexcept { std::move(self).swap(std::forward(other)); } + template>>> friend constexpr void swap(A&& other, subarray&& self) noexcept { std::move(self).swap(std::forward(other)); } + + private: + template + static constexpr auto lexicographical_compare_(A1 const& self, A2 const& other) -> bool { // NOLINT(readability-suspicious-call-argument) + if(self.extension().first() > other.extension().first()) { + return true; + } + if(self.extension().first() < other.extension().first()) { + return false; + } + return adl_lexicographical_compare(adl_begin(self), adl_end(self), adl_begin(other), adl_end(other)); + } + + public: + template::template rebind> + constexpr auto static_array_cast() const -> subarray { // name taken from std::static_pointer_cast + return {this->layout(), static_cast(this->base_)}; + } + template::template rebind, class... Args> + constexpr auto static_array_cast(Args&&... 
args) const -> subarray { // name taken from std::static_pointer_cast + return {this->layout(), P2{this->base_, std::forward(args)...}}; + } + + template + constexpr auto element_transformed(UF&& fun) const& { + return static_array_cast< + // std::remove_cv_t>>, + std::decay_t>, + transform_ptr< + // std::remove_cv_t>>, + std::decay_t>, + UF, element_const_ptr, std::invoke_result_t + > + >(std::forward(fun)); + } + template + constexpr auto element_transformed(UF&& fun) & { + return static_array_cast< + std::decay_t>, + transform_ptr< + std::decay_t>, + UF, element_ptr , std::invoke_result_t + > + >(std::forward(fun)); + } + template + constexpr auto element_transformed(UF&& fun) && {return element_transformed(std::forward(fun));} + + template< + class T2, class P2 = typename std::pointer_traits::template rebind, + class Element = typename subarray::element, + class PM = T2 std::decay_t::* + > + constexpr auto member_cast(PM member) const { + static_assert(sizeof(T)%sizeof(T2) == 0, + "array_member_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. " + "Use custom alignas structures (to the interesting member(s) sizes) or custom pointers to allow reintrepreation of array elements" + ); + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) reinterpret is what the function does. alternative for GCC/NVCC + auto&& r1 = (*(reinterpret_cast(subarray::base_))).*member; // ->*pm; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) TODO(correaa) find a better way + auto* p1 = &r1; P2 p2 = reinterpret_cast(p1); //NOSONAR +#else + auto p2 = static_cast(&(this->base_->*member)); // this crashes nvcc 11.2-11.4 and some? 
gcc compiler +#endif + return subarray(this->layout().scale(sizeof(T), sizeof(T2)), p2); + } + + constexpr auto element_moved() & {return subarray{this->layout(), element_move_ptr{this->base()}};} + constexpr auto element_moved() && {return element_moved();} + + template::template rebind> + constexpr auto reinterpret_array_cast() const& { + assert( this->layout().stride()*static_cast(sizeof(T)) % static_cast(sizeof(T2)) == 0 ); + + return subarray, 1, P2>{ + layout_type{this->layout().sub(), this->layout().stride()*static_cast(sizeof(T))/static_cast(sizeof(T2)), this->layout().offset()*static_cast(sizeof(T))/static_cast(sizeof(T2)), this->layout().nelems()*static_cast(sizeof(T))/static_cast(sizeof(T2))}, + reinterpret_pointer_cast(this->base_) + }; + } + + template::template rebind> + constexpr auto reinterpret_array_cast() & { + assert( this->layout().stride()*static_cast(sizeof(T)) % static_cast(sizeof(T2)) == 0 ); + + return subarray, 1, P2>{ + layout_type{this->layout().sub(), this->layout().stride()*static_cast(sizeof(T))/static_cast(sizeof(T2)), this->layout().offset()*static_cast(sizeof(T))/static_cast(sizeof(T2)), this->layout().nelems()*static_cast(sizeof(T))/static_cast(sizeof(T2))}, + reinterpret_pointer_cast(this->base()) + }; + } + + template::template rebind> + constexpr auto reinterpret_array_cast() && { + assert( this->layout().stride()*static_cast(sizeof(T)) % static_cast(sizeof(T2)) == 0 ); + + return subarray, 1, P2>{ + layout_type{this->layout().sub(), this->layout().stride()*static_cast(sizeof(T))/static_cast(sizeof(T2)), this->layout().offset()*static_cast(sizeof(T))/static_cast(sizeof(T2)), this->layout().nelems()*static_cast(sizeof(T))/static_cast(sizeof(T2))}, + reinterpret_pointer_cast(this->base()) + }; + } + + template::template rebind > + constexpr auto reinterpret_array_cast(size_type n) const& -> subarray, 2, P2> { // TODO(correaa) : use rebind for return type + static_assert( sizeof(T)%sizeof(T2)== 0, + "error: reinterpret_array_cast 
is limited to integral stride values, therefore the element target size must be multiple of the source element size. Use custom pointers to allow reintrepreation of array elements in other cases"); + + return subarray, 2, P2>{ + layout_t<2>{this->layout().scale(sizeof(T), sizeof(T2)), 1, 0, n}, + reinterpret_pointer_cast(this->base()) + }.rotated(); + } + + // TODO(correaa) : rename to reinterpret_pointer_cast? + template::template rebind > + constexpr auto reinterpret_array_cast(size_type n)& -> subarray, 2, P2> { + // static_assert( sizeof(T)%sizeof(T2)== 0, + // "error: reinterpret_array_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. Use custom pointers to allow reintrepreation of array elements in other cases"); + + return subarray, 2, P2>( + layout_t<2>(this->layout().scale(sizeof(T), sizeof(T2)), 1, 0, n), + reinterpret_pointer_cast(this->base()) + ).rotated(); + } + template::template rebind > + constexpr auto reinterpret_array_cast(size_type n)&& -> subarray, 2, P2> { + return this->reinterpret_array_cast(n); + } + + template + constexpr auto fill(TT const& value) & -> decltype(auto) { + return adl_fill_n(this->begin(), this->size(), value), *this; + } + constexpr auto fill()& -> decltype(auto) {return fill(typename subarray::element_type{});} + + template + constexpr auto fill(TT const& value) && -> decltype(auto) {return std::move(this->fill(value));} + constexpr auto fill() && -> decltype(auto) { + return std::move(*this).fill(typename subarray::element_type{}); + } + + template + void serialize(Archive& arxiv, unsigned /*version*/) { + using AT = multi::archive_traits; + std::for_each(this->begin(), this->end(), [&](reference& item) {arxiv & AT ::make_nvp("item", item);}); + // std::for_each(this->begin(), this->end(), [&](auto&& item) {arxiv & cereal::make_nvp("item", item);}); + // std::for_each(this->begin(), this->end(), [&](auto&& item) {arxiv & item ;}); + } +}; + +template 
+constexpr auto static_array_cast(Array&& self, Args&&... args) -> decltype(auto) { + return std::forward(self).template static_array_cast(std::forward(args)...); +} + +template +struct array_ref // TODO(correaa) : inheredit from multi::partially_ordered2, void>? +: subarray +{ + ~array_ref() = default; // lints(cppcoreguidelines-special-member-functions) + + using layout_type = typename array_ref::types::layout_t; + + using iterator = typename subarray::iterator; + + public: + constexpr // attempt for MSVC + array_ref() = delete; // because reference cannot be unbound + + array_ref(iterator, iterator) = delete; + + // return type removed for MSVC + friend constexpr auto sizes(array_ref const& self) noexcept /*-> typename array_ref::sizes_type*/ {return self.sizes();} // needed by nvcc + friend constexpr auto size (array_ref const& self) noexcept /*-> typename array_ref::size_type*/ {return self.size ();} // needed by nvcc + + protected: + [[deprecated("references are not copyable, use auto&&")]] + array_ref(array_ref const&) = default; // don't try to use `auto` for references, use `auto&&` or explicit value type + + public: + #if defined(__NVCC__) + array_ref(array_ref&&) noexcept = default; // this needs to be public in nvcc c++17 + #else + array_ref(array_ref&&) = delete; + #endif + + #if defined(BOOST_MULTI_HAS_SPAN) && !defined(__NVCC__) + template and D == 1, int> = 0> + constexpr explicit operator std::span() const& {return std::span(this->data_elements(), this->size());} + #endif + + template{}>, decltype(multi::detail::explicit_cast(std::declval()))* = nullptr> + constexpr explicit array_ref(array_ref&& other) + : subarray{other.layout(), ElementPtr{std::move(other).base()}} {} + + template{}>, decltype(multi::detail::implicit_cast(std::declval()))* = nullptr> + // cppcheck-suppress noExplicitConstructor ; to allow terse syntax + constexpr /*implicit*/ array_ref(array_ref&& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + : 
subarray{other.layout(), ElementPtr{std::move(other).base()}} {} + + constexpr array_ref(ElementPtr dat, ::boost::multi::extensions_t const& xs) /*noexcept*/ // TODO(correa) eliminate this ctor + : subarray{typename subarray::types::layout_t(xs), dat} {} + + // constexpr array_ref(typename array_ref::extensions_type extensions, typename array_ref::element_ptr dat) noexcept + // : subarray{typename array_ref::types::layout_t{extensions}, dat} {} + + constexpr array_ref(::boost::multi::extensions_t exts, ElementPtr dat) noexcept + : subarray{typename array_ref::types::layout_t(exts), dat} {} + + template< + class Array, + std::enable_if_t > =0, + std::enable_if_t>, int> =0, + std::enable_if_t())), ElementPtr>, int> =0 // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) support legacy c-arrays + > + // cppcheck-suppress noExplicitConstructor ; to allow terse syntax and because a reference to c-array can be represented as an array_ref + constexpr array_ref( // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax and because a reference to c-array can be represented as an array_ref + Array& array // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility + ) + : array_ref( + multi::data_elements(array), + extensions(array) + ) {} + + template =0> + // cppcheck-suppress noExplicitConstructor ; to allow terse syntax and because a reference to c-array can be represented as an array_ref + constexpr array_ref( // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax and because a reference to c-array can be represented as an array_ref + T& elem // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility + ) + : array_ref(&elem, {}) {} + + template + constexpr array_ref(TT (&arr)[N]) // 
NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays,google-explicit-constructor,hicpp-explicit-conversions) : for backward compatibility // NOSONAR + : array_ref( + ::boost::multi::extensions(arr), + ::boost::multi::data_elements(arr) + ) + {} + + template< + class TT, std::size_t N//, + //std::enable_if_t())), ElementPtr>, int> =0 // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) support legacy c-arrays + > + // cppcheck-suppress noExplicitConstructor ; // NOLINTNEXTLINE(runtime/explicit) + constexpr array_ref(std::array& arr) : array_ref(::boost::multi::extensions(arr), ::boost::multi::data_elements(arr)) {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) array_ptr is more general than pointer c-array support legacy c-arrays // NOSONAR + +// this ctor makes memcheck complain about memmory used after scope + template, int> =0> + // cppcheck-suppress noExplicitConstructor + array_ref(std::initializer_list il) : array_ref(il.begin(), typename array_ref::extensions_type{static_cast(il.size())}) {} + + // template, int> =0> + // array_ref(std::initializer_list&& il) = delete; + + using subarray::operator=; + + private: + template constexpr auto copy_elements_(It first) { + return adl_copy_n(first, this->num_elements(), this->data_elements()); + } + + public: + BOOST_MULTI_HD constexpr auto data_elements() const& -> typename array_ref::element_const_ptr {return array_ref::base_;} + + template> ,int> =0> + constexpr auto operator=(array_ref const& other) && -> array_ref& { + assert(this->extensions() == other.extensions()); + array_ref::copy_elements_(other.data_elements()); + return *this; + } + + constexpr auto operator=(array_ref const& other) & -> array_ref& { + if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) + // TODO(correaa) assert on extensions, not on num elements + 
assert(this->num_elements() == other.num_elements()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + array_ref::copy_elements_(other.data_elements()); + return *this; + } + + constexpr auto operator=(array_ref const& other) && -> array_ref& { + if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) + operator=(other); + return *this; + } + + constexpr auto operator=(array_ref&& other) & noexcept(std::is_nothrow_copy_assignable_v) // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor,cppcoreguidelines-noexcept-move-operations) //NOSONAR(cppS5018) + -> array_ref& { + if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) + operator=(std::as_const(other)); + return *this; + } + constexpr auto operator=(array_ref&& other) && noexcept(std::is_nothrow_copy_assignable_v) // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor,cppcoreguidelines-noexcept-move-operations) + -> array_ref& { + if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) + operator=(std::as_const(other)); + return *this; + } + + template +// constexpr + auto operator=(array_ref const& other)& -> array_ref& { + assert( this->extensions() == other.extensions() ); + // MULTI_MARK_SCOPE(std::string{"multi::operator= D="}+std::to_string(D)+" from "+typeid(TT).name()+" to "+typeid(T).name() ); + adl_copy_n(other.data_elements(), other.num_elements(), this->data_elements()); + return *this; + } + + template + constexpr auto operator=(array_ref const& other) && -> array_ref& { + this->operator=(other); + return *this; // lints (cppcoreguidelines-c-copy-assignment-signature) + } + + using elements_type = array_ref; + using celements_type = array_ref; + + private: + constexpr auto elements_aux_() const { + return elements_type{ + this->base_, + typename elements_type::extensions_type{multi::iextension{this->num_elements()}} + }; + } + + public: + 
constexpr auto elements() const& -> celements_type {return elements_aux_();} + constexpr auto elements() & -> elements_type {return elements_aux_();} + constexpr auto elements() && -> elements_type {return elements_aux_();} + + friend constexpr auto elements(array_ref & self) -> elements_type {return self . elements();} + friend constexpr auto elements(array_ref && self) -> elements_type {return std::move(self). elements();} + friend constexpr auto elements(array_ref const& self) -> celements_type {return self . elements();} + + constexpr auto celements() const& {return celements_type{array_ref::data_elements(), array_ref::num_elements()};} + friend constexpr auto celements(array_ref const& self) {return self.celements();} + + template + friend constexpr auto operator==(array_ref const& self, array_ref const& other) -> bool { + if(self.extensions() != other.extensions()) { return false; } + + #if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wunknown-warning-option" + #pragma clang diagnostic ignored "-Wunsafe-buffer-usage" // TODO(correaa) use checked span + #endif + + return adl_equal( + other.data_elements(), other.data_elements() + other.num_elements(), + self .data_elements() + ); + + #if defined(__clang__) + #pragma clang diagnostic pop + #endif + + } + template + friend constexpr auto operator!=(array_ref const& self, array_ref const& other) -> bool { + if(self.extensions() != other.extensions()) { return true; } + return !adl_equal( + other.data_elements(), other.data_elements() + other.num_elements(), + self .data_elements() + ); + // return ! 
operator==(self, other); // commented due to bug in nvcc 22.11 + } + + BOOST_MULTI_HD constexpr auto data_elements() & -> typename array_ref::element_ptr {return array_ref::base_;} + BOOST_MULTI_HD constexpr auto data_elements() && -> typename array_ref::element_ptr {return array_ref::base_;} + // BOOST_MULTI_HD constexpr auto data_elements() const& -> typename array_ref::element_const_ptr {return array_ref::base_;} + + friend constexpr auto data_elements(array_ref&& self) -> typename array_ref::element_ptr {return std::move(self).data_elements();} + + // data() is here for compatibility with std::vector + template = 0> constexpr auto data() const& {return data_elements();} + template = 0> constexpr auto data() && {return data_elements();} + template = 0> constexpr auto data() & {return data_elements();} + + // TODO(correaa) : find a way to use [[deprecated("use data_elements()")]] for friend functions + friend constexpr auto data(array_ref const& self) -> typename array_ref::element_ptr {return self .data_elements();} + friend constexpr auto data(array_ref & self) -> typename array_ref::element_ptr {return self .data_elements();} + friend constexpr auto data(array_ref && self) -> typename array_ref::element_ptr {return std::move(self).data_elements();} + + using decay_type = typename array_ref::decay_type; + + constexpr auto decay() const& -> decay_type const& {return static_cast(*this);} + friend constexpr auto decay(array_ref const& self) -> decay_type const& {return self.decay();} + + private: + template + void check_sizes_() const { + if(size_type{std::get

(this->sizes())} != size_type{std::extent::value}) { + throw std::bad_cast{}; + } + if constexpr(DD + 1 != D) { + check_sizes_(); + } + } + + template static auto launder_(TT* pointer) -> TT* { + #if(defined(__cpp_lib_launder) && ( __cpp_lib_launder >= 201606L)) + return std::launder(pointer); + #else + return pointer ; + #endif + } + + protected: + template + constexpr auto to_carray_()& -> TTN& { + check_sizes_(); + return *launder_(reinterpret_cast(array_ref::base_)); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + } + + template + constexpr auto to_carray_() const& -> TTN const& { + check_sizes_(); + return *launder_(reinterpret_cast(array_ref::base_)); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + } + + public: + template, int> = 0> + constexpr explicit operator TTN const&() const& { return to_carray_(); } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + template, int> = 0> + constexpr explicit operator TTN&() && { return to_carray_(); } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + template, int> = 0> + constexpr explicit operator TTN&() & { return to_carray_(); } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + private: + template + auto serialize_structured_(Ar& arxiv, unsigned int const version) { + subarray::serialize(arxiv, version); + } + template + auto serialize_flat_(Archive& arxiv, unsigned int const /*version*/) { + using AT = multi::archive_traits; + arxiv & AT::make_nvp("elements", AT::make_array(this->data_elements(), static_cast(this->num_elements()))); + } +// template> +// auto serialize_binary_if(std::true_type, Ar& ar) { +// ar & AT::make_nvp("binary_data", AT::make_binary_object(this->data_elements(), static_cast(this->num_elements())*sizeof(typename array_ref::element))); +// } +// template +// auto serialize_binary_if(std::false_type, Ar& ar) {return serialize_flat(ar);} + + public: + template + auto serialize(Archive& arxiv, unsigned int const version) { + 
serialize_flat(arxiv, version); +// serialize_structured(ar, version); +// switch(version) { +// case static_cast( 0): return serialize_flat(arxiv); +// case static_cast(-1): return serialize_structured(arxiv, version); +// // case 2: return serialize_binary_if(std::is_trivially_copy_assignable{}, arxiv); +// default: +// if( this->num_elements() <= version ){serialize_structured(arxiv, version);} +// else {serialize_flat (arxiv );} +// } + } +}; + +template +using array_cref = array_ref< + std::decay_t, D, + typename std::pointer_traits::template rebind +>; + +template +using array_mref = array_ref< + std::decay_t, D, + std::move_iterator +>; + +template +constexpr auto ref( + TT(&arr)[N] // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) interact with legacy // NOSONAR +) { + return array_ref, std::rank_v>(arr); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) interact with legacy +} + +template +struct array_ptr +: subarray_ptr +, typename array_ref::layout_t> { + using basic_ptr = subarray_ptr, typename array_ref::layout_t>; + + constexpr array_ptr(Ptr data, multi::extensions_t extensions) + : basic_ptr{data, multi::layout_t{extensions}} {} + + constexpr explicit array_ptr(std::nullptr_t nil) : array_ptr{nil, multi::extensions_t{}} {} + + template + // cppcheck-suppress constParameterPointer ; workaround cppcheck 2.11 + constexpr explicit array_ptr(CArray* data) : array_ptr{data_elements(*data), extensions(*data)} {} + + template< + class TT, std::size_t N, + std::enable_if_t())), Ptr>,int> =0 // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) support legacy c-arrays + > + // cppcheck-suppress noExplicitConstructor ; // NOLINTNEXTLINE(runtime/explicit) + constexpr array_ptr(TT(*array)[N]) : array_ptr{data_elements(*array), extensions(*array)} {} // 
NOLINT(google-explicit-constructor,hicpp-explicit-conversions,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) array_ptr is more general than pointer c-array support legacy c-arrays // NOSONAR + + constexpr auto operator*() const { + return array_ref((*this)->extensions(), this->base()); + } +}; + +template +class array_ptr { // TODO(correaa) make it private mutable member + mutable multi::array_ref ref_; + + public: + constexpr explicit array_ptr(Ptr dat, typename multi::array_ref::extensions_type extensions) : ref_(dat, extensions) {} + constexpr explicit array_ptr(Ptr dat) : array_ptr(dat, typename multi::array_ref::extensions_type{}) {} + + constexpr explicit operator bool() const {return ref_.base();} + constexpr explicit operator Ptr () const {return ref_.base();} + + friend constexpr auto operator==(array_ptr const& self, array_ptr const& other) -> bool {return self.ref_.base() == other.ref_.base();} + friend constexpr auto operator!=(array_ptr const& self, array_ptr const& other) -> bool {return self.ref_.base() != other.ref_.base();} + + constexpr auto operator* () const -> multi::array_ref& {return ref_;} // NOLINT(cppcoreguidelines-pro-type-const-cast) : TODO(correaa) make ref base class a mutable member + constexpr auto operator->() const -> multi::array_ref* {return &ref_;} // NOLINT(cppcoreguidelines-pro-type-const-cast) : TODO(correaa) make ref base class a mutable member +}; + +template +constexpr auto addressof(TT(&array)[N]) { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility + return array_ptr< + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility + std::decay_t>, static_cast(std::rank{}), std::remove_all_extents_t* + >{&array}; +} + +template +using array_cptr = array_ptr::template rebind>; + +template +constexpr auto make_array_ref(P data, multi::extensions_t extensions) { + 
return array_ref::value_type, D, P>(data, extensions); +} + +template auto make_array_ref(P data, extensions_t<0> exts) {return make_array_ref<0>(data, exts);} +template auto make_array_ref(P data, extensions_t<1> exts) {return make_array_ref<1>(data, exts);} +template auto make_array_ref(P data, extensions_t<2> exts) {return make_array_ref<2>(data, exts);} +template auto make_array_ref(P data, extensions_t<3> exts) {return make_array_ref<3>(data, exts);} +template auto make_array_ref(P data, extensions_t<4> exts) {return make_array_ref<4>(data, exts);} +template auto make_array_ref(P data, extensions_t<5> exts) {return make_array_ref<5>(data, exts);} + +#if defined(__cpp_deduction_guides) + +template::value_type> // pointer_traits doesn't have ::value_type +array_ptr(It)->array_ptr; + +template::value_type> // pointer_traits doesn't have ::value_type +array_ptr(It, index_extensions<0>)->array_ptr; + +template::value_type> +array_ptr(It, index_extensions<1>)->array_ptr; +template::value_type> +array_ptr(It, index_extensions<2>)->array_ptr; +template::value_type> +array_ptr(It, index_extensions<3>)->array_ptr; + +template< + class T, std::size_t N, + typename V = std::remove_all_extents_t, std::size_t D = std::rank_v // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility +> +array_ptr(T(*)[N])->array_ptr(D)>; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility + +template array_ref(Ptr, index_extensions<0>) -> array_ref::value_type, 0, Ptr>; +template array_ref(Ptr, index_extensions<1>) -> array_ref::value_type, 1, Ptr>; +template array_ref(Ptr, index_extensions<2>) -> array_ref::value_type, 2, Ptr>; +template array_ref(Ptr, index_extensions<3>) -> array_ref::value_type, 3, Ptr>; +template array_ref(Ptr, index_extensions<4>) -> array_ref::value_type, 4, Ptr>; +template array_ref(Ptr, index_extensions<5>) -> array_ref::value_type, 5, Ptr>; + 
+template array_ref(It, Tuple)->array_ref::value_type, std::tuple_size::value, It>; +#endif + +// TODO(correaa) move to utility +template +constexpr auto rotated(const T(&array)[N]) noexcept { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility + return multi::array_ref, std::rank{}, decltype(base(array))>( // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility + base(array), extensions(array) + ).rotated(); +} +template +constexpr auto rotated(T(&array)[N]) noexcept { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility + return multi::array_ref, std::rank{}, decltype(base(array))>( // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility + base(array), extensions(array) + ).rotated(); +} + +template +constexpr auto operator/(RandomAccessIterator data, multi::extensions_t extensions) +-> multi::array_ptr::value_type, D, RandomAccessIterator> +{return {data, extensions};} + +template 1)>, class = decltype((void)adl_begin(*In{}), adl_end(*In{}))> +constexpr auto uninitialized_copy +// require N>1 (this is important because it forces calling placement new on the pointer +(In first, In last, multi::array_iterator dest) { + while(first != last) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + adl_uninitialized_copy(adl_begin(*first), adl_end(*first), adl_begin(*dest)); + ++first; + ++dest; + } + return dest; +} + +// begin and end for forwarding reference are needed in this namespace +// to overwrite the behavior of std::begin and std::end +// which take rvalue-references as const-references. 
+ +template auto begin(T&& rng) -> decltype(std::forward(rng).begin()) {return std::forward(rng).begin();} +template auto end (T&& rng) -> decltype(std::forward(rng).end() ) {return std::forward(rng).end() ;} + +template +auto transposed(T(&array)[N][M]) -> decltype(auto) {return ~multi::array_ref(array);} // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + +template +using array_const_view = array_ref const&; + +template +using array_view = array_ref&; + +} // end namespace boost::multi + +#ifndef BOOST_MULTI_SERIALIZATION_ARRAY_VERSION +#define BOOST_MULTI_SERIALIZATION_ARRAY_VERSION 0 // NOLINT(cppcoreguidelines-macro-usage) gives user opportunity to select serialization version //NOSONAR +// #define BOOST_MULTI_SERIALIZATION_ARRAY_VERSION 0 // save data as flat array +// #define BOOST_MULTI_SERIALIZATION_ARRAY_VERSION -1 // save data as structured nested labels array +// #define BOOST_MULTI_SERIALIZATION_ARRAY_VERSION 16 // any other value, structure for N <= 16, flat otherwise N > 16 + +namespace boost::multi { + constexpr inline int serialization_array_version = BOOST_MULTI_SERIALIZATION_ARRAY_VERSION; +} // end namespace boost::multi +#endif + +#undef BOOST_MULTI_HD + +#endif // BOOST_MULTI_ARRAY_REF_HPP_ diff --git a/external_codes/boost_multi/multi/include/boost/multi/detail/adl.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/adl.hpp new file mode 100644 index 0000000000..5709458f8d --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/adl.hpp @@ -0,0 +1,698 @@ +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_DETAIL_ADL_HPP +#define BOOST_MULTI_DETAIL_ADL_HPP +#pragma once + +#if defined(__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) +#include +#include +#include +#include +#include +#endif + +#include // for std::copy, std::copy_n, std::equal, etc +#include // std::size_t +#include // for begin, end +#include // for uninitialized_copy, etc +#include // std::conditional_t +#include + +#ifdef _MULTI_FORCE_TRIVIAL_STD_COMPLEX +#include +#endif + +#define BOOST_MULTI_DEFINE_ADL(FuN) /*NOLINT(cppcoreguidelines-macro-usage) TODO(correaa) consider replacing for all ADL'd operations*/ \ +namespace boost { \ +namespace multi { \ +namespace adl { \ + namespace custom {template struct FuN##_t;} __attribute__((unused)) \ + static constexpr class FuN##_t { \ + template [[deprecated]] auto _(priority<0>, As&&... args) const = delete; \ + template auto _(priority<1>, As&&... args) const BOOST_MULTI_DECLRETURN(std::FuN(std::forward(args)...)) \ + template auto _(priority<2>, As&&... args) const BOOST_MULTI_DECLRETURN( FuN(std::forward(args)...)) \ + template auto _(priority<3>, T&& t, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(t).FuN(std::forward(args)...)) \ + template auto _(priority<4>, As&&... args) const BOOST_MULTI_DECLRETURN(custom::FuN##_t::_(std::forward(args)...)) \ + public: \ + template auto operator()(As&&... 
args) const-> decltype(_(priority<4>{}, std::forward(args)...)) {return _(priority<4>{}, std::forward(args)...);} \ + } (FuN); \ +} /* end namespace adl */ \ +} /* end namespace multi */ \ +} /* end namespace boost */ + +namespace boost::multi { + +template +inline constexpr bool force_element_trivial = false; + +template +inline constexpr bool force_element_trivial_destruction = force_element_trivial; + +template +inline constexpr bool force_element_trivial_default_construction = force_element_trivial; + +#ifdef _MULTI_FORCE_TRIVIAL_STD_COMPLEX +template +inline constexpr bool force_element_trivial> = std::is_trivial_v; + +template +inline constexpr bool force_element_trivial_destruction> = std::is_trivially_default_constructible_v; + +template +inline constexpr bool force_element_trivial_default_construction> = std::is_trivially_destructible_v; + +template<> inline constexpr bool force_element_trivial > = true; +template<> inline constexpr bool force_element_trivial_default_construction> = true; +template<> inline constexpr bool force_element_trivial_destruction > = true; + +template<> inline constexpr bool force_element_trivial > = true; +template<> inline constexpr bool force_element_trivial_default_construction> = true; +template<> inline constexpr bool force_element_trivial_destruction > = true; +#endif + +} // end namespace boost::multi + +#define BOOST_MULTI_DECLRETURN(ExpR) -> decltype(ExpR) {return ExpR;} // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing +#define BOOST_MULTI_JUSTRETURN(ExpR) {return ExpR;} // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing + +namespace boost::multi { + +template struct priority : std::conditional_t> {}; + +class adl_copy_n_t { + template constexpr auto _(priority<0>/**/, As&&... 
args) const BOOST_MULTI_DECLRETURN(std:: copy_n( std::forward(args)...)) +#if defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) + template constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN(::thrust:: copy_n( std::forward(args)...)) +#endif + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( copy_n( std::forward(args)...)) + template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::decay_t:: copy_n(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).copy_n( std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(args)...)) +}; +inline constexpr adl_copy_n_t adl_copy_n; + +class adl_move_t { + template constexpr auto _(priority<0>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( std:: move( std::forward(args)...)) +#if defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) // there is no thrust::move algorithm + template constexpr auto _(priority<1>/**/, It first, It last, As&&... args) const BOOST_MULTI_DECLRETURN( thrust::copy(std::make_move_iterator(first), std::make_move_iterator(last), std::forward(args)...)) +#endif + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( move( std::forward(args)...)) + template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::decay_t:: move(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).move( std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... 
args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(args)...)) +}; +inline constexpr adl_move_t adl_move; + +class adl_fill_n_t { + template< class... As> constexpr auto _(priority<0>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( std:: fill_n (std::forward(args)...)) +#if defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) + template< class... As> constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( thrust:: fill_n (std::forward(args)...)) +#endif + template< class... As> constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( fill_n (std::forward(args)...)) + template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::decay_t:: fill_n(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).fill_n (std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(args)...)) +}; +inline constexpr adl_fill_n_t adl_fill_n; + +class adl_equal_t { + template< class...As> constexpr auto _(priority<1>/**/, As&&...args) const BOOST_MULTI_DECLRETURN( std:: equal( std::forward(args)...)) +#if defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) + template< class...As> constexpr auto _(priority<2>/**/, As&&...args) const BOOST_MULTI_DECLRETURN( ::thrust:: equal( std::forward(args)...)) +#endif + template< class...As> constexpr auto _(priority<3>/**/, As&&...args) const BOOST_MULTI_DECLRETURN( equal( std::forward(args)...)) + template< class...As> constexpr auto _(priority<4>/**/, As&&...args) const BOOST_MULTI_DECLRETURN( equal( std::forward(args)..., std::equal_to<>{})) // WORKAROUND makes syntax compatible with boost::ranges::equal if, for some reason, it is included. 
+ template constexpr auto _(priority<5>/**/, T&& arg, As&&...args) const BOOST_MULTI_DECLRETURN( std::decay_t:: equal(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<6>/**/, T&& arg, As&&...args) const BOOST_MULTI_DECLRETURN( std::forward(arg).equal( std::forward(args)...)) + + public: + template constexpr auto operator()(As&&...args) const BOOST_MULTI_DECLRETURN(_(priority<6>{}, std::forward(args)...)) +}; +inline constexpr adl_equal_t adl_equal; + +template struct adl_custom_copy; + +#ifndef _MSC_VER +template > void copy(As...) = delete; +#endif + +class adl_copy_t { + template::reference, typename std::iterator_traits::reference>> + > + constexpr auto _(priority<1>/**/, InputIt first, InputIt last, OutputIt d_first) const BOOST_MULTI_DECLRETURN(std::copy(first, last, d_first)) +#if defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( ::thrust::copy(std::forward(args)...)) +#endif + template< class... As> constexpr auto _(priority<3>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( copy(std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN( std::decay_t::copy(std::forward(arg), std::forward(args)...)) +// template constexpr auto _(priority<5>/**/, As&&... args) const BOOST_MULTI_DECLRETURN(boost::multi::adl_custom_copy...>::copy(std::forward(as)...)) + template constexpr auto _(priority<6>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).copy(std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN( _(priority<6>{}, std::forward(args)...) ) \ +}; +inline constexpr adl_copy_t adl_copy; + +namespace adl { + namespace custom {template struct fill_t;} + class fill_t { + template auto _(priority<1>/**/, As&&... 
args) const BOOST_MULTI_DECLRETURN( std:: fill (std::forward(args)...)) + template auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( fill (std::forward(args)...)) + template auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).fill (std::forward(args)...)) + template auto _(priority<4>/**/, As&&... args) const BOOST_MULTI_DECLRETURN(custom:: fill_t::_(std::forward(args)...)) + + public: + template auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<5>{}, std::forward(args)...)) + }; + inline constexpr fill_t fill; +} // end namespace adl + +// template +// struct alloc_construct_elem_t { +// Alloc* palloc_; +// template auto operator()(T&& ptr) const +// ->decltype(std::allocator_traits::construct(*palloc_, std::addressof(ptr))) { +// return std::allocator_traits::construct(*palloc_, std::addressof(ptr)); } +// }; + +namespace xtd { + +template // this one goes last!!! +constexpr auto to_address(T const& ptr) noexcept; + +template +constexpr auto me_to_address(priority<0> /**/, T const& ptr) noexcept + -> decltype(to_address(ptr.operator->())) { + return to_address(ptr.operator->()); +} + +template +constexpr auto me_to_address(priority<1> /**/, T const& ptr) noexcept + -> decltype(std::pointer_traits::to_address(ptr)) { + return std::pointer_traits::to_address(ptr); +} + +template{}, int> =0> +constexpr auto me_to_address(priority<2>/**/, T const& ptr) noexcept -> T { + static_assert(! std::is_function_v); + return ptr; +} + +template // this one goes last!!! 
+constexpr auto to_address(T const& ptr) noexcept +->decltype(me_to_address(priority<2>{}/**/, ptr)) { + return me_to_address(priority<2>{} , ptr); } + +template::value_type, typename = decltype(std::addressof(*ForwardIt{})), typename = decltype(Value())> +auto alloc_uninitialized_value_construct_n(Alloc& alloc, ForwardIt first, Size count) -> ForwardIt { +// ->std::decay_t::construct(alloc, std::addressof(*first), Value()), first)> + ForwardIt current = first; + try { + for (; count > 0 ; ++current, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + std::allocator_traits::construct(alloc, std::addressof(*current), Value()); // !!!!!!!!!!!!!! if you are using std::complex type consider making complex default constructible (e.g. by type traits) + } + // ::new (static_cast(std::addressof(*current))) Value(); + return current; + } catch(...) { + for(; current != first; ++first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + std::allocator_traits::destroy(alloc, std::addressof(*first)); + } + throw; + } +} + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunknown-warning-option" +#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" // TODO(correaa) use checked span +#endif + +template::value_type> +auto alloc_uninitialized_default_construct_n(Alloc& alloc, ForwardIt first, Size count) +-> std::decay_t::construct(alloc, std::addressof(*first)), first)> { + if(std::is_trivially_default_constructible_v) { + std::advance(first, count); + return first; + } + using alloc_traits = std::allocator_traits; + ForwardIt current = first; + + try { + // return std::for_each_n(first, count, [&](T& elem) { alloc_traits::construct(alloc, std::addressof(elem)); ++current; }); + // workadoung for gcc 8.3.1 in Lass + std::for_each(first, first + count, [&](T& elem) { alloc_traits::construct(alloc, std::addressof(elem)); ++current; }); + return first + count; + } + // 
LCOV_EXCL_START // TODO(correaa) add test + catch(...) { + std::for_each(first, current, [&](T& elem) { alloc_traits::destroy(alloc, std::addressof(elem)); }); + throw; + } + // LCOV_EXCL_STOP + + + // return current; +} + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +} // end namespace xtd + +// template struct alloc_destroy_elem_t { +// Alloc* palloc_; +// template constexpr auto operator()(T&& ptr) const { // ->decltype(std::allocator_traits::construct(*palloc_, std::forward(t)...)){ +// return std::allocator_traits::destroy(*palloc_, std::addressof(ptr)); +// } +// }; + +template::value_type> +constexpr auto destroy_n(BidirIt first, Size count) +->std::decay_t { + first += count; + for(; count != 0; --first, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + std::addressof(*(first-1))->~T(); + } + return first; +} + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunknown-warning-option" +#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" // TODO(correaa) use checked span +#endif + +template::value_type> +constexpr auto alloc_destroy_n(Alloc& alloc, BidirIt first, Size count) +->std::decay_t { + first += count; + for (; count != 0; --first, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + std::allocator_traits::destroy(alloc, std::addressof(*(first - 1))); + } + return first; +} + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +class adl_uninitialized_copy_t { + template // sfinae friendy std::uninitialized_copy + [[nodiscard]] constexpr auto _(priority<1>/**/, InIt first, InIt last, FwdIt d_first) const + // BOOST_MULTI_DECLRETURN( std::uninitialized_copy(first, last, d_first)) + { + #if __cplusplus >= 202002L + using ValueType = typename std::iterator_traits::value_type; + if( + std::is_constant_evaluated() + && (std::is_trivially_default_constructible_v || 
multi::force_element_trivial_default_construction) + ) { + return std:: copy(first, last, d_first); + } else + #endif + { + return std::uninitialized_copy(first, last, d_first); + } + } +// #if defined(__CUDACC__) || defined(__CUDA__) || defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) +#if defined(__CUDACC__) || defined(__HIPCC__) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( ::thrust::uninitialized_copy( std::forward(args)...)) // doesn't work with culang 17, cuda 12 ? +#endif + template constexpr auto _(priority<3>/**/, TB first, As&&... args ) const BOOST_MULTI_DECLRETURN( uninitialized_copy( first , std::forward(args)...)) + template constexpr auto _(priority<4>/**/, TB first, TE last, DB d_first) const BOOST_MULTI_DECLRETURN(std::decay_t ::uninitialized_copy( first , last, d_first )) + template constexpr auto _(priority<5>/**/, TB&& first, As&&... args ) const BOOST_MULTI_DECLRETURN(std::decay_t ::uninitialized_copy(std::forward(first), std::forward(args)...)) + template constexpr auto _(priority<6>/**/, TB&& first, As&&... args ) const BOOST_MULTI_DECLRETURN(std::forward(first).uninitialized_copy( std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<6>{}, std::forward(args)...)) +}; +inline constexpr adl_uninitialized_copy_t adl_uninitialized_copy; + +class adl_uninitialized_copy_n_t { + template constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( std::uninitialized_copy_n(std::forward(args)...)) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( uninitialized_copy_n(std::forward(args)...)) +#if defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) + template constexpr auto _(priority<3>/**/, As&&... 
args) const BOOST_MULTI_DECLRETURN( ::thrust::uninitialized_copy_n(std::forward(args)...)) + template(), ...))>, + std::enable_if_t< + std::is_trivially_default_constructible_v::value_type> + || multi::force_element_trivial_default_construction::value_type> + , int> =0 + > constexpr auto _(priority<4>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( ::thrust::copy_n(std::forward(args)...)) +#endif + template constexpr auto _(priority<5>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::decay_t:: uninitialized_copy_n(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<6>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).uninitialized_copy_n(std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<7>{}, std::forward(args)...)) // TODO(correaa) this might trigger a compiler crash with g++ 7.5 because of operator&() && overloads +}; +inline constexpr adl_uninitialized_copy_n_t adl_uninitialized_copy_n; + +class adl_uninitialized_move_n_t { + template constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( std:: uninitialized_move_n(std::forward(args)...)) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( uninitialized_move_n(std::forward(args)...)) + template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::decay_t:: uninitialized_move_n(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).uninitialized_move_n(std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... 
args) const {return _(priority<4>{}, std::forward(args)...);} +}; +inline constexpr auto adl_uninitialized_move_n = adl_uninitialized_move_n_t{}; + +namespace xtd { + +template +constexpr auto alloc_uninitialized_copy_n(std::allocator& /*alloc*/, InputIt first, Size count, ForwardIt d_first) { + return adl_uninitialized_copy_n(first, count, d_first);} + +template +constexpr auto alloc_uninitialized_move_n(std::allocator& /*alloc*/, InputIt first, Size count, ForwardIt d_first) { + return adl_uninitialized_move_n(first, count, d_first);} + +template +auto alloc_uninitialized_copy_n(Alloc& alloc, InputIt first, Size count, ForwardIt d_first) { + ForwardIt current = d_first; + try { + for(; count > 0; ++first, ++current, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + std::allocator_traits::construct(alloc, std::addressof(*current), *first); + } + return current; + } catch(...) { + for(; d_first != current; ++d_first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + std::allocator_traits::destroy(alloc, std::addressof(*d_first)); + } + throw; + } +} + +template +auto alloc_uninitialized_move_n(Alloc& alloc, InputIt first, Size count, ForwardIt d_first) { + ForwardIt current = d_first; + try { + for(; count > 0; ++first, ++current, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + std::allocator_traits::construct(alloc, std::addressof(*current), std::move(*first)); + } + return current; + } catch(...) 
{ + for(; d_first != current; ++d_first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + std::allocator_traits::destroy(alloc, std::addressof(*d_first)); + } + throw; + } +} + +template +constexpr auto alloc_uninitialized_copy(std::allocator&/*allocator*/, InputIt first, InputIt last, ForwardIt d_first) { + return adl_uninitialized_copy(first, last, d_first); +} + +template())), class=std::enable_if_t::value_type, typename std::iterator_traits::reference>>> +#if __cplusplus >= 202002L +constexpr +#endif +auto alloc_uninitialized_copy(Alloc& alloc, InputIt first, InputIt last, ForwardIt d_first) { +// ->std::decay_t // problematic in clang-11 + gcc-9 + ForwardIt current = d_first; + using alloc_traits = std::allocator_traits; + try { + std::for_each(first, last, [&](auto const& elem) { // TODO(correaa) replace by adl_for_each + alloc_traits::construct(alloc, std::addressof(*current), elem); + ++current; + }); + return current; + } catch(...) { + std::for_each(d_first, current, [&](auto const& elem) { + std::allocator_traits::destroy(alloc, std::addressof(elem)); + }); + throw; + } +} + +template +auto alloc_uninitialized_fill_n(Alloc& alloc, ForwardIt first, Size n, T const& value) +->std::decay_t::construct(alloc, std::addressof(*first), value), first)> { + ForwardIt current = first; // using std::to_address; + try { + for(; n > 0; ++current, --n) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + std::allocator_traits::construct(alloc, std::addressof(*current), value); + } + return current; + } catch(...) { + for(; first != current; ++first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + std::allocator_traits::destroy(alloc, std::addressof(*first)); + } + throw; + } +} +} // end namespace xtd + +class adl_distance_t { + template constexpr auto _(priority<1>/**/, As&&... 
args) const BOOST_MULTI_DECLRETURN( std:: distance(std::forward(args)...)) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( distance(std::forward(args)...)) + template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN( std::decay_t:: distance(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).distance(std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(args)...)) +}; +inline constexpr adl_distance_t adl_distance; + +class adl_begin_t { + template constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( std::begin(std::forward(args)...)) +// template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( begin(std::forward(args)...)) // this is catching boost::range_iterator if Boost 1.53 is included +// #if defined(__NVCC__) // this is no thrust::begin +// template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN(::thrust:: begin( std::forward(args)...)) +// #endif + template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN( std::decay_t::begin(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).begin(std::forward(args)...)) + + public: + template [[nodiscard]] constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(args)...)) +}; +inline constexpr adl_begin_t adl_begin; + +class adl_end_t { + template constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( std:: end(std::forward(args)...)) + // template constexpr auto _(priority<2>/**/, As&&... 
args) const BOOST_MULTI_DECLRETURN( end(std::forward(args)...)) +// #if defined(__NVCC__) // there is no thrust::end +// template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN(::thrust:: end( std::forward(args)...)) +// #endif + template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN( std::decay_t:: end(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).end(std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(args)...)) +}; +inline constexpr adl_end_t adl_end; + +class adl_size_t { + template constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( std::size(std::forward(args)...)) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( size(std::forward(args)...)) + template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN( std::decay_t::size(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).size(std::forward(args)...)) + + public: + template [[nodiscard]] constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(args)...)) +}; +inline constexpr adl_size_t adl_size; + +class adl_swap_ranges_t { + template constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( std:: swap_ranges(std::forward(args)...)) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( swap_ranges(std::forward(args)...)) + template constexpr auto _(priority<3>/**/, T&& arg, As&&... 
args) const BOOST_MULTI_DECLRETURN( std::decay_t:: swap_ranges(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).swap_ranges(std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(args)...)) +}; +inline constexpr adl_swap_ranges_t adl_swap_ranges; + +class adl_lexicographical_compare_t { + template /*[[gnu::pure]]*/ constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( std:: lexicographical_compare(std::forward(args)...)) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( lexicographical_compare(std::forward(args)...)) + template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN( std::decay_t:: lexicographical_compare(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).lexicographical_compare(std::forward(args)...)) + + public: + template /*[[gnu::pure]]*/ constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(args)...)) +}; +inline constexpr adl_lexicographical_compare_t adl_lexicographical_compare; + +class adl_uninitialized_value_construct_n_t { + template constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( std:: uninitialized_value_construct_n(std::forward(args)...)) // TODO(correaa) use boost alloc_X functions? + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( uninitialized_value_construct_n(std::forward(args)...)) + template constexpr auto _(priority<3>/**/, T&& arg, As&&... 
args) const BOOST_MULTI_DECLRETURN( std::decay_t::uninitialized_value_construct_n(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).uninitialized_value_construct_n(std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const {return (_(priority<4>{}, std::forward(args)...));} +}; +inline constexpr adl_uninitialized_value_construct_n_t adl_uninitialized_value_construct_n; + +class adl_alloc_uninitialized_value_construct_n_t { + template constexpr auto _(priority<1>/**/, Alloc&& /*alloc*/, As&&... args) const BOOST_MULTI_DECLRETURN( adl_uninitialized_value_construct_n(std::forward(args)...)) // NOLINT(cppcoreguidelines-missing-std-forward) +// template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( xtd:: alloc_uninitialized_value_construct_n(std::forward(args)...)) // TODO(correaa) use boost alloc_X functions? + template constexpr auto _(priority<3>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( alloc_uninitialized_value_construct_n(std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN( std::decay_t:: alloc_uninitialized_value_construct_n(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<5>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).alloc_uninitialized_value_construct_n(std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const {return (_(priority<5>{}, std::forward(args)...));} +}; +inline constexpr adl_alloc_uninitialized_value_construct_n_t adl_alloc_uninitialized_value_construct_n; + +class adl_uninitialized_default_construct_n_t { + template constexpr auto _(priority<1>/**/, As&&... 
args) const {return std:: uninitialized_default_construct_n( std::forward(args)...);} + // #if defined(__NVCC__) + // template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( thrust::uninitialized_default_construct_n( std::forward(args)...)) + // #endif + template constexpr auto _(priority<3>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( uninitialized_default_construct_n( std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN( std::decay_t:: uninitialized_default_construct_n(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<5>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).uninitialized_default_construct_n( std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const {return (_(priority<5>{}, std::forward(args)...));} +}; +inline constexpr adl_uninitialized_default_construct_n_t adl_uninitialized_default_construct_n; + +class adl_alloc_uninitialized_default_construct_n_t { + template constexpr auto _(priority<1>/**/, Alloc&&/*unused*/, As&&... args) const BOOST_MULTI_JUSTRETURN( adl_uninitialized_default_construct_n( std::forward(args)...)) // NOLINT(cppcoreguidelines-missing-std-forward) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( xtd::alloc_uninitialized_default_construct_n( std::forward(args)...)) // TODO(correaa) use boost alloc_X functions? +#if defined(__CUDACC__) || defined(__HIPCC__) + template constexpr auto _(priority<3>/**/, Alloc&& alloc, It first, Size n) const BOOST_MULTI_DECLRETURN( thrust::detail::default_construct_range(std::forward(alloc), first, n)) +#endif + template constexpr auto _(priority<4>/**/, As&&... args ) const BOOST_MULTI_DECLRETURN( alloc_uninitialized_default_construct_n( std::forward(args)...)) + template constexpr auto _(priority<5>/**/, T&& arg, As&&... 
args ) const BOOST_MULTI_DECLRETURN( std::decay_t:: alloc_uninitialized_default_construct_n(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<6>/**/, T&& arg, As&&... args ) const BOOST_MULTI_DECLRETURN(std::forward(arg).alloc_uninitialized_default_construct_n( std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const {return (_(priority<6>{}, std::forward(args)...));} +}; +inline constexpr adl_alloc_uninitialized_default_construct_n_t adl_alloc_uninitialized_default_construct_n; + +class adl_destroy_n_t { + template constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( multi:: destroy_n (std::forward(args)...)) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( destroy_n (std::forward(args)...)) + template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN( std::decay_t:: destroy_n(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).destroy_n (std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(args)...)) +}; +inline constexpr adl_destroy_n_t adl_destroy_n; + +class adl_alloc_destroy_n_t { + template constexpr auto _(priority<1>/**/, Alloc&&/*unused*/, As&&... args) const BOOST_MULTI_DECLRETURN( adl_destroy_n (std::forward(args)...)) // NOLINT(cppcoreguidelines-missing-std-forward) +#if defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) + template constexpr auto _(priority<2>/**/, Alloc& alloc, It first, Size n) const BOOST_MULTI_DECLRETURN( (thrust::detail::destroy_range(alloc, first, first + n))) +#endif + template< class... As> constexpr auto _(priority<3>/**/, As&&... 
args) const BOOST_MULTI_DECLRETURN(multi:: alloc_destroy_n (std::forward(args)...)) // TODO(correaa) use boost alloc_X functions? + template< class... As> constexpr auto _(priority<4>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( alloc_destroy_n (std::forward(args)...)) + template constexpr auto _(priority<5>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::decay_t:: alloc_destroy_n(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<6>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).alloc_destroy_n (std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<6>{}, std::forward(args)...)) +}; +inline constexpr adl_alloc_destroy_n_t adl_alloc_destroy_n; + +class adl_alloc_uninitialized_copy_t { + template constexpr auto _(priority<1>/**/, Alloc&&/*ll*/, As&&... args) const BOOST_MULTI_DECLRETURN( adl_uninitialized_copy( std::forward(args)...)) // NOLINT(cppcoreguidelines-missing-std-forward) + template constexpr auto _(priority<2>/**/, Alloc&& alloc, As&&... args) const BOOST_MULTI_DECLRETURN( xtd::alloc_uninitialized_copy(std::forward(alloc), std::forward(args)...)) + template constexpr auto _(priority<3>/**/, Alloc&& alloc, As&&... args) const BOOST_MULTI_DECLRETURN( alloc_uninitialized_copy(std::forward(alloc), std::forward(args)...)) + template constexpr auto _(priority<4>/**/, Alloc&& alloc, As&&... args) const BOOST_MULTI_DECLRETURN( std::decay_t::alloc_uninitialized_copy(std::forward(alloc), std::forward(args)...)) + template constexpr auto _(priority<5>/**/, Alloc&& alloc, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(alloc).alloc_uninitialized_copy( std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... 
args) const BOOST_MULTI_DECLRETURN(_(priority<5>{}, std::forward(args)...)) +}; +inline constexpr adl_alloc_uninitialized_copy_t adl_alloc_uninitialized_copy; + +class adl_alloc_uninitialized_copy_n_t { + template constexpr auto _(priority<1>/**/, Alloc&& /*alloc*/, As&&... args) const BOOST_MULTI_DECLRETURN( adl_uninitialized_copy_n(std::forward(args)...) ) // NOLINT(cppcoreguidelines-missing-std-forward) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( alloc_uninitialized_copy_n(std::forward(args)...)) +// template constexpr auto _(priority<3>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( xtd::alloc_uninitialized_copy_n(std::forward(args)...)) +// #if defined(__NVCC__) +// there is no thrust alloc uninitialized copy +// #endif + template constexpr auto _(priority<5>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN( std::decay_t::alloc_uninitialized_copy_n(std::forward(arg), std::forward(args)...)) + template constexpr auto _(priority<6>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).alloc_uninitialized_copy_n(std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... args) const {return _(priority<6>{}, std::forward(args)...);} +}; +inline constexpr adl_alloc_uninitialized_copy_n_t adl_alloc_uninitialized_copy_n; + +class alloc_uninitialized_move_n_t { +// TODO(correaa) : fallback to no alloc version + template constexpr auto _(priority<1>/**/, As&&... args) const {return( xtd:: alloc_uninitialized_move_n(std::forward(args)...));} + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( alloc_uninitialized_move_n(std::forward(args)...)) + template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(std::forward(arg).alloc_uninitialized_move_n(std::forward(args)...)) + + public: + template constexpr auto operator()(As&&... 
args) const {return _(priority<3>{}, std::forward(args)...);} \ +}; +inline constexpr alloc_uninitialized_move_n_t adl_alloc_uninitialized_move_n; + +class uninitialized_fill_n_t { + template constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( std:: uninitialized_fill_n(std::forward(args)...)) + template constexpr auto _(priority<2>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( uninitialized_fill_n(std::forward(args)...)) +#if defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) + template constexpr auto _(priority<3>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( ::thrust::uninitialized_fill_n(std::forward(args)...)) +#endif + template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const BOOST_MULTI_DECLRETURN( std::forward(arg).uninitialized_fill_n(std::forward(args)...)) + + public: + template constexpr auto operator()(T1&& arg, As&&... args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(arg), std::forward(args)...)) +}; +inline constexpr uninitialized_fill_n_t adl_uninitialized_fill_n; + +class alloc_uninitialized_fill_n_t { + template< class... As> constexpr auto _(priority<1>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( xtd::alloc_uninitialized_fill_n(std::forward(args)...)) + template constexpr auto _(priority<2>/**/, Alloc&&/*alloc*/, As&&... args) const BOOST_MULTI_DECLRETURN( adl_uninitialized_fill_n(std::forward(args)...)) // NOLINT(cppcoreguidelines-missing-std-forward) + template< class... As> constexpr auto _(priority<3>/**/, As&&... args) const BOOST_MULTI_DECLRETURN( alloc_uninitialized_fill_n(std::forward(args)...)) + template constexpr auto _(priority<4>/**/, Alloc&& alloc , As&&... args) const BOOST_MULTI_DECLRETURN( std::forward(alloc).alloc_uninitialized_fill_n(std::forward(args)...)) + + public: + template constexpr auto operator()(T1&& arg, As&&... 
args) const BOOST_MULTI_DECLRETURN(_(priority<4>{}, std::forward(arg), std::forward(args)...)) +}; +inline constexpr alloc_uninitialized_fill_n_t adl_alloc_uninitialized_fill_n; + +// template +// struct recursive { +// template +// static constexpr auto alloc_uninitialized_copy(Alloc& alloc, InputIt first, InputIt last, ForwardIt dest){ +// using std::begin; using std::end; +// while(first!=last) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm +// recursive::alloc_uninitialized_copy(alloc, begin(*first), end(*first), begin(*dest)); +// ++first; +// ++dest; +// } +// return dest; +// } +// }; + +// template<> struct recursive<1> { +// template +// static auto alloc_uninitialized_copy(Alloc& alloc, InputIt first, InputIt last, ForwardIt dest){ +// return adl_alloc_uninitialized_copy(alloc, first, last, dest); +// } +// }; + +} // end namespace boost::multi + +#undef BOOST_MULTI_DECLRETURN +#undef BOOST_MULTI_JUSTRETURN + +#endif diff --git a/external_codes/boost_multi/multi/include/boost/multi/detail/config/ASSERT.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/config/ASSERT.hpp new file mode 100644 index 0000000000..f9faf11e5f --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/config/ASSERT.hpp @@ -0,0 +1,22 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_DETAIL_CONFIG_ASSERT_HPP +#define BOOST_MULTI_DETAIL_CONFIG_ASSERT_HPP + +#include + +#if defined(BOOST_MULTI_ACCESS_NDEBUG) || defined(__CUDACC__) + #define BOOST_MULTI_ACCESS_ASSERT(Expr) // NOLINT(cppcoreguidelines-macro-usage +#else + // #include + // // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) this is for very inefficient asserts + // #if defined(__cpp_lib_stacktrace) && (__cpp_lib_stacktrace >= 202011L) + // #define BOOST_MULTI_ACCESS_ASSERT(Expr) assert((std::cerr<= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)) +# if (__has_cpp_attribute(nodiscard) >= 201907L) && (__cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)) +# define BOOST_MULTI_NODISCARD(MsG) [[nodiscard]] // [[nodiscard(MsG)]] in c++20 empty message is not allowed with paren +# else +# define BOOST_MULTI_NODISCARD(MsG) [[nodiscard]] // NOLINT(cppcoreguidelines-macro-usage) TODO(correaa) check if this is needed in C++17 +# endif +# elif __has_cpp_attribute(gnu::warn_unused_result) +# define BOOST_MULTI_NODISCARD(MsG) [[gnu::warn_unused_result]] +# endif + +// No discard class +# if(__has_cpp_attribute(nodiscard) && !defined(__NVCC__) && (!defined(__clang__) || (defined(__clang__) && (__cplusplus >= 202002L)))) && (__cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)) +# if (__has_cpp_attribute(nodiscard) >= 201907L) && (__cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)) +# define BOOST_MULTI_NODISCARD_CLASS(MsG) [[nodiscard_(MsG)]] +# else +# define BOOST_MULTI_NODISCARD_CLASS(MsG) [[nodiscard]] +# endif +# endif + +#endif + +#ifndef BOOST_MULTI_NODISCARD +# define BOOST_MULTI_NODISCARD(MsG) +#endif + +#ifndef BOOST_MULTI_NODISCARD_CLASS +# define BOOST_MULTI_NODISCARD_CLASS(MsG) +#endif + +// clang-format on + +#endif // BOOST_MULTI_DETAIL_CONFIG_NODISCARD_HPP diff --git 
a/external_codes/boost_multi/multi/include/boost/multi/detail/config/NO_UNIQUE_ADDRESS.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/config/NO_UNIQUE_ADDRESS.hpp new file mode 100644 index 0000000000..fe92fb5392 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/config/NO_UNIQUE_ADDRESS.hpp @@ -0,0 +1,22 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_DETAIL_CONFIG_NO_UNIQUE_ADDRESS_HPP +#define BOOST_MULTI_DETAIL_CONFIG_NO_UNIQUE_ADDRESS_HPP + +// clang-format off +#ifdef __has_cpp_attribute +# if __has_cpp_attribute(no_unique_address) >= 201803L && ! defined(__NVCC__) && ! defined(__PGI) && (__cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)) + // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) this macro will be needed until C++20 +# define BOOST_MULTI_NO_UNIQUE_ADDRESS [[no_unique_address]] +# endif +#endif + +#ifndef BOOST_MULTI_NO_UNIQUE_ADDRESS +# define BOOST_MULTI_NO_UNIQUE_ADDRESS +#endif +// clang-format on + +#endif // BOOST_MULTI_DETAIL_CONFIG_NO_UNIQUE_ADDRESS_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/detail/config/VERSION.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/config/VERSION.hpp new file mode 100644 index 0000000000..508fd5d36b --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/config/VERSION.hpp @@ -0,0 +1,17 @@ +// Copyright 2022-2023 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_CONFIG_VERSION_HPP_ +#define BOOST_MULTI_CONFIG_VERSION_HPP_ + +// NOLINTBEGIN(cppcoreguidelines-macro-usage,modernize-macro-to-enum) indicate library version +#define BOOST_MULTI_VERSION_MAJOR 0 +#define BOOST_MULTI_VERSION_MINOR 81 +#define BOOST_MULTI_VERSION_MICRO 0 + +#define BOOST_MULTI_VERSION (BOOST_MULTI_VERSION_MAJOR * 10000 + BOOST_MULTI_VERSION_MINOR * 100 + BOOST_MULTI_VERSION_MICRO) + +// NOLINTEND(cppcoreguidelines-macro-usage,modernize-macro-to-enum) + +#endif // BOOST_MULTI_CONFIG_VERSION_HPP_ diff --git a/external_codes/boost_multi/multi/include/boost/multi/detail/implicit_cast.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/implicit_cast.hpp new file mode 100644 index 0000000000..29cd0541f6 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/implicit_cast.hpp @@ -0,0 +1,21 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_DETAIL_IMPLICIT_CAST_HPP +#define BOOST_MULTI_DETAIL_IMPLICIT_CAST_HPP +#pragma once + +namespace boost::multi::detail { + +template constexpr bool is_implicitly_convertible_v = std::is_convertible_v; +template constexpr bool is_explicitly_convertible_v = std::is_constructible_v; + +template, int> =0> +constexpr auto implicit_cast(From&& ref) -> To {return static_cast(std::forward(ref));} + +template && ! 
std::is_convertible_v, int> =0> +constexpr auto explicit_cast(From&& ref) -> To {return static_cast(std::forward(ref));} + +} // end namespace boost::multi::detail +#endif diff --git a/external_codes/boost_multi/multi/include/multi/detail/index_range.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/index_range.hpp similarity index 67% rename from external_codes/boost_multi/multi/include/multi/detail/index_range.hpp rename to external_codes/boost_multi/multi/include/boost/multi/detail/index_range.hpp index a60f0c1e52..c6df08e0d2 100644 --- a/external_codes/boost_multi/multi/include/multi/detail/index_range.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/index_range.hpp @@ -1,14 +1,18 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -#ifndef MULTI_DETAIL_INDEX_RANGE_HPP -#define MULTI_DETAIL_INDEX_RANGE_HPP +#ifndef BOOST_MULTI_DETAIL_INDEX_RANGE_HPP +#define BOOST_MULTI_DETAIL_INDEX_RANGE_HPP +#pragma once -#include "multi/detail/serialization.hpp" -#include "multi/detail/tuple_zip.hpp" -#include "multi/detail/types.hpp" +#include +#include +#include +#include -#include // for min +#include // for std::min +#include // for std::plus<> #include // for std::random_iterator_tag // std::reverse_iterator #include // for numeric_limits #include // for forward @@ -24,51 +28,51 @@ template< > class iterator_facade { using self_type = Self; - [[nodiscard]] constexpr auto self() & {return static_cast(*this);} - [[nodiscard]] constexpr auto self() const& {return static_cast(*this);} + [[nodiscard]] constexpr auto self_() & {return static_cast(*this);} + [[nodiscard]] constexpr auto self_() const& {return static_cast(*this);} public: using value_type = ValueType; using reference = Reference; - using pointer = Pointer; + using 
pointer = Pointer; // NOSONAR(cpp:S5008) false positive using difference_type = DifferenceType; using iterator_category = AccessCategory; - friend constexpr auto operator!=(self_type const& self, self_type const& other) {return not(self == other);} + friend constexpr auto operator!=(self_type const& self, self_type const& other) {return !(self == other);} - friend constexpr auto operator<=(self_type const& self, self_type const& other) {return (self < other) or (self == other);} - friend constexpr auto operator> (self_type const& self, self_type const& other) {return not(self <= other);} - friend constexpr auto operator>=(self_type const& self, self_type const& other) {return not(self < other);} + friend constexpr auto operator<=(self_type const& self, self_type const& other) {return (self < other) || (self == other);} + friend constexpr auto operator> (self_type const& self, self_type const& other) {return !(self <= other);} + friend constexpr auto operator>=(self_type const& self, self_type const& other) {return !(self < other);} - constexpr auto operator-(difference_type n) const {return self_type{self()} -= n;} - constexpr auto operator+(difference_type n) const {return self_type{self()} += n;} + constexpr auto operator-(difference_type n) const {return self_type{self_()} -= n;} + constexpr auto operator+(difference_type n) const {return self_type{self_()} += n;} friend constexpr auto operator+(difference_type n, self_type const& self) {return self + n;} friend constexpr auto operator++(self_type& self, int) -> self_type {self_type ret = self; ++self; return ret;} friend constexpr auto operator--(self_type& self, int) -> self_type {self_type ret = self; --self; return ret;} - constexpr auto operator[](difference_type n) const {return *(self() + n);} + constexpr auto operator[](difference_type n) const {return *(self_() + n);} }; -template +template, class Minus = std::minus<> > class range { IndexType first_ = {}; - IndexTypeLast last_ = first_; + IndexTypeLast 
last_ = first_; // TODO(correaa) check how to do partially initialzed public: template // , class ArT = multi::archive_traits> void serialize(Archive& arxiv, unsigned /*version*/) { arxiv & multi::archive_traits::make_nvp("first", first_); - // arxiv & BOOST_SERIALIZATION_NVP( first_); - // arxiv & cereal:: make_nvp("first", first_); - // arxiv & CEREAL_NVP( first_); - // arxiv & first_ ; + // arxiv & BOOST_SERIALIZATION_NVP( first_); + // arxiv & cereal:: make_nvp("first", first_); + // arxiv & CEREAL_NVP( first_); + // arxiv & first_ ; arxiv & multi::archive_traits::make_nvp("last" , last_ ); - // arxiv & BOOST_SERIALIZATION_NVP( last_ ); - // arxiv & cereal:: make_nvp("last" , last_ ); - // arxiv & CEREAL_NVP( last_ ); - // arxiv & last_ ; + // arxiv & BOOST_SERIALIZATION_NVP( last_ ); + // arxiv & cereal:: make_nvp("last" , last_ ); + // arxiv & CEREAL_NVP( last_ ); + // arxiv & last_ ; } using value_type = IndexType; @@ -81,13 +85,31 @@ class range { range() = default; - template, value_type>> > - // cxxcheck-suppress internalAstError ; because bug in cppcheck - constexpr explicit range(Range&& other) + // range(range const&) = default; + + template>, int> =0, + decltype( + detail::implicit_cast(std::declval().first()), + detail::implicit_cast(std::declval().last()) + )* = nullptr + > + // cppcheck-suppress noExplicitConstructor ; // NOLINTNEXTLINE(runtime/explicit) + constexpr /*implicit*/ range(Range&& other) // NOLINT(bugprone-forwarding-reference-overload,google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) ranges are implicitly convertible if elements are implicitly convertible : first_{std::forward(other).first()}, last_{std::forward(other).last()} {} - constexpr range(IndexType first, IndexTypeLast last) noexcept : first_{first}, last_{last} {} - [[deprecated]] constexpr explicit range(IndexType first) : range{first, first + 1} {} + template< + class Range, + std::enable_if_t>, int> =0, + decltype( + 
detail::explicit_cast(std::declval().first()), + detail::explicit_cast(std::declval().last()) + )* =nullptr + > + constexpr explicit range(Range&& other) // NOLINT(bugprone-forwarding-reference-overload) + : first_{std::forward(other).first()}, last_{std::forward(other).last()} {} + + constexpr range(IndexType first, IndexTypeLast last) : first_{first}, last_{last} {} class const_iterator : public boost::multi::iterator_facade< const_iterator, @@ -149,17 +171,17 @@ class range { friend constexpr auto end (range const& self) {return self.end() ;} friend constexpr auto operator==(range const& self, range const& other) { - return (self.empty() and other.empty()) or (self.first_ == other.first_ and self.last_ == other.last_); + return (self.empty() && other.empty()) || (self.first_ == other.first_ && self.last_ == other.last_); } - friend constexpr auto operator!=(range const& self, range const& other) {return not(self == other);} + friend constexpr auto operator!=(range const& self, range const& other) {return !(self == other);} [[nodiscard]] constexpr auto find(value_type const& value) const -> range::const_iterator { - if(value >= last_ or value < first_) { + if(value >= last_ || value < first_) { return end(); } return begin() + (value - front()); } - template [[nodiscard]] constexpr auto contains(Value const& value) const {return (value >=first_) and (value < last_);} + template [[nodiscard]] constexpr auto contains(Value const& value) const -> bool {return (value >=first_) && (value < last_);} template [[nodiscard]] constexpr auto count (Value const& value) const -> value_type {return contains(value);} friend constexpr auto intersection(range const& self, range const& other) { @@ -167,11 +189,14 @@ class range { auto new_first = max(self.first(), other.first()); auto new_last = min(self.last() , other.last() ); new_first = min(new_first, new_last); - return range{new_first, new_last}; + return range(new_first, new_last); } - [[nodiscard]] constexpr auto 
contains(value_type const& value) const {return value >= first_ and value < last_;} + [[nodiscard]] constexpr auto contains(value_type const& value) const {return value >= first_ && value < last_;} }; +template // , class Plus = std::plus<>, class Minus = std::minus<> > +range(IndexType, IndexTypeLast) -> range; // #3 + template constexpr auto make_range(IndexType first, IndexTypeLast last) -> range { return {first, last}; @@ -182,7 +207,7 @@ class intersecting_range { range impl_{std::numeric_limits::min(), std::numeric_limits::max()}; constexpr intersecting_range() = default; // MSVC 19.07 needs constexpr to initialize ALL later - static constexpr auto make(IndexType first, IndexType last) -> intersecting_range { + static constexpr auto make_(IndexType first, IndexType last) -> intersecting_range { intersecting_range ret; ret.impl_ = range{first, last}; return ret; } friend constexpr auto intersection(intersecting_range const& self, range const& other) { @@ -192,10 +217,10 @@ class intersecting_range { return intersection(other, self.impl_); } friend constexpr auto operator<(intersecting_range const& self, IndexType end) { - return intersecting_range::make(self.impl_.first(), end); + return intersecting_range::make_(self.impl_.first(), end); } friend constexpr auto operator<=(IndexType first, intersecting_range const& self) { - return intersecting_range::make(first, self.impl_.last()); + return intersecting_range::make_(first, self.impl_.last()); } public: @@ -221,39 +246,38 @@ struct extension_t : public range { : range{first, last} {} // cppcheck-suppress noExplicitConstructor ; because syntax convenience // NOLINTNEXTLINE(runtime/explicit) - constexpr extension_t(IndexType last) noexcept // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) because syntax convenience + constexpr extension_t(IndexType last) noexcept // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) allow terse syntax : range(0, last) {} 
constexpr extension_t() noexcept : range() {} - friend constexpr auto size(extension_t const& self) -> typename extension_t::size_type {return self.size();} + friend constexpr auto size(extension_t const& self) -> typename extension_t::size_type { return self.size(); } -// template -// friend auto operator<<(OStream& os, extension_t const& self) -> decltype(os<<"[]") { -// if(self.empty()) { -// return os << static_cast const&>(self); -// } -// if(self.first() == 0) { -// return os <<"["<< self.last() <<"]"; -// } -// return os << static_cast const&>(self); -// } + // constexpr auto operator==(extension_t const& other) const {return static_cast const&>(*this) == static_cast const&>(other);} + // constexpr auto operator!=(extension_t const& other) const {return static_cast const&>(*this) != static_cast const&>(other);} - [[nodiscard]] constexpr auto start () const -> IndexType {return this->first();} - [[nodiscard]] constexpr auto finish() const -> IndexType {return this->last ();} - - friend constexpr auto operator==(extension_t const& self, extension_t const& other) {return static_cast const&>(self) == static_cast const&>(other);} - friend constexpr auto operator!=(extension_t const& self, extension_t const& other) {return static_cast const&>(self) != static_cast const&>(other);} + // constexpr friend auto operator==(extension_t const& self, extension_t const& other) { return static_cast const&>(self) == static_cast const&>(other); } + // constexpr friend auto operator!=(extension_t const& self, extension_t const& other) { return static_cast const&>(self) != static_cast const&>(other); } friend constexpr auto intersection(extension_t const& ex1, extension_t const& ex2) -> extension_t { - using std::max; using std::min; + using std::max; + using std::min; + auto first = max(ex1.first(), ex2.first()); - auto const last = min(ex1.last() , ex2.last() ); + auto const last = min(ex1.last(), ex2.last()); + first = min(first, last); + return extension_t{first, last}; } }; 
+template +extension_t(IndexType, IndexTypeLast) -> extension_t; + +template +extension_t(IndexType) -> extension_t; + template() + 1)> constexpr auto make_extension_t(IndexType first, IndexTypeLast last) -> extension_t { return {first, last}; @@ -291,44 +315,6 @@ struct repeat { using type = TT<>; }; -//template -//constexpr auto array_size_impl(const std::array&) -// -> std::integral_constant; - -//template -//constexpr auto array_size_impl(const std::tuple&) -// -> std::integral_constant>{}>; - -//template -//using array_size = decltype(array_size_impl(std::declval())); - -//template -//constexpr auto static_size() -> std::decay_t::value)> { -// return array_size::value; -//} -//template -//constexpr auto static_size(Array const& /*unused*/) -> decltype(static_size()) { -// return static_size(); -//} - -//// TODO(correaa) consolidate with tuple_tail defined somewhere else -//template -//constexpr auto head(Tuple&& t) -//->decltype(std::get<0>(std::forward(t))) { -// return std::get<0>(std::forward(t)); } - -//template -//constexpr auto tail_impl(std::index_sequence /*012*/, [[maybe_unused]] Tuple&& t) { // [[maybe_unused]] needed by icpc "error #869: parameter "t" was never referenced" -// using boost::multi::detail::get; -// return boost::multi::detail::tuple{get(std::forward(t))...}; -//// return make_tuple(std::get(std::forward(t))...); -//} - -//template -//constexpr auto tail(Tuple const& t) { -// return tail_impl(std::make_index_sequence - 1U>(), t); -//} - } // end namespace detail template using index_extensions = typename detail::repeat::type; @@ -337,8 +323,8 @@ template constexpr auto contains(index_extensions const& iex, Tuple const& tup) { // using detail::head; // using detail::tail; - return contains(head(iex), head(tup)) and contains(tail(iex), tail(tup)); + return contains(head(iex), head(tup)) && contains(tail(iex), tail(tup)); } } // end namespace boost::multi -#endif +#endif // BOOST_MULTI_DETAIL_INDEX_RANGE_HPP diff --git 
a/external_codes/boost_multi/multi/include/boost/multi/detail/layout.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/layout.hpp new file mode 100644 index 0000000000..4dd952a43b --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/layout.hpp @@ -0,0 +1,858 @@ +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_DETAIL_LAYOUT_HPP +#define BOOST_MULTI_DETAIL_LAYOUT_HPP + +#include +#include +#include + +#include + +#include +#include // for make_signed_t +#include // for swap + +#if defined(__NVCC__) +#define BOOST_MULTI_HD __host__ __device__ +#else +#define BOOST_MULTI_HD +#endif + +namespace boost::multi { + +namespace detail { + +template +constexpr auto tuple_tail_impl(Tuple&& tup, std::index_sequence /*012*/) { + (void)tup; // workaround bug warning in nvcc + using boost::multi::detail::get; + return boost::multi::detail::tuple{std::forward(std::forward(tup)))>(get(std::forward(tup)))...}; +} + +template +constexpr auto tuple_tail(Tuple&& t) // NOLINT(readability-identifier-length) std naming +->decltype(tuple_tail_impl(std::forward(t), std::make_index_sequence> - 1U>())) { + return tuple_tail_impl(std::forward(t), std::make_index_sequence> - 1U>()); } + +} // end namespace detail + +template struct layout_t; + +template +struct extensions_t : boost::multi::detail::tuple_prepend_t::base_> { + using base_ = boost::multi::detail::tuple_prepend_t::base_>; + + private: + base_ impl_; + + public: + static constexpr dimensionality_type dimensionality = D; + + extensions_t() = default; + using nelems_type = multi::index; + + template = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(multi::size_t size) : extensions_t{index_extension{size}} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : allow terse 
syntax + + template = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(index_extension ext1) : base_{ext1} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) allow terse syntax + + template = 0> + constexpr extensions_t(index_extension ext1, index_extension ext2) : base_{ext1, ext2} {} + + template = 0> + constexpr extensions_t(index_extension ext1, index_extension ext2, index_extension ext3) : base_{ext1, ext2, ext3} {} + + template = 0> + constexpr extensions_t(index_extension ext1, index_extension ext2, index_extension ext3, index_extension ext4) noexcept : base_{ext1, ext2, ext3, ext4} {} + + template = 0> + constexpr extensions_t(index_extension ext1, index_extension ext2, index_extension ext3, index_extension ext4, index_extension ext5) : base_{ext1, ext2, ext3, ext4, ext5} {} + + template = 0> + constexpr extensions_t(index_extension ext1, index_extension ext2, index_extension ext3, index_extension ext4, index_extension ext5, index_extension ext6) : base_{ext1, ext2, ext3, ext4, ext5, ext6} {} + + template{}}), std::enable_if_t = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(tuple extensions) : base_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + template{}}), std::enable_if_t = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(::std::tuple extensions) : base_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + template{}}), std::enable_if_t = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(tuple extensions) : base_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + 
template{}}), std::enable_if_t = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(::std::tuple extensions) : base_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + template{}}), std::enable_if_t = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(tuple extensions) : base_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + template{}}), std::enable_if_t = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(::std::tuple extensions) : base_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + template{}}), std::enable_if_t = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(tuple extensions) : base_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + template{}}), std::enable_if_t = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(::std::tuple extensions) : base_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + template{}}), std::enable_if_t = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(tuple extensions) : base_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + template{}}), std::enable_if_t = 0> + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(::std::tuple extensions) : base_{std::move(extensions)} {} // 
NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + template + constexpr explicit extensions_t(tuple const& tup) + : extensions_t(tup, std::make_index_sequence(D)>()) {} + + constexpr extensions_t(index_extension const& extension, typename layout_t::extensions_type const& other) + : extensions_t(tuple{extension, other.base()}) {} + + constexpr auto base() const& -> base_ const& {return *this;} // impl_;} + + friend constexpr auto operator*(index_extension const& extension, extensions_t const& self) -> extensions_t { + // return extensions_t(tuple(extension, self.base())); + return extensions_t(extension, self); + } + + friend BOOST_MULTI_HD auto operator==(extensions_t const& self, extensions_t const& other) {return self.base() == other.base();} + friend BOOST_MULTI_HD auto operator!=(extensions_t const& self, extensions_t const& other) {return self.base() != other.base();} + + using indices_type = multi::detail::tuple_prepend_t::indices_type>; + + [[nodiscard]] constexpr auto from_linear(nelems_type const& n) const -> indices_type { + auto const sub_num_elements = extensions_t{static_cast(*this).tail()}.num_elements(); + #if !(defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)) + assert( sub_num_elements != 0 ); // clang hip doesn't allow assert in host device functions + #endif + return multi::detail::ht_tuple(n/sub_num_elements, extensions_t{static_cast(*this).tail()}.from_linear(n%sub_num_elements)); + } + + friend constexpr auto operator%(nelems_type idx, extensions_t const& extensions) {return extensions.from_linear(idx);} + + constexpr explicit operator bool() const {return ! layout_t{*this}.empty();} + + template + constexpr auto to_linear(index const& idx, Indices const&... 
rest) const { + auto const sub_extensions = extensions_t{this->base().tail()}; + return idx*sub_extensions.num_elements() + sub_extensions.to_linear(rest...); + } + template + constexpr auto operator()(index idx, Indices... rest) const {return to_linear(idx, rest...);} + + constexpr auto operator[](index idx) const + ->decltype(std::declval()[idx]) { + return static_cast(*this)[idx]; } + + template + constexpr auto next_canonical(index& idx, Indices&... rest) const -> bool { // NOLINT(google-runtime-references) idx is mutated + if(extensions_t{this->base().tail()}.next_canonical(rest...)) {++idx;} + if(idx == this->base().head().last()) { + idx = this->base().head().first(); + return true; + } + return false; + } + template + constexpr auto prev_canonical(index& idx, Indices&... rest) const -> bool { // NOLINT(google-runtime-references) idx is mutated + if(extensions_t{this->base().tail()}.prev_canonical(rest...)) {--idx;} + if(idx < this->base().head().first()) { + idx = this->base().head().back(); + return true; + } + return false; + } + + private: + template + void serialize_impl_(Archive& arxiv, std::index_sequence /*unused012*/) { + using boost::multi::detail::get; + (void)std::initializer_list{(arxiv & multi::archive_traits::make_nvp("extension", get(this->base())) , 0U)...}; + } + + public: + template + void serialize(Archive& arxiv, unsigned int const /*version*/) { + serialize_impl_(arxiv, std::make_index_sequence(D)>()); + } + + private: + template(std::declval())...})> + constexpr extensions_t(Array const& tup, std::index_sequence /*unused012*/) : base_{boost::multi::detail::get(tup)...} {} + + static constexpr auto multiply_fold_() -> size_type {return static_cast(1U);} + static constexpr auto multiply_fold_(size_type const& size) -> size_type {return size;} + template + static constexpr auto multiply_fold_(size_type const& size, As const&... 
rest) -> size_type {return size*static_cast(multiply_fold_(rest...));} + + template constexpr auto num_elements_impl_(std::index_sequence /*unused012*/) const -> size_type { + using boost::multi::detail::get; + return static_cast(multiply_fold_(static_cast(get(this->base()).size())...)); + } + + public: + constexpr auto num_elements() const -> size_type { + return static_cast(num_elements_impl_(std::make_index_sequence(D)>())); + } + friend constexpr auto intersection(extensions_t const& self, extensions_t const& other) -> extensions_t{ + using boost::multi::detail::get; + return extensions_t{ + tuple{ + index_extension{intersection(get<0>(self.base()), get<0>(other.base()))}, + intersection( extensions_t{self.base().tail()}, extensions_t{other.base().tail()} ).base() + } + }; + } + + template =0> + friend constexpr auto get(extensions_t const& self) -> typename std::tuple_element::type { + using boost::multi::detail::get; + return get(self.base()); + } + + template =0> + constexpr auto get() const -> typename std::tuple_element::type { + using boost::multi::detail::get; + return get(this->base()); + } + + // constexpr operator base_ const&() const { return impl_; } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) TODO(correaa) use inheritance from tuple to avoid this and implement get in std:: +}; + +template<> struct extensions_t<0> : tuple<> { + using base_ = tuple<>; + + private: + // base_ impl_; + + public: + static constexpr dimensionality_type dimensionality = 0; // TODO(correaa): consider deprecation + + using rank = std::integral_constant; + + using nelems_type = index; + + explicit extensions_t(tuple<> const& tup) : base_{tup} {} + + extensions_t() = default; + + constexpr auto base() const -> base_ const& {return *this;} + + template static void serialize(Archive&/*ar*/, unsigned /*version*/) {/*noop*/} + + static constexpr auto num_elements() /*const*/ -> size_type {return 1;} + + using indices_type = tuple<>; + + [[nodiscard]] static 
constexpr auto from_linear(nelems_type const& n) /*const*/ -> indices_type { + assert(n == 0); (void)n; // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : constexpr function + return indices_type{}; + } + friend constexpr auto operator%(nelems_type const& n, extensions_t const& /*s*/) -> tuple<> {return /*s.*/from_linear(n);} + + static constexpr auto to_linear() /*const*/ -> difference_type {return 0;} + constexpr auto operator()() const {return to_linear();} + + constexpr void operator[](index) const = delete; + + static constexpr auto next_canonical() /*const*/ -> bool {return true;} + static constexpr auto prev_canonical() /*const*/ -> bool {return true;} + + friend constexpr auto intersection(extensions_t const& /*x1*/, extensions_t const& /*x2*/) -> extensions_t {return {};} + + constexpr BOOST_MULTI_HD auto operator==(extensions_t const& /*other*/) const {return true ;} + constexpr BOOST_MULTI_HD auto operator!=(extensions_t const& /*other*/) const {return false;} + + template // TODO(correaa) = detele ? + friend constexpr auto get(extensions_t const& self) -> typename std::tuple_element::type { + using boost::multi::detail::get; + return get(self.base()); + } + + template // TODO(correaa) = detele ? 
+ constexpr auto get() const -> typename std::tuple_element::type { + using boost::multi::detail::get; + return get(this->base()); + } + +}; + +template<> struct extensions_t<1> : tuple { + using base_ = tuple; + + static constexpr auto dimensionality = 1; // TODO(correaa): consider deprecation + + using nelems_type = index; + + // cppcheck-suppress noExplicitConstructor ; to allow terse syntax (compatible with std::vector(int) constructor + constexpr extensions_t(multi::size_t size) : base_{multi::index_extension{0, size}} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + template + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(tuple extensions) : base_{static_cast(extensions.head())} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) + constexpr extensions_t(multi::index_extension const& other) : base_{other} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) allow terse syntax + + constexpr explicit extensions_t(base_ tup) : base_{tup} {} + + extensions_t() = default; + constexpr auto base() const -> base_ const& {return *this;} + + BOOST_MULTI_HD constexpr auto operator==(extensions_t const& other) const -> bool {return base() == other.base();} // when compiling as cuda code, this needs --expt-relaxed-constexpr + BOOST_MULTI_HD constexpr auto operator!=(extensions_t const& other) const -> bool {return base() != other.base();} + + constexpr auto num_elements() const -> size_type { + return this->base().head().size(); + } + + using indices_type = multi::detail::tuple; + + [[nodiscard]] constexpr auto from_linear(nelems_type const& n) const -> indices_type { // NOLINT(readability-convert-member-functions-to-static) TODO(correaa) + return indices_type{n}; + } + + friend + constexpr auto operator%(nelems_type idx, 
extensions_t const& extensions) + -> multi::detail::tuple { + return extensions.from_linear(idx); + } + + static constexpr auto to_linear(index const& idx) -> difference_type /*const*/ {return idx;} + constexpr auto operator()(index const& idx) const -> difference_type {return to_linear(idx);} + constexpr auto operator[](index idx) const { + return multi::detail::tuple{std::get<0>(this->base())[idx]}; + } + + template + constexpr auto next_canonical(index& idx) const -> bool { // NOLINT(google-runtime-references) idx is mutated + // using boost::multi::detail::get; + if(idx == ::boost::multi::detail::get<0>(this->base()).back()) { + idx = ::boost::multi::detail::get<0>(this->base()).first(); + return true; + } + ++idx; + return false; + } + constexpr auto prev_canonical(index& idx) const -> bool { // NOLINT(google-runtime-references) idx is mutated + using boost::multi::detail::get; + if(idx == get<0>(this->base()).first()) { + idx = get<0>(this->base()).back(); + return true; + } + --idx; + return false; + } + + friend auto intersection(extensions_t const& self, extensions_t const& other) { + return extensions_t{ + intersection( + boost::multi::detail::get<0>(self .base()), + boost::multi::detail::get<0>(other.base()) + ) + }; + } + template + void serialize(Archive& arxiv, unsigned /*version*/) { + using boost::multi::detail::get; + auto& extension_ = get<0>(this->base()); + arxiv & multi::archive_traits::make_nvp("extension", extension_); + } + + template =0> + constexpr auto get() const -> decltype(auto) { // -> typename std::tuple_element::type { + using boost::multi::detail::get; + return get(this->base()); + } + + template =0> + friend constexpr auto get(extensions_t const& self) -> decltype(auto) { // -> typename std::tuple_element::type { + using boost::multi::detail::get; + return get(self.base()); + } +}; + +template using iextensions = extensions_t; + +template +constexpr auto array_size_impl(boost::multi::extensions_t const&) + -> 
std::integral_constant(D)>; + +} // end namespace boost::multi + +// Some versions of Clang throw warnings that stl uses class std::tuple_size instead +// of struct std::tuple_size like it should be +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wmismatched-tags" +#endif + +template +struct std::tuple_size> // NOLINT(cert-dcl58-cpp) to implement structured binding +: std::integral_constant(D)> {}; + +template<> +struct std::tuple_element<0, boost::multi::extensions_t<0>> { // NOLINT(cert-dcl58-cpp) to implement structured binding + using type = void; +}; + +template +struct std::tuple_element> { // NOLINT(cert-dcl58-cpp) to implement structured binding + using type = typename std::tuple_element::base_>::type; +}; + +namespace std { + +// clang wants tuple_size to be a class, not a struct with -Wmismatched-tags +#if !defined(__GLIBCXX__) || (__GLIBCXX__ <= 20190406) +template<> struct tuple_size> : std::integral_constant {}; +template<> struct tuple_size> : std::integral_constant {}; +template<> struct tuple_size> : std::integral_constant {}; +template<> struct tuple_size> : std::integral_constant {}; +template<> struct tuple_size> : std::integral_constant {}; +template<> struct tuple_size> : std::integral_constant {}; +#else +template<> class tuple_size> : public std::integral_constant {}; +template<> class tuple_size> : public std::integral_constant {}; +template<> class tuple_size> : public std::integral_constant {}; +template<> class tuple_size> : public std::integral_constant {}; +template<> class tuple_size> : public std::integral_constant {}; +template<> class tuple_size> : public std::integral_constant {}; +#endif + +#if !defined(_MSC_VER) && (!defined(__GLIBCXX__) || (__GLIBCXX__ <= 20240707) ) +template +constexpr auto get(boost::multi::extensions_t const& tp) // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple get, gcc workaround +->decltype(tp.template get()) { + return tp.template get(); } + +template 
+constexpr auto get(boost::multi::extensions_t& tp) // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple get, gcc workaround +->decltype(tp.template get()) { + return tp.template get(); } + +template +constexpr auto get(boost::multi::extensions_t&& tp) // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple get, gcc workaround +->decltype(std::move(tp).template get()) { + return std::move(tp).template get(); } +#endif + +} // end namespace std + +namespace boost::multi { + +struct monostate : equality_comparable { + friend BOOST_MULTI_HD constexpr auto operator==(monostate const& /*self*/, monostate const& /*other*/) {return true;} +}; + +template +struct layout_t<0, SSize> +: multi::equality_comparable > +{ + using dimensionality_type = multi::dimensionality_type; + using rank = std::integral_constant; + + using size_type = SSize; + using difference_type = std::make_signed_t; + using index = difference_type; + using index_extension = multi::index_extension; + using index_range = multi::range; + + using sub_type = monostate; + using stride_type = monostate; + using offset_type = index; + using nelems_type = index; + + using strides_type = tuple<>; + using offsets_type = tuple<>; + using nelemss_type = tuple<>; + + using extension_type = void; + + using extensions_type = extensions_t; + using sizes_type = tuple<>; + + static constexpr dimensionality_type rank_v = rank::value; + static constexpr dimensionality_type dimensionality = rank_v; // TODO(correaa) : consider deprecation + + friend constexpr auto dimensionality(layout_t const& /*self*/) {return rank_v;} + + private: + sub_type sub_ = {}; // TODO(correaa) use [[no_unique_address]] in C++20 + stride_type stride_ = {}; // TODO(correaa) use [[no_unique_address]] in C++20 + offset_type offset_ = 0; + nelems_type nelems_ = 1; // TODO(correaa) : or std::numeric_limits::max(); ? 
+ + template friend struct layout_t; + + public: + layout_t() = default; + BOOST_MULTI_HD constexpr explicit layout_t(extensions_type const& /*nil*/) {} + BOOST_MULTI_HD constexpr explicit layout_t(extensions_type const& /*nil*/, strides_type const& /*nil*/) {} + + BOOST_MULTI_HD constexpr layout_t(sub_type sub, stride_type stride, offset_type offset, nelems_type nelems) // NOLINT(bugprone-easily-swappable-parameters) + : sub_{sub}, stride_{stride}, offset_{offset}, nelems_{nelems} {} + + [[nodiscard]] constexpr auto extensions() const {return extensions_type{};} + friend constexpr auto extensions(layout_t const& self) {return self.extensions();} + + [[nodiscard]] constexpr auto num_elements() const {return nelems_;} + friend constexpr auto num_elements(layout_t const& self) {return self.num_elements();} + + [[nodiscard]] constexpr auto sizes() const {return tuple<>{};} + friend constexpr auto sizes(layout_t const& self) {return self.sizes();} + + [[nodiscard]] constexpr auto strides() const {return strides_type{};} + [[nodiscard]] constexpr auto offsets() const {return offsets_type{};} + [[nodiscard]] constexpr auto nelemss() const {return nelemss_type{};} + + constexpr auto operator()() const {return offset_;} + constexpr explicit operator offset_type() const {return offset_;} + + constexpr auto stride() const -> stride_type = delete; + constexpr auto offset() const -> offset_type {return offset_;} + constexpr auto nelems() const -> nelems_type {return nelems_;} + constexpr auto sub() const -> sub_type = delete; + + constexpr auto size() const -> size_type = delete; + constexpr auto extension() const -> extension_type = delete; + + constexpr auto is_empty() const noexcept {return nelems_ == 0;} + [[nodiscard/*for c++20 ("empty checks for emptyness")*/]] + constexpr auto empty() const noexcept {return nelems_ == 0;} + friend + constexpr auto empty(layout_t const& self) noexcept {return self.empty();} + + [[deprecated("is going to be removed")]] + constexpr auto 
is_compact() const -> bool = delete; + + constexpr auto base_size() const -> size_type {return 0;} + constexpr auto origin() const -> offset_type {return 0;} + + constexpr auto reverse() -> layout_t& {return *this;} + // [[deprecated("use two arg version")]] constexpr auto scale(size_type /*size*/) const {return *this;} + constexpr auto scale(size_type /*num*/, size_type /*den*/) const {return *this;} + +// friend constexpr auto operator!=(layout_t const& self, layout_t const& other) {return not(self == other);} + friend BOOST_MULTI_HD constexpr auto operator==(layout_t const& self, layout_t const& other) { + return + std::tie(self .sub_, self .stride_, self .offset_, self .nelems_) + == std::tie(other.sub_, other.stride_, other.offset_, other.nelems_) + ; + } + constexpr auto operator< (layout_t const& other) const -> bool { + return std::tie(offset_, nelems_) < std::tie(other.offset_, other.nelems_); + } + + constexpr auto rotate() -> layout_t& {return *this;} + constexpr auto unrotate() -> layout_t& {return *this;} + + constexpr auto hull_size() const -> size_type {return num_elements();} // not in bytes +}; + +template +struct layout_t +: multi::equality_comparable> +{ + using dimensionality_type = multi::dimensionality_type; + using rank = std::integral_constant; + + using sub_type = layout_t; + using size_type = SSize; + using difference_type = std::make_signed_t; + using index = difference_type; + + using index_extension = multi::index_extension; + using index_range = multi::range; + + using stride_type = index; + using offset_type = index; + using nelems_type = index; + + using strides_type = typename boost::multi::detail::tuple_prepend::type; + using offsets_type = typename boost::multi::detail::tuple_prepend::type; + using nelemss_type = typename boost::multi::detail::tuple_prepend::type; + + using extension_type = index_extension; // not index_range! 
+ + using extensions_type = extensions_t; + using sizes_type = typename boost::multi::detail::tuple_prepend::type; + + static constexpr dimensionality_type rank_v = rank::value; + static constexpr dimensionality_type dimensionality = rank_v; // TODO(correaa): consider deprecation + + [[deprecated("for compatibility with Boost.MultiArray, use static `dimensionality` instead")]] + static constexpr auto num_dimensions() {return dimensionality;} // NOSONAR(cpp:S1133) + + friend constexpr auto dimensionality(layout_t const& /*self*/) {return rank_v;} + + private: + sub_type sub_ = {}; + stride_type stride_ = 1; // or std::numeric_limits::max()? + offset_type offset_ = 0; + nelems_type nelems_ = 0; + + template friend struct layout_t; + + public: + layout_t() = default; + BOOST_MULTI_HD constexpr explicit layout_t(extensions_type const& extensions) : + sub_{ + std::apply( + [](auto const&... subextensions) {return multi::extensions_t{subextensions...};}, + detail::tail(extensions.base()) + ) + }, + stride_{sub_.num_elements()?sub_.num_elements():1}, + offset_{boost::multi::detail::get<0>(extensions.base()).first()*stride_}, + nelems_{boost::multi::detail::get<0>(extensions.base()).size()*sub().num_elements()} + {} + + BOOST_MULTI_HD constexpr explicit layout_t(extensions_type const& extensions, strides_type const& strides) : + sub_{ + std::apply( + [](auto const&... 
subextensions) {return multi::extensions_t{subextensions...};}, + detail::tail(extensions.base()) + ), + detail::tail(strides) + }, + stride_{boost::multi::detail::get<0>(strides)}, + offset_{boost::multi::detail::get<0>(extensions.base()).first()*stride_}, + nelems_{boost::multi::detail::get<0>(extensions.base()).size()*sub().num_elements()} + {} + + BOOST_MULTI_HD constexpr explicit layout_t(sub_type sub, stride_type stride, offset_type offset, nelems_type nelems) // NOLINT(bugprone-easily-swappable-parameters) + : sub_{sub}, stride_{stride}, offset_{offset}, nelems_{nelems} {} + + constexpr auto origin() const {return sub_.origin() - offset_;} + + private: + constexpr auto at_aux_(index idx) const { + return sub_type{sub_.sub_, sub_.stride_, sub_.offset_ + offset_ + idx*stride_, sub_.nelems_}(); + } + + public: + constexpr auto operator[](index idx) const {return at_aux_(idx);} + + template + constexpr auto operator()(index idx, Indices... rest) const {return operator[](idx)(rest...);} + constexpr auto operator()(index idx) const {return at_aux_(idx);} + constexpr auto operator()() const {return *this;} + + BOOST_MULTI_HD constexpr auto sub() & -> sub_type & {return sub_ ;} + BOOST_MULTI_HD constexpr auto sub() const& -> sub_type const& {return sub_ ;} + friend BOOST_MULTI_HD constexpr auto sub(layout_t const& self) -> sub_type const& {return self.sub();} + + BOOST_MULTI_HD constexpr auto nelems() & -> nelems_type & {return nelems_ ;} + BOOST_MULTI_HD constexpr auto nelems() const& -> nelems_type const& {return nelems_ ;} + friend BOOST_MULTI_HD constexpr auto nelems(layout_t const& self) -> nelems_type const& {return self.nelems();} + + constexpr BOOST_MULTI_HD auto nelems(dimensionality_type dim) const {return (dim != 0)?sub_.nelems(dim - 1):nelems_;} + + friend BOOST_MULTI_HD constexpr auto operator==(layout_t const& self, layout_t const& other) -> bool { + return + std::tie(self .sub_, self .stride_, self .offset_, self. 
nelems_) + == std::tie(other.sub_, other.stride_, other.offset_, other.nelems_) + ; + } + constexpr BOOST_MULTI_HD auto operator< (layout_t const& other) const -> bool { + return + std::tie( sub_, stride_, offset_, nelems_) + < std::tie(other.sub_, other.stride_, other.offset_, other.nelems_) + ; + } + + constexpr auto reindex(index idx) -> layout_t& {offset_ = idx*stride_; return *this;} + template + constexpr auto reindex(index idx, Indices... rest) -> layout_t& {reindex(idx).rotate().reindex(rest...).unrotate(); return *this;} + + constexpr auto num_elements() const noexcept -> size_type {return size()*sub_.num_elements();} + friend constexpr auto num_elements(layout_t const& self) noexcept -> size_type {return self.num_elements();} + + constexpr auto is_empty() const noexcept {return nelems_ == 0;} + friend constexpr auto is_empty(layout_t const& self) noexcept {return self.is_empty();} + + constexpr auto empty() const noexcept {return is_empty();} + + friend constexpr auto size(layout_t const& self) noexcept -> size_type {return self.size();} + constexpr auto size() const noexcept -> size_type { + // if(nelems_ == 0) {return 0;} + BOOST_MULTI_ACCESS_ASSERT(stride_); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + // if(nelems_ != 0) {MULTI_ACCESS_ASSERT(stride_ != 0);} + // return nelems_ == 0?0:nelems_/stride_; + return nelems_/stride_; + } + + constexpr BOOST_MULTI_HD auto stride() -> stride_type & {return stride_;} + constexpr BOOST_MULTI_HD auto stride() const -> stride_type const& {return stride_;} + + friend BOOST_MULTI_HD constexpr auto stride(layout_t const& self) -> index {return self.stride();} + + BOOST_MULTI_HD constexpr auto strides() const -> strides_type {return strides_type{stride(), sub_.strides()};} + friend BOOST_MULTI_HD constexpr auto strides(layout_t const& self) -> strides_type {return self.strides();} + + constexpr BOOST_MULTI_HD auto offset(dimensionality_type dim) 
const -> index {return (dim != 0)?sub_.offset(dim - 1):offset_;} + BOOST_MULTI_HD constexpr auto offset() const -> index {return offset_;} + friend BOOST_MULTI_HD constexpr auto offset(layout_t const& self) -> index {return self.offset();} + constexpr BOOST_MULTI_HD auto offsets() const {return boost::multi::detail::tuple{offset(), sub_.offsets()};} + constexpr BOOST_MULTI_HD auto nelemss() const {return boost::multi::detail::tuple{nelems(), sub_.nelemss()};} + + constexpr auto base_size() const {using std::max; return max(nelems_, sub_.base_size());} + + constexpr auto is_compact() const& {return base_size() == num_elements();} + friend constexpr auto is_compact(layout_t const& self) {return self.is_compact();} + + constexpr auto shape() const& -> decltype(auto) {return sizes();} + friend constexpr auto shape(layout_t const& self) -> decltype(auto) {return self.shape();} + + constexpr BOOST_MULTI_HD auto sizes() const noexcept {return tuple{size(), sub_.sizes()};} + + friend constexpr auto extension(layout_t const& self) {return self.extension();} + [[nodiscard]] constexpr auto extension() const -> extension_type { + if(nelems_ == 0) {return index_extension{};} + assert(stride_ != 0); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function + assert(offset_ % stride_ == 0); + assert(nelems_ % stride_ == 0); + return index_extension{offset_/stride_, (offset_ + nelems_)/stride_}; + } + + constexpr auto extensions() const {return extensions_type{tuple{extension(), sub_.extensions().base()}};} // tuple_cat(make_tuple(extension()), sub_.extensions().base())};} + friend constexpr auto extensions(layout_t const& self) -> extensions_type {return self.extensions();} + +// [[deprecated("use get(m.extensions()")]] // TODO(correaa) redeprecate, this is commented to give a smaller CI output + constexpr auto extension(dimensionality_type dim) const {return std::apply([](auto... 
extensions) {return std::array(D)>{extensions...};}, extensions().base()).at(static_cast(dim));} // cppcheck-suppress syntaxError ; bug in cppcheck 2.14 +// [[deprecated("use get(m.strides()) ")]] // TODO(correaa) redeprecate, this is commented to give a smaller CI output + constexpr auto stride (dimensionality_type dim) const {return std::apply([](auto... strides ) {return std::array(D)>{strides ...};}, strides () ).at(static_cast(dim));} +// [[deprecated("use get(m.sizes()) ")]] // TODO(correaa) redeprecate, this is commented to give a smaller CI output +// constexpr auto size (dimensionality_type dim) const {return std::apply([](auto... sizes ) {return std::array(D)>{sizes ...};}, sizes () ).at(static_cast(dim));} + + template + constexpr auto partition(Size const& count) -> layout_t& { + using std::swap; + stride_ *= count; + nelems_ *= count; + sub_.partition(count); + return *this; + } + + constexpr auto transpose() -> layout_t& { + using std::swap; + swap(stride_, sub_.stride_); + swap(offset_, sub_.offset_); + swap(nelems_, sub_.nelems_); + return *this; + } + constexpr auto reverse() -> layout_t& { + unrotate(); + sub_.reverse(); + return *this; + } + + constexpr auto rotate() -> layout_t& {if constexpr(D > 1) {transpose(); sub_. 
rotate();} return *this;} + constexpr auto unrotate() -> layout_t& {if constexpr(D > 1) {sub_.unrotate(); transpose();} return *this;} + + constexpr auto hull_size() const -> size_type { + if(is_empty()) {return 0;} + return std::abs(size()*stride())>std::abs(sub_.hull_size())?size()*stride():sub_.hull_size(); + } + + [[deprecated("use two arg version")]] constexpr auto scale(size_type factor) const { + return layout_t{sub_.scale(factor), stride_*factor, offset_*factor, nelems_*factor}; + } + + constexpr auto scale(size_type num, size_type den) const { + assert( (stride_*num) % den == 0 ); + return layout_t{sub_.scale(num, den), stride_*num/den, offset_*num/den, nelems_*num/den}; + } +}; + +constexpr auto +operator*(layout_t<0>::index_extension const& extensions_0d, layout_t<0>::extensions_type const& /*zero*/) +-> typename layout_t<1>::extensions_type { + return typename layout_t<1>::extensions_type{tuple::index_extension>{extensions_0d}}; +} + +constexpr auto operator*(extensions_t<1> const& extensions_1d, extensions_t<1> const& self) { + using boost::multi::detail::get; + return extensions_t<2>({get<0>(extensions_1d.base()), get<0>(self.base())}); +} + +} // end namespace boost::multi + +namespace boost::multi { + + template + struct convertible_tuple : Tuple { + using Tuple::Tuple; + explicit convertible_tuple(Tuple const& other) : Tuple(other) {} + + public: + using array_type = std::array::value>; + auto to_array() const noexcept { + return std::apply([](auto... 
es) noexcept { + return std::array, sizeof...(es)>{{static_cast(es) ...}}; + }, static_cast(*this)); + } + + /*explicit*/ operator array_type() const& noexcept {return to_array();} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + /*explicit*/ operator array_type() && noexcept {return to_array();} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + [[deprecated("dangling conversion")]] + operator std::ptrdiff_t const*() const { // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + #ifdef __clang__ + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wreturn-stack-address" + #endif + return to_array().data(); + #ifdef __clang__ + #pragma clang diagnostic pop + #endif + } + + template), int> =0> + friend constexpr auto get(convertible_tuple const& self) -> typename std::tuple_element::type { + return std::get(static_cast(self)); + } + }; + + template + struct decaying_array : Array { + using Array::Array; + explicit decaying_array(Array const& other) : Array(other) {} + + [[deprecated("possible dangling conversion, use `std::array p` instead of `auto* p`")]] + constexpr operator std::ptrdiff_t const*() const {return Array::data();} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + template), int> =0> + friend constexpr auto get(decaying_array const& self) -> typename std::tuple_element::type { + return std::get(static_cast(self)); + } + }; +} // end namespace boost::multi + +template struct std::tuple_size> : std::integral_constant> {}; // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple size +template struct std::tuple_size> : std::integral_constant> {}; // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple size + +#ifdef __clang__ +# pragma clang diagnostic pop +#endif + +#undef BOOST_MULTI_HD + +#endif // BOOST_MULTI_DETAIL_LAYOUT_HPP diff --git a/external_codes/boost_multi/multi/include/multi/detail/memory.hpp 
b/external_codes/boost_multi/multi/include/boost/multi/detail/memory.hpp similarity index 50% rename from external_codes/boost_multi/multi/include/multi/detail/memory.hpp rename to external_codes/boost_multi/multi/include/boost/multi/detail/memory.hpp index 6565148a30..dfeb09a590 100644 --- a/external_codes/boost_multi/multi/include/multi/detail/memory.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/memory.hpp @@ -1,41 +1,36 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -#ifndef MULTI_DETAIL_MEMORY_HPP -#define MULTI_DETAIL_MEMORY_HPP +#ifndef BOOST_MULTI_DETAIL_MEMORY_HPP +#define BOOST_MULTI_DETAIL_MEMORY_HPP +#pragma once #include // for std::allocator_traits +#include // for std::void_t namespace boost::multi { template -struct allocator_traits : std::allocator_traits { -#if 0 - template - static auto construct(Alloc& alloc, Ptr p, Args&&... 
args) // NOLINT(readability-identifier-length) std naming - ->decltype(alloc.construct(p, std::forward(args)...)) { - return alloc.construct(p, std::forward(args)...); } - - template - static auto destroy(Alloc& alloc, Ptr p) // NOLINT(readability-identifier-length) std naming - ->decltype(alloc.destroy(p)) { - return alloc.destroy(p); } -#endif -}; +struct allocator_traits : std::allocator_traits {}; // https://en.cppreference.com/w/cpp/memory/destroy -template::value, int> = 0> +template< + class Alloc, class ForwardIt, + std::enable_if_t::value, int> =0 +> void destroy(Alloc& alloc, ForwardIt first, ForwardIt last) { for(; first != last; ++first) {allocator_traits::destroy(alloc, std::addressof(*first));} // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm } -template::value and ForwardIt::rank_v == 1, int> = 0> +template::value && ForwardIt::rank_v == 1, int> = 0> void destroy(Alloc& alloc, ForwardIt first, ForwardIt last) { // using multi::to_address; - for(; first != last; ++first) {alloc.destroy(to_address(first));} // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + std::for_each(first, last, [&](auto& elem) {alloc.destroy(addressof(elem));}); + // for(; first != last; ++first) {alloc.destroy(to_address(first));} // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm } -template::value and ForwardIt::rank_v != 1, int> = 0> +template::value && ForwardIt::rank_v != 1, int> = 0> void destroy(Alloc& alloc, ForwardIt first, ForwardIt last) { for(; first != last; ++first) {destroy(alloc, begin(*first), end(*first));} // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm } @@ -67,7 +62,7 @@ auto uninitialized_default_construct_n(Alloc& alloc, ForwardIt first, Size n) -> template< class Alloc, class ForwardIt, class Size, typename T = typename std::iterator_traits::value_type, - typename = std::enable_if_t{}> + typename = std::enable_if_t{}> > auto 
uninitialized_value_construct_n(Alloc& alloc, ForwardIt first, Size n) -> ForwardIt { ForwardIt current = first; // using std::addressof; @@ -75,7 +70,7 @@ auto uninitialized_value_construct_n(Alloc& alloc, ForwardIt first, Size n) -> F for(; n > 0; ++current, --n) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm allocator_traits::construct(alloc, to_address(current), T{}); } - // a.construct(to_address(current), T()); // a.construct(std::pointer_traits::pointer_to(*current), T()); // AT::construct(a, to_address(current), T()); // AT::construct(a, addressof(*current), T()); // a.construct(addressof(*current), T()); + // a.construct(to_address(current), T()); // a.construct(std::pointer_traits::pointer_to(*current), T()); // AT::construct(a, to_address(current), T()); // AT::construct(a, addressof(*current), T()); // a.construct(addressof(*current), T()); return current; } catch(...) {destroy(alloc, first, current); throw;} } @@ -87,7 +82,7 @@ template auto std_copy(Args&&... 
args) { namespace xtd { -template{}> > +template{}> > auto alloc_uninitialized_copy(Alloc& alloc, InputIt first, InputIt last, MIt dest) -> MIt { MIt current = dest; // using multi::to_address; @@ -99,49 +94,26 @@ auto alloc_uninitialized_copy(Alloc& alloc, InputIt first, InputIt last, MIt des } // end namespace xtd -// // https://en.cppreference.com/w/cpp/memory/destroy_at -// template > -// void destroy_at(Alloc& a, T* p) {AT::destroy(a, p);} - -// // https://en.cppreference.com/w/cpp/memory/destroy_n -// template // , typename AT = typename std::allocator_traits > -// auto destroy_n(Alloc& a, ForwardIt first, Size n) -> ForwardIt { -// // using std::addressof; -// for(; n > 0; ++first, --n) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm -// allocator_traits::destroy(a, to_address(first)); -// } -// return first; -// } +template +struct is_allocator : std::false_type {}; -template class is_allocator { - template< - class A, - class P = typename A::pointer, class S = typename A::size_type, - typename = decltype( - std::declval() == A{std::declval()}, - std::declval().deallocate(P{std::declval().allocate(std::declval())}, std::declval()) - ) - > - static auto aux(A const&) -> std::true_type; - static auto aux(... 
) -> std::false_type; - - public: - static bool const value = decltype(aux(std::declval()))::value; - constexpr explicit operator bool() const {return value;} -}; +template +struct is_allocator() == Alloc{std::declval()}, + std::declval().deallocate(typename Alloc::pointer{std::declval().allocate(std::declval())}, std::declval()) +)>> : std::true_type {}; template constexpr bool is_allocator_v = is_allocator::value; -template -auto uninitialized_copy(InputIt first, InputIt last, ForwardIt dest) { - while(first!=last) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - uninitialized_copy(begin(*first), end(*first), begin(*dest)); - ++first; - ++dest; - } - return dest; -} +// template +// auto uninitialized_copy(InputIt first, InputIt last, ForwardIt dest) { +// while(first!=last) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm +// uninitialized_copy(begin(*first), end(*first), begin(*dest)); +// ++first; +// ++dest; +// } +// return dest; +// } } // end namespace boost::multi - -#endif +#endif // BOOST_MULTI_DETAIL_MEMORY_HPP diff --git a/external_codes/boost_multi/multi/include/multi/detail/monotonic_allocator_.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/monotonic_allocator_.hpp similarity index 97% rename from external_codes/boost_multi/multi/include/multi/detail/monotonic_allocator_.hpp rename to external_codes/boost_multi/multi/include/boost/multi/detail/monotonic_allocator_.hpp index d6e2503d17..4e4624d43c 100644 --- a/external_codes/boost_multi/multi/include/multi/detail/monotonic_allocator_.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/monotonic_allocator_.hpp @@ -17,7 +17,7 @@ namespace multi { template struct block : std::pointer_traits{ - template block(char(&t)[N]) : start_{t}, lenght_{N} {} + template block(char(&t)[N]) : start_{t}, lenght_{N} {} // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) support legacy 
c-arrays typename block::pointer start_; typename block::size_type lenght_; bool contains(typename block::pointer p) const{ @@ -33,7 +33,8 @@ class monotonic_buffer : block { size_type allocated_bytes_ = 0; size_type deallocated_bytes_ = 0; // size_type position_ = 0; - static std::size_t max_alignment = A; + + static std::size_t const max_alignment = A; static size_type align_up(size_type n) noexcept { return (n + (max_alignment-1)) & ~(max_alignment-1); diff --git a/external_codes/boost_multi/multi/include/boost/multi/detail/operators.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/operators.hpp new file mode 100644 index 0000000000..3bd9709a62 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/operators.hpp @@ -0,0 +1,230 @@ +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_DETAIL_OPERATORS_HPP +#define BOOST_MULTI_DETAIL_OPERATORS_HPP +#pragma once + +#include // for enable_if +#include // for forward + +#if defined(__NVCC__) +#define BOOST_MULTI_HD __host__ __device__ +#else +#define BOOST_MULTI_HD +#endif + +namespace boost::multi { + +struct empty_base {}; + +template struct selfable { + using self_type = Self; + constexpr auto self() const -> self_type const& { return static_cast(*this); } + constexpr auto self() -> self_type& { return static_cast(*this); } + friend constexpr auto self(selfable const& self) -> self_type const& { return self.self(); } +}; + +template struct equality_comparable2; + +template +struct equality_comparable2 : selfable { + // friend constexpr auto operator==(equality_comparable2 const& self, equality_comparable2 const& other) {return self.self() == other.self() ;} + friend constexpr auto operator!=(equality_comparable2 const& self, equality_comparable2 const& other) { return !(self.self() == other.self()); } +}; + +template struct equality_comparable : 
equality_comparable2 {}; + +template struct totally_ordered2; + +template +struct totally_ordered2 : equality_comparable2, totally_ordered2> { + using self_type = Self; + constexpr auto self() const -> self_type const& { return static_cast(*this); } + + // friend auto operator< (totally_ordered2 const& self, totally_ordered2 const& other) -> bool {return self.self() < other.self() ;} + friend auto operator==(totally_ordered2 const& self, totally_ordered2 const& other) -> bool { return !(self.self() < other.self()) && !(other.self() < self.self()); } + // friend auto operator!=(totally_ordered2 const& self, totally_ordered2 const& other) {return (s.self() < o.self()) or (o.self() < s.self());} + + friend auto operator<=(totally_ordered2 const& self, totally_ordered2 const& other) -> bool { return !(other.self() < self.self()); } + + friend auto operator>(totally_ordered2 const& self, totally_ordered2 const& other) -> bool { return !(self.self() < other.self()) && !(self.self() == other.self()); } + friend auto operator>=(totally_ordered2 const& self, totally_ordered2 const& other) -> bool { return !(self.self() < other.self()); } +}; + +template using totally_ordered = totally_ordered2; + +template +struct totally_ordered2 { + // template + // friend constexpr auto operator<=(T const& self, U const& other) { return (self < other) || (self == other); } + //template + //friend constexpr auto operator>=(T const& self, U const& other) { return (other < self) || (self == other); } + // template + // friend constexpr auto operator>(T const& self, U const& other) { return other < self; } +}; + +template +struct copy_constructible {}; + +template +struct weakly_incrementable { + // friend T& operator++(weakly_incrementable& t){return ++static_cast(t);} +}; + +template +struct weakly_decrementable { + // friend T& operator--(weakly_decrementable& t){return --static_cast(t);} +}; + +template struct incrementable : totally_ordered { // , self_mutable { + friend constexpr auto 
operator++(incrementable& self, int) -> Self { + Self tmp{self.self()}; + ++self.self(); + assert(self.self() > tmp); + return tmp; + } +}; + +template +struct decrementable : weakly_decrementable { + template>> + friend constexpr auto operator--(U& self, int) -> T { + T tmp{self}; + --self; + return tmp; + } +}; + +template +struct steppable : totally_ordered { + using self_type = Self; + constexpr auto self() const -> self_type const& { return static_cast(*this); } + constexpr auto self() -> self_type& { return static_cast(*this); } + + friend constexpr auto operator++(steppable& self, int) -> Self { + Self tmp{self.self()}; + ++self.self(); + return tmp; + } + friend constexpr auto operator--(steppable& self, int) -> Self { + Self tmp{self.self()}; + --self.self(); + return tmp; + } +}; + +template +struct affine_with_unit : steppable { // affine_with_unit > { + using self_type = Self; + constexpr auto cself() const -> self_type const& { return static_cast(*this); } + constexpr auto self() const -> self_type const& { return static_cast(*this); } + constexpr auto self() -> self_type& { return static_cast(*this); } + + using difference_type = Difference; + friend constexpr auto operator++(affine_with_unit& self) -> Self& { return self.self() += difference_type{1}; } + friend constexpr auto operator--(affine_with_unit& self) -> Self& { return self.self() -= difference_type{1}; } + + // friend constexpr auto operator-(affine_with_unit const& self, difference_type const& diff) -> Self { + // auto ret{self.self()}; + // ret += (-diff); + // return ret; + // } + constexpr auto operator+(difference_type const& diff) const -> Self { + auto ret{cself()}; + ret += diff; + return ret; + } + friend constexpr auto operator+(difference_type const& diff, affine_with_unit const& self) -> Self { + auto ret{self.self()}; + ret += diff; + return ret; + } + friend constexpr auto operator<(affine_with_unit const& self, affine_with_unit const& other) -> bool { + return 
difference_type{0} < other.self() - self.self(); + } +}; + +template +struct dereferenceable { + using self_type = Self; + constexpr auto self() const -> self_type const& { return static_cast(*this); } + constexpr auto self() -> self_type& { return static_cast(*this); } + + using reference = Reference; + + constexpr auto operator*() const -> reference { return *(self().operator->()); } +}; + +template +struct random_accessable // NOLINT(fuchsia-multiple-inheritance) +: affine_with_unit +, dereferenceable { + using difference_type = Difference; + using reference = Reference; + using iterator_category = std::random_access_iterator_tag; + + using self_type = Self; + constexpr auto self() const -> self_type const& { return static_cast(*this); } + constexpr auto self() -> self_type& { return static_cast(*this); } + + BOOST_MULTI_HD constexpr auto operator[](difference_type idx) const -> reference { return *(self() + idx); } +}; + +// template +// struct dereferenceable { +// using reference = Reference; +// friend constexpr auto operator*(dereferenceable const& t) -> reference {return *static_cast(t);} +// }; + +template +struct addable2 { + using difference_type = D; + template{}>> + friend constexpr auto operator+(TT&& self, difference_type const& diff) -> T { + T tmp{std::forward(self)}; + tmp += diff; + return tmp; + } + template{}>> + friend constexpr auto operator+(difference_type const& diff, TT&& self) -> T { return std::forward(self) + diff; } +}; + +template +struct subtractable2 { + using difference_type = D; + // TODO(correaa) clang 16 picks up this and converts the difference_type to TT !! 
+ // template + // friend auto operator-(TT&& self, difference_type const& diff) -> T {T tmp{std::forward(self)}; tmp -= diff; return tmp;} +}; + +template +struct affine : addable2 +, subtractable2 { + using difference_type = Difference; +}; + +template +struct random_iterable { + constexpr auto cfront() const& -> decltype(auto) { return static_cast(*this).front(); } + constexpr auto cback() const& -> decltype(auto) { return static_cast(*this).back(); } + friend constexpr auto cfront(T const& self) -> decltype(auto) { return self.cfront(); } + friend constexpr auto cback(T const& self) -> decltype(auto) { return self.cback(); } +}; + +template +struct random_access_iterator : equality_comparable2 { + using difference_type = Difference; + using value_type = Value; + using pointer = Pointer; + using reference = Reference; + using iterator_category = std::random_access_iterator_tag; + auto operator*() const -> Reference { return *static_cast(*this); } +}; + +} // end namespace boost::multi + +#undef BOOST_MULTI_HD + +#endif // BOOST_MULTI_DETAIL_OPERATORS_HPP diff --git a/external_codes/boost_multi/multi/include/multi/memory/pointer_traits.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/pointer_traits.hpp similarity index 65% rename from external_codes/boost_multi/multi/include/multi/memory/pointer_traits.hpp rename to external_codes/boost_multi/multi/include/boost/multi/detail/pointer_traits.hpp index b698a88e8a..c9c57bc490 100644 --- a/external_codes/boost_multi/multi/include/multi/memory/pointer_traits.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/pointer_traits.hpp @@ -1,8 +1,10 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. Correa +// Copyright 2020-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt -#ifndef MULTI_MEMORY_POINTER_TRAITS_HPP -#define MULTI_MEMORY_POINTER_TRAITS_HPP +#ifndef BOOST_MULTI_DETAIL_POINTER_TRAITS_HPP +#define BOOST_MULTI_DETAIL_POINTER_TRAITS_HPP +#pragma once #include // for size_t #include // for iterator_traits @@ -11,7 +13,7 @@ namespace boost::multi { -template struct priority_me : std::conditional_t>{}; +template struct priority_me : std::conditional_t>{}; template auto dat_aux(priority_me<0>, Pointer ) -> std::allocator::value_type>; template auto dat_aux(priority_me<1>, T* ) -> std::allocator::value_type>; @@ -22,5 +24,5 @@ struct pointer_traits/*, typename Pointer::default_allocator_type>*/ : std::poin using default_allocator_type = decltype(dat_aux(priority_me<2>{}, std::declval())); }; -} // end namespace boost::multi -#endif +} // end namespace boost::multi +#endif // BOOST_MULTI_DETAIL_POINTER_TRAITS_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/detail/serialization.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/serialization.hpp new file mode 100644 index 0000000000..b1dda081e0 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/serialization.hpp @@ -0,0 +1,158 @@ +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_DETAIL_SERIALIZATION_HPP_ +#define BOOST_MULTI_DETAIL_SERIALIZATION_HPP_ + +#include // for std::for_each +#include // for std::byte +#include // for std::uint32_t +#include + +namespace boost { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat +namespace archive { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat +namespace detail { + +template class common_iarchive; +template class common_oarchive; + +} // end namespace detail +} // end namespace archive + +namespace serialization { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat + +template class nvp; // dependency "in name only" +template class array_wrapper; // dependency "in name only" +struct binary_object; // dependency "in name only", if you get an error here, it means that eventually you need to include #include + +template struct version; + +// template//, std::enable_if_t>{}, int> =0> +// auto operator>>(Archive& ar, T&& t) -> Archive& {return ar>> t;} + +} // end namespace serialization +} // end namespace boost + +namespace cereal { + +template struct OutputArchive; +template struct InputArchive; + +template class NameValuePair; // dependency "in name only", if you get an error here you many need to #include at some point + +} // end namespace cereal + +namespace boost { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat +namespace multi { + +template +struct archive_traits { + template + /*inline*/ static auto make_nvp(char const* /*n*/, T&& value) noexcept { return std::forward(value); } +}; + +template> && (MA::dimensionality > -1), int> = 0> +auto operator>>(Archive& arxiv, MA&& self) // this is for compatibility with Archive type + -> decltype(arxiv >> static_cast(std::forward(self))) { + return arxiv >> static_cast(std::forward(self)); +} + +template> && (MA::dimensionality > -1), int> = 0> +auto operator<<(Archive& arxiv, MA&& self) // this is for compatibility with Archive 
type + -> decltype(arxiv << static_cast(std::forward(self))) { + return arxiv << static_cast(std::forward(self)); +} + +template> && (MA::dimensionality > -1), int> = 0> +auto operator&(Archive& arxiv, MA&& self) // this is for compatibility with Archive type + -> decltype(arxiv & static_cast(std::forward(self))) { + return arxiv & static_cast(std::forward(self)); +} + +template +struct archive_traits, Ar> || std::is_base_of_v, Ar>>> { + template using nvp = boost::serialization::nvp; + template using array_wrapper = boost::serialization::array_wrapper; + template struct binary_object_t { + using type = boost::serialization::binary_object; + }; + + template /*inline*/ static auto make_nvp(char const* name, T& value) noexcept -> nvp const { return nvp{name, value}; } // NOLINT(readability-const-return-type) : match original boost declaration + template /*inline*/ static auto make_nvp(char const* name, T&& value) noexcept -> nvp const { return nvp{name, static_cast(std::forward(value))}; } // NOLINT(readability-const-return-type) : match original boost declaration + + template /*inline*/ static auto make_array(T* first, std::size_t size) noexcept -> array_wrapper const { return array_wrapper{first, size}; } // NOLINT(readability-const-return-type) original boost declaration + template /*inline*/ static auto make_binary_object(std::byte const* first, std::size_t size) noexcept -> const typename binary_object_t::type { return typename binary_object_t::type(first, size); } // if you get an error here you need to eventually `#include`// NOLINT(readability-const-return-type,clang-diagnostic-ignored-qualifiers) : original boost declaration +}; + +template +struct archive_traits< + Ar, + typename std::enable_if_t< + std::is_base_of_v, Ar> || std::is_base_of_v, Ar> || std::is_base_of_v, Ar> || std::is_base_of_v, Ar>>> { + using self_t = archive_traits, Ar> || std::is_base_of_v, Ar> || std::is_base_of_v, Ar> || std::is_base_of_v, Ar>>>; + + // template + // inline static auto 
make_nvp (char const* name, T const& value) noexcept {return cereal::NameValuePair{name, value};} // if you get an error here you many need to #include at some point // TODO(correaa) replace by cereal::make_nvp from cereal/cereal.hpp + // template + // inline static auto make_nvp (std::string const& name, T&& value) noexcept {return cereal::NameValuePair{name.c_str(), std::forward(value)};} // if you get an error here you many need to #include at some point + template + /*inline*/ static auto make_nvp(char const* name, T&& value) noexcept { return cereal::NameValuePair{name, std::forward(value)}; } // if you get an error here you many need to #include at some point + // template + // inline static auto make_nvp (char const* name, T& value) noexcept {return cereal::NameValuePair{name, value};} // if you get an error here you many need to #include at some point + + template + struct array_wrapper { + T* p_; + std::size_t c_; + + template + void serialize(Archive& arxiv, unsigned int const /*version*/) { + std::for_each( // std::for_each_n is absent in GCC 7 + p_, std::next(p_, c_), + [&arxiv](auto& item) { arxiv& make_nvp("item", item); } + ); + // for(std::size_t i = 0; i != c_; ++i) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm + // auto& item = p_[i]; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + // arxiv & make_nvp("item" , item ); // "item" is the name used by Boost.Serialization XML make_array + // // arxiv & boost::multi::archive_traits::make_nvp("element", element); + // // arxiv & cereal::make_nvp("element", element); + // // arxiv & CEREAL_NVP( element); + // // arxiv & element ; + // } + } + }; + + template + /*inline*/ static auto make_array(T* ptr, std::size_t count) -> array_wrapper { return array_wrapper{ptr, count}; } + + template + /*inline*/ static auto make_nvp(char const* name, array_wrapper&& value) noexcept { return make_nvp(name, static_cast&>(std::move(value))); } +}; + +} // end namespace multi +} 
// end namespace boost + +namespace boost { + +template +class multi_array; + +} // end namespace boost + +namespace boost { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat +namespace serialization { + +template // , class = std::enable_if_t> > +inline auto make_nvp(char const* name, T&& value) { + return boost::serialization::make_nvp(name, static_cast(std::forward(value))); +} + +} // end namespace serialization + +using boost::serialization::make_nvp; + +} // end namespace boost + +#endif // BOOST_MULTI_DETAIL_SERIALIZATION_HPP_ diff --git a/external_codes/boost_multi/multi/include/multi/detail/stack_allocator.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/stack_allocator_.hpp similarity index 100% rename from external_codes/boost_multi/multi/include/multi/detail/stack_allocator.hpp rename to external_codes/boost_multi/multi/include/boost/multi/detail/stack_allocator_.hpp diff --git a/external_codes/boost_multi/multi/include/boost/multi/detail/static_allocator.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/static_allocator.hpp new file mode 100644 index 0000000000..e0f54f16f9 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/static_allocator.hpp @@ -0,0 +1,112 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_DETAIL_STATIC_ALLOCATOR_HPP +#define BOOST_MULTI_DETAIL_STATIC_ALLOCATOR_HPP + +#include +#include + +#include +#include +#include +#include + +namespace boost::multi::detail { + +template +class static_allocator { //NOSONAR(cpp:S4963) this allocator has special semantics + bool dirty_ = false; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4324) // Warning that the structure is padded due to the below +#endif + + BOOST_MULTI_NO_UNIQUE_ADDRESS alignas(T) std::array buffer_; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + public: + using value_type = T; + using pointer = T*; + + template struct rebind { + using other = static_allocator; + }; + + static constexpr auto max_size() noexcept -> std::size_t { return N; } + + static_allocator() = default; // NOLINT(cppcoreguidelines-pro-type-member-init,hicpp-member-init) buffer_ is not initialized + + template + static_allocator(static_allocator const& /*other*/) { // NOLINT(hicpp-explicit-conversions,google-explicit-constructor) follow std::allocator + // static_assert(sizeof(T) == sizeof(TT)); + static_assert(NN == N); + } + + static_allocator(static_allocator const& /*other*/) // std::vector makes a copy right away + // = default; // this copies the internal buffer + {} + + [[deprecated("don't move dynamic container with static_allocator")]] + static_allocator(static_allocator&& /*other*/) // this is called *by the elements* during move construction of a vector + // = delete; + // {throw std::runtime_error("don't move dynamic container with static_allocator");} // this is called *by the elements* during move construction of a vector + noexcept {} + // noexcept {std::memmove(buffer_.data(), other.buffer_.data(), sizeof(T)*N);} + // noexcept : buffer_{std::move(other.buffer_)} {} + // noexcept = default; + + [[deprecated("don't move dynamic container with static_allocator")]] + auto operator=(static_allocator const& /*other*/) -> 
static_allocator& + = delete; + + [[deprecated("don't move dynamic container with static_allocator")]] auto operator=(static_allocator&& other) -> static_allocator& + = delete; + + ~static_allocator() = default; + + auto select_on_container_copy_construction() noexcept -> static_allocator + = delete; + // {return static_allocator{};} + + using propagate_on_container_move_assignment = std::false_type; // this forces to call move assignment of the allocator by std::vector + using propagate_on_container_copy_assignment = std::false_type; + using propagate_on_container_swap = std::false_type; + + static constexpr auto capacity() { return N; } + + BOOST_MULTI_NODISCARD("because otherwise it will generate a memory leak") + auto allocate([[maybe_unused]] std::size_t n) -> pointer { + assert(n <= N); + assert(! dirty_); // do not attempt to resize a vector with static_allocator + dirty_ = true; + return reinterpret_cast(buffer_.data()); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + } + void deallocate(pointer /*ptr*/, [[maybe_unused]] std::size_t n) { + assert(n <= N); + } + + using is_always_equal = std::true_type; +}; + +template +constexpr auto operator==(static_allocator const& /*a1*/, static_allocator const& /*a2*/) noexcept +{ return true; } // &a1 == &a2; } +// = delete; + +template +auto operator!=(static_allocator const& /*a1*/, static_allocator const& /*a2*/) noexcept // this is used *by the elements* when resizing a vector +{ return false; } // &a1 != &a2;} +// = delete + +template +[[deprecated("don't swap dynamic container with static_allocator")]] +void swap(static_allocator& a1, static_allocator& a2) noexcept = delete; + +} // end namespace boost::multi::detail +#endif // BOOST_MULTI_DETAIL_STATIC_ALLOCATOR_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/detail/tuple_zip.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/tuple_zip.hpp new file mode 100644 index 0000000000..a85395bdae --- /dev/null +++ 
b/external_codes/boost_multi/multi/include/boost/multi/detail/tuple_zip.hpp @@ -0,0 +1,456 @@ +// Copyright 2021-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_DETAIL_TUPLE_ZIP_HPP +#define BOOST_MULTI_DETAIL_TUPLE_ZIP_HPP +#pragma once + +#include +#include + +#include // for deprecated functions + +namespace boost::multi { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat +namespace detail { + +template class tuple; + +template<> class tuple<> { // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + public: + constexpr tuple() = default; + constexpr tuple(tuple const&) = default; + + constexpr auto operator=(tuple const&) -> tuple& = default; + + constexpr auto operator==(tuple const& /*other*/) const -> bool { return true; } + constexpr auto operator!=(tuple const& /*other*/) const -> bool { return false; } + + constexpr auto operator<(tuple const& /*other*/) const { return false; } + constexpr auto operator>(tuple const& /*other*/) const { return false; } +}; + +template class tuple : tuple { // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + T0 head_; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) can be a reference + using head_type = T0; + using tail_type = tuple; + + public: + constexpr auto head() const& -> T0 const& { return head_; } + constexpr auto head() && -> decltype(auto) { return std::move(head_); } + constexpr auto head() & -> T0& { return head_; } + + constexpr auto tail() const& -> tail_type const& { return static_cast(*this); } + constexpr auto tail() && -> decltype(auto) { return static_cast(*this); } + constexpr auto tail() & -> tail_type& { return static_cast(*this); } + + constexpr tuple() = default; + constexpr tuple(tuple const&) = default; + + // TODO(correaa) make conditional explicit constructor depending on the conversions for T0, Ts... 
+ constexpr explicit tuple(T0 head, tuple tail) : tail_type{std::move(tail)}, head_{std::move(head)} {} + // cppcheck-suppress noExplicitConstructor ; allow bracket init in function argument // NOLINTNEXTLINE(runtime/explicit) + constexpr tuple(T0 head, Ts... tail) : tail_type{tail...}, head_{head} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) to allow bracket function calls + + // cppcheck-suppress noExplicitConstructor ; allow bracket init in function argument // NOLINTNEXTLINE(runtime/explicit) + constexpr explicit tuple(::std::tuple other) : tuple(::std::apply([](auto... es) {return tuple(es...);}, other)) {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + constexpr auto operator=(tuple const&) -> tuple& = default; + + template + constexpr auto operator=(tuple const& other) // NOLINT(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) signature used for SFINAE + -> decltype(std::declval() = other.head(), std::declval() = other.tail(), std::declval()) { + head_ = other.head(), tail() = other.tail(); + return *this; + } + + constexpr auto operator==(tuple const& other) const -> bool { return head_ == other.head_ && tail() == other.tail(); } + constexpr auto operator!=(tuple const& other) const -> bool { return head_ != other.head_ || tail() != other.tail(); } + + constexpr auto operator<(tuple const& other) const { + if(head_ < other.head_) { + return true; + } + if(other.head_ < head_) { + return false; + } + return tail() < other.tail(); + } + constexpr auto operator>(tuple const& other) const { + if(head_ > other.head_) { + return true; + } + if(other.head_ > head_) { + return false; + } + return tail() > other.tail(); + } + + private: + template struct priority : std::conditional_t> {}; + + template + constexpr auto at_aux_(priority<0> /*prio*/, Index idx) const + -> decltype(ht_tuple(std::declval(), std::declval()[idx])) { + return ht_tuple(head(), tail()[idx]); + } + + template + 
constexpr auto at_aux_(priority<1> /*prio*/, Index idx) const + -> decltype(ht_tuple(std::declval()[idx], std::declval())) { + return ht_tuple(head()[idx], tail()); + } + + public: + template + constexpr auto operator[](Index idx) const + -> decltype(std::declval const&>().at_aux_(priority<1>{}, idx)) { + return this->at_aux_(priority<1>{}, idx); + } + + template + constexpr auto get() const& -> auto const& { // NOLINT(readability-identifier-length) std naming + if constexpr(N == 0) { + return head(); + } else { + return tail().template get(); + } + } + + template + constexpr auto get() & -> decltype(auto) { // NOLINT(readability-identifier-length) std naming + if constexpr(N == 0) { + return head(); + } else { + return tail().template get(); + } + } + + template + constexpr auto get() && -> decltype(auto) { // NOLINT(readability-identifier-length) std naming + if constexpr(N == 0) { + return std::move(*this).head(); + } else { + return std::move(*this).tail().template get(); + } + } +}; + +#if defined(__INTEL_COMPILER) // this instance is necessary due to a bug in intel compiler icpc +// TODO(correaa) : this class can be collapsed with the general case with [[no_unique_address]] in C++20 +template class tuple { // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) + T0 head_; + tuple<> tail_; + + public: + constexpr auto head() const& -> T0 const& { return head_; } + constexpr auto head() && -> T0&& { return std::move(head_); } + constexpr auto head() & -> T0& { return head_; } + + constexpr auto tail() const& -> tuple<> const& { return tail_; } + constexpr auto tail() && -> tuple<>&& { return std::move(tail_); } + constexpr auto tail() & -> tuple<>& { return tail_; } + + constexpr tuple() = default; + constexpr tuple(tuple const&) = default; // cppcheck-suppress noExplicitConstructor ; workaround cppcheck 2.11 + + // cppcheck-suppress noExplicitConstructor ; allow bracket init in function argument // NOLINTNEXTLINE(runtime/explicit) 
+ constexpr tuple(T0 t0, tuple<> sub) : head_{std::move(t0)}, tail_{sub} {} + constexpr explicit tuple(T0 t0) : head_{std::move(t0)}, tail_{} {} + + constexpr auto operator=(tuple const& other) -> tuple& = default; + + constexpr auto operator==(tuple const& other) const { return head_ == other.head_; } + constexpr auto operator!=(tuple const& other) const { return head_ != other.head_; } + + constexpr auto operator<(tuple const& other) const { return head_ < other.head_; } + constexpr auto operator>(tuple const& other) const { return head_ > other.head_; } +}; +#endif + +template tuple(T0, tuple) -> tuple; + +template constexpr auto mk_tuple(T0 head, Ts... tail) { + return tuple(std::move(head), std::move(tail)...); +} + +template constexpr auto tie(T0& head, Ts&... tail) { + return tuple(head, tail...); +} + +template constexpr auto ht_tuple(T0 head, tuple tail) { + return tuple(std::move(head), std::move(tail)); +} + +template struct tuple_prepend; + +template +struct tuple_prepend> { + using type = tuple; +}; + +template +using tuple_prepend_t = typename tuple_prepend::type; + +template +constexpr auto head(tuple const& t) -> decltype(auto) { // NOLINT(readability-identifier-length) std naming + return t.head(); +} + +template +constexpr auto head(tuple&& t) -> decltype(auto) { // NOLINT(readability-identifier-length) std naming + return std::move(t).head(); +} + +template +constexpr auto head(tuple& t) -> decltype(auto) { // NOLINT(readability-identifier-length) std naming + return t.head(); +} + +template +constexpr auto tail(tuple const& t) -> decltype(t.tail()) { return t.tail(); } // NOLINT(readability-identifier-length) std naming + +template +constexpr auto tail(tuple&& t) -> decltype(std::move(t).tail()) { return std::move(t).tail(); } // NOLINT(readability-identifier-length) std naming + +template +constexpr auto tail(tuple& t) -> decltype(t.tail()) { return t.tail(); } // NOLINT(readability-identifier-length) std naming + +#if defined __NVCC__ // in 
place of global -Xcudafe \"--diag_suppress=implicit_return_from_non_void_function\" +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress = implicit_return_from_non_void_function +#else +#pragma diagnostic push +#pragma diag_suppress = implicit_return_from_non_void_function +#endif +#elif defined __NVCOMPILER +#pragma diagnostic push +#pragma diag_suppress = implicit_return_from_non_void_function +#endif +#if ! defined(_MSC_VER) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wreturn-type" +#endif +template +constexpr auto get(tuple const& t) -> auto const& { // NOLINT(readability-identifier-length) std naming + if constexpr(N == 0) { + return t.head(); + } else { + return get(t.tail()); + } +} +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#pragma diagnostic pop +#endif +#elif defined __NVCOMPILER +#pragma diagnostic pop +#endif + +template +constexpr auto get(tuple& tup) -> auto& { + if constexpr(N == 0) { + return tup.head(); + } else { + return get(tup.tail()); + } +} + +template +constexpr auto get(tuple&& tup) -> auto&& { + if constexpr(N == 0) { + return std::move(std::move(tup)).head(); + } else { + return get(std::move(std::move(tup).tail())); + } +} +#if ! 
defined(_MSC_VER) +#pragma GCC diagnostic pop +#endif + +} // end namespace detail +} // end namespace boost::multi + +// Some versions of Clang throw warnings that stl uses class std::tuple_size instead +// of struct std::tuple_size like it should be +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wmismatched-tags" +#endif + +template +struct std::tuple_size> { // NOLINT(cert-dcl58-cpp) to have structured bindings + // cppcheck-suppress unusedStructMember + static constexpr std::size_t value = sizeof...(Ts); +}; + +template<> +struct std::tuple_element<0, boost::multi::detail::tuple<>> { // NOLINT(cert-dcl58-cpp) to have structured bindings + using type = void; +}; + +template +struct std::tuple_element<0, boost::multi::detail::tuple> { // NOLINT(cert-dcl58-cpp) to have structured bindings + using type = T0; +}; + +template +struct std::tuple_element<0, boost::multi::detail::tuple> { // NOLINT(cert-dcl58-cpp) to have structured bindings + using type = T0; +}; + +template +struct std::tuple_element> { // NOLINT(cert-dcl58-cpp) to have structured bindings + using type = typename tuple_element>::type; +}; + +namespace std { // NOLINT(cert-dcl58-cpp) to implement structured bindings + +template +constexpr auto get(boost::multi::detail::tuple const& tp) // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple get + -> decltype(boost::multi::detail::get(tp)) { + return boost::multi::detail::get(tp); +} + +template +constexpr auto get(boost::multi::detail::tuple& tp) // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple get + -> decltype(boost::multi::detail::get(tp)) { + return boost::multi::detail::get(tp); +} + +template +constexpr auto get(boost::multi::detail::tuple&& tp) // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple get + -> decltype(boost::multi::detail::get(std::move(tp))) { + return boost::multi::detail::get(std::move(tp)); +} + +template +constexpr auto std_apply_timpl(F&& fn, Tuple&& tp, std::index_sequence 
/*012*/) -> decltype(auto) { // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple get + (void)tp; // fix "error #827: parameter "t" was never referenced" in NVC++ and "error #869: parameter "t" was never referenced" in oneAPI-ICPC + return std::forward(fn)(boost::multi::detail::get(std::forward(tp))...); +} + +template +constexpr auto apply(F&& fn, boost::multi::detail::tuple const& tp) -> decltype(auto) { // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple get + return std_apply_timpl( + std::forward(fn), tp, + std::make_index_sequence{} + ); +} + +template +constexpr auto apply(F&& fn, boost::multi::detail::tuple& tp) -> decltype(auto) { // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple get + return std_apply_timpl( + std::forward(fn), tp, + std::make_index_sequence{} + ); +} + +template +constexpr auto apply(F&& fn, boost::multi::detail::tuple&& tp) -> decltype(auto) { // NOLINT(cert-dcl58-cpp) normal idiom to defined tuple get + return std_apply_timpl( + std::forward(fn), std::move(tp), + std::make_index_sequence{} + ); +} + +} // end namespace std + +#ifdef __clang__ +# pragma clang diagnostic pop +#endif + +namespace boost::multi { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat +namespace detail { + +template +constexpr auto tuple_zip_impl(Tuple1&& tup1, Tuple2&& tup2, std::index_sequence /*012*/) { + using boost::multi::detail::get; + return boost::multi::detail::mk_tuple( + boost::multi::detail::mk_tuple( + get(std::forward(tup1)), + get(std::forward(tup2)) + )... + ); +} + +template +constexpr auto tuple_zip_impl(Tuple1&& tup1, Tuple2&& tup2, Tuple3&& tup3, std::index_sequence /*012*/) { + using boost::multi::detail::get; + return boost::multi::detail::mk_tuple( + boost::multi::detail::mk_tuple( + get(std::forward(tup1)), + get(std::forward(tup2)), + get(std::forward(tup3)) + )... 
+ ); +} + +template +constexpr auto tuple_zip_impl(Tuple1&& tup1, Tuple2&& tup2, Tuple3&& tup3, Tuple4&& tup4, std::index_sequence /*012*/) { + using boost::multi::detail::get; + return boost::multi::detail::mk_tuple( + boost::multi::detail::mk_tuple( + get(std::forward(tup1)), + get(std::forward(tup2)), + get(std::forward(tup3)), + get(std::forward(tup4)) + )... + ); +} + +template +constexpr auto tuple_zip_impl(Tuple1&& tup1, Tuple2&& tup2, Tuple3&& tup3, Tuple4&& tup4, Tuple5&& tup5, std::index_sequence /*012*/) { + using boost::multi::detail::get; + return boost::multi::detail::mk_tuple( + boost::multi::detail::mk_tuple( + get(std::forward(tup1)), + get(std::forward(tup2)), + get(std::forward(tup3)), + get(std::forward(tup4)), + get(std::forward(tup5)) + )... + ); +} + +template +constexpr auto tuple_zip(T1&& tup1, T2&& tup2) { + return detail::tuple_zip_impl( + std::forward(tup1), std::forward(tup2), + std::make_index_sequence>::value>() + ); +} + +template +constexpr auto tuple_zip(T1&& tup1, T2&& tup2, T3&& tup3) { + return detail::tuple_zip_impl( + std::forward(tup1), std::forward(tup2), std::forward(tup3), + std::make_index_sequence>::value>() + ); +} + +template +constexpr auto tuple_zip(T1&& tup1, T2&& tup2, T3&& tup3, T4&& tup4) { + return detail::tuple_zip_impl( + std::forward(tup1), std::forward(tup2), std::forward(tup3), std::forward(tup4), + std::make_index_sequence>::value>() + ); +} + +template +constexpr auto tuple_zip(T1&& tup1, T2&& tup2, T3&& tup3, T4&& tup4, T5&& tup5) { + return detail::tuple_zip_impl( + std::forward(tup1), std::forward(tup2), std::forward(tup3), std::forward(tup4), std::forward(tup5), + std::make_index_sequence>::value>() + ); +} + +} // end namespace detail + +using detail::tie; + +} // end namespace boost::multi +#endif diff --git a/external_codes/boost_multi/multi/include/multi/detail/type_traits.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/type_traits.hpp similarity index 59% rename from 
external_codes/boost_multi/multi/include/multi/detail/type_traits.hpp rename to external_codes/boost_multi/multi/include/boost/multi/detail/type_traits.hpp index 40f9a571e4..d16dc5c507 100644 --- a/external_codes/boost_multi/multi/include/multi/detail/type_traits.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/type_traits.hpp @@ -1,8 +1,9 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2022 Alfredo A. Correa +// Copyright 2022-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -#ifndef MULTI_DETAIL_TYPE_TRAITS_HPP -#define MULTI_DETAIL_TYPE_TRAITS_HPP +#ifndef BOOST_MULTI_DETAIL_TYPE_TRAITS_HPP +#define BOOST_MULTI_DETAIL_TYPE_TRAITS_HPP #include diff --git a/external_codes/boost_multi/multi/include/multi/detail/types.hpp b/external_codes/boost_multi/multi/include/boost/multi/detail/types.hpp similarity index 63% rename from external_codes/boost_multi/multi/include/multi/detail/types.hpp rename to external_codes/boost_multi/multi/include/boost/multi/detail/types.hpp index 216f456c18..37ee09b7d7 100644 --- a/external_codes/boost_multi/multi/include/multi/detail/types.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/detail/types.hpp @@ -1,13 +1,11 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2021 Alfredo A. Correa +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt -#ifndef MULTI_DETAIL_TYPES_HPP -#define MULTI_DETAIL_TYPES_HPP - -// #include "index_range.hpp" +#ifndef BOOST_MULTI_DETAIL_TYPES_HPP +#define BOOST_MULTI_DETAIL_TYPES_HPP #include // for std::size_t -// #include // for make_tuple #include // for make_signed_t #include // for forward @@ -18,7 +16,8 @@ using size_type = std::make_signed_t; using index = std::make_signed_t; using difference_type = std::make_signed_t; + using dimensionality_type = index; } // end namespace boost::multi -#endif +#endif // BOOST_MULTI_DETAIL_TYPES_HPP diff --git a/external_codes/boost_multi/multi/include/boost/multi/pmr_.hpp b/external_codes/boost_multi/multi/include/boost/multi/pmr_.hpp new file mode 100644 index 0000000000..8da23c7df6 --- /dev/null +++ b/external_codes/boost_multi/multi/include/boost/multi/pmr_.hpp @@ -0,0 +1,10 @@ +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#ifndef BOOST_MULTI_PMR_HPP_ +#define BOOST_MULTI_PMR_HPP_ + +#include + +#endif diff --git a/external_codes/boost_multi/multi/include/multi/utility.hpp b/external_codes/boost_multi/multi/include/boost/multi/utility.hpp similarity index 71% rename from external_codes/boost_multi/multi/include/multi/utility.hpp rename to external_codes/boost_multi/multi/include/boost/multi/utility.hpp index efb3584fb8..1416a3f219 100644 --- a/external_codes/boost_multi/multi/include/multi/utility.hpp +++ b/external_codes/boost_multi/multi/include/boost/multi/utility.hpp @@ -1,10 +1,13 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt -#ifndef MULTI_UTILITY_HPP -#define MULTI_UTILITY_HPP +#ifndef BOOST_MULTI_UTILITY_HPP +#define BOOST_MULTI_UTILITY_HPP +#pragma once -#include "detail/layout.hpp" +#include +#include #include // for std::invoke #include // for allocator<> @@ -14,13 +17,13 @@ #include // for std::size (in c++17) #endif -namespace boost::multi { - -template::value, int> =0> // ::value (not _v) needed by intel's icpc 19 -constexpr auto implicit_cast(From&& r) -> To {return static_cast(r);} // NOLINT(readability-identifier-length) std naming +#if defined(__NVCC__) +#define BOOST_MULTI_HD __host__ __device__ +#else +#define BOOST_MULTI_HD +#endif -template::value and not std::is_convertible::value, int> =0> // ::value (not _v) needed by intel's icpc 19 -constexpr auto explicit_cast(From&& r) -> To {return static_cast(r);} // NOLINT(readability-identifier-length) std naming +namespace boost::multi { template struct move_ptr : private std::move_iterator { @@ -38,20 +41,20 @@ struct move_ptr : private std::move_iterator { using std::move_iterator::move_iterator; - constexpr /*implicit*/ operator Ptr() const {return std::move_iterator::base();} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) decay to lvalue should be easy - constexpr auto operator+=(difference_type n) -> move_ptr& {static_cast&>(*this) += n; return *this;} - constexpr auto operator-=(difference_type n) -> move_ptr& {static_cast&>(*this) -= n; return *this;} + BOOST_MULTI_HD constexpr /*implicit*/ operator Ptr() const {return std::move_iterator::base();} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) decay to lvalue should be easy + BOOST_MULTI_HD constexpr auto operator+=(difference_type n) -> move_ptr& {static_cast&>(*this) += n; return *this;} + BOOST_MULTI_HD constexpr auto operator-=(difference_type n) -> move_ptr& {static_cast&>(*this) -= n; return *this;} - constexpr auto operator+(difference_type n) const -> move_ptr 
{move_ptr ret{*this}; ret += n; return ret;} - constexpr auto operator-(difference_type n) const -> move_ptr {move_ptr ret{*this}; ret -= n; return ret;} + BOOST_MULTI_HD constexpr auto operator+(difference_type n) const -> move_ptr {move_ptr ret{*this}; ret += n; return ret;} + BOOST_MULTI_HD constexpr auto operator-(difference_type n) const -> move_ptr {move_ptr ret{*this}; ret -= n; return ret;} - constexpr auto operator-(move_ptr const& other) const -> difference_type {return static_cast const&>(*this) - static_cast const&>(other);} + BOOST_MULTI_HD constexpr auto operator-(move_ptr const& other) const -> difference_type {return static_cast const&>(*this) - static_cast const&>(other);} - constexpr auto operator*() const -> decltype(auto) {return *static_cast const&>(*this);} - constexpr auto operator[](difference_type n) const -> decltype(auto) {return *((*this) + n);} + constexpr auto operator*() const -> decltype(auto) {return *static_cast const&>(*this);} + BOOST_MULTI_HD constexpr auto operator[](difference_type n) const -> decltype(auto) {return *((*this) + n);} - constexpr auto operator==(move_ptr const& other) const -> bool {return static_cast const&>(*this) == static_cast const&>(other);} - constexpr auto operator!=(move_ptr const& other) const -> bool {return static_cast const&>(*this) != static_cast const&>(other);} + BOOST_MULTI_HD constexpr auto operator==(move_ptr const& other) const -> bool {return static_cast const&>(*this) == static_cast const&>(other);} + BOOST_MULTI_HD constexpr auto operator!=(move_ptr const& other) const -> bool {return static_cast const&>(*this) != static_cast const&>(other);} }; template struct ref_add_const {using type = T const;}; // this is not the same as std::add_const @@ -63,7 +66,7 @@ template struct ref_add_const {using type = T const&;}; template::reference>> struct transform_ptr { using difference_type = typename std::iterator_traits::difference_type; - using value_type = std::decay_t;//typename 
std::iterator_traits>::value_type; + using value_type = std::decay_t; // typename std::iterator_traits>::value_type; using pointer = Ptr; using reference = Ref; using iterator_category = typename std::iterator_traits::iterator_category; @@ -72,23 +75,36 @@ struct transform_ptr { transform_ptr< std::remove_cv_t, UF, Ptr, - typename std::conditional< + std::conditional_t< std::is_const_v, typename ref_add_const::type, Ref - >::type + > + // typename std::conditional< + // std::is_const_v, + // typename ref_add_const::type, + // Ref + // >::type > ; - template + #if defined(__GNUC__) && (__GNUC__ < 9) + constexpr explicit transform_ptr(std::nullptr_t nil) : p_{nil} /*, f_{}*/ {} // seems to be necessary for gcc 7 + #endif + constexpr transform_ptr(pointer ptr, UF fun) : p_{ptr}, f_(std::move(fun)) {} - template - constexpr transform_ptr(Other const& other) : p_{other.p_}, f_{other.f_} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) TODO(correaa) use conditional explicit idiom here + template(std::declval

()))* =nullptr> + // cppcheck-suppress noExplicitConstructor + constexpr /*mplc*/ transform_ptr(Other const& other) : p_{other.p_}, f_{other.f_} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) + + template(std::declval

::value_type; - using pointer = void; - using reference = void; + using difference_type = std::ptrdiff_t; + using value_type = typename std::iterator_traits

::value_type; + using pointer = void; + using reference = void; using iterator_category = typename std::iterator_traits

::iterator_category; }; @@ -149,97 +195,120 @@ BOOST_AUTO_TEST_CASE(transformed_array) { namespace multi = boost::multi; { using complex = std::complex; - complex cee{1., 2.}; + complex cee{1.0, 2.0}; auto&& zee = test::conjd{cee}; - BOOST_REQUIRE(( zee == complex{1., -2.} )); + BOOST_REQUIRE(( zee == complex{1.0, -2.0} )); - BOOST_REQUIRE( real(zee) == 1. ); - BOOST_REQUIRE( imag(zee) == -2. ); - BOOST_REQUIRE( zee.real() == 1. ); - BOOST_REQUIRE( zee.imag() == -2. ); + BOOST_REQUIRE( real(zee) == 1.0 ); + BOOST_REQUIRE( imag(zee) == -2.0 ); + BOOST_REQUIRE( zee.real() == 1.0 ); + BOOST_REQUIRE( zee.imag() == -2.0 ); } { - double doub = 5; + double doub = 5.0; auto&& negd_a = test::involuted(test::neg, doub); - BOOST_REQUIRE( negd_a == -5. ); + BOOST_REQUIRE( negd_a == -5.0 ); - negd_a = 10.; - BOOST_REQUIRE( negd_a == 10. ); - BOOST_REQUIRE( doub = -10. ); + negd_a = 10.0; + BOOST_REQUIRE( negd_a == 10.0 ); + BOOST_REQUIRE( doub = -10.0 ); } { - multi::array arr = { 0, 1, 2, 3, 4}; - auto&& ref = arr.static_array_cast(); + multi::array arr = {0.0, 1.0, 2.0, 3.0, 4.0}; + auto&& ref = arr.static_array_cast(); BOOST_REQUIRE( ref[2] == arr[2] ); } { - multi::array arr = { +0.0, +1.0, +2.0, +3.0, +4.0}; - multi::array neg = { -0.0, -1.0, -2.0, -3.0, -4.0}; - auto&& negd_arr = arr.static_array_cast>(); + multi::array const arr = {+0.0, +1.0, +2.0, +3.0, +4.0}; + multi::array neg = {-0.0, -1.0, -2.0, -3.0, -4.0}; + auto&& negd_arr = arr.static_array_cast>(); BOOST_REQUIRE( negd_arr[2] == neg[2] ); } { - multi::array arr = { + multi::array const arr = { { +0.0, +1.0, +2.0, +3.0, +4.0}, { +5.0, +6.0, +7.0, +8.0, +9.0}, {+10.0, +11.0, +12.0, +13.0, +14.0}, - {+15.0, +16.0, +17.0, +18.0, +19.0} + {+15.0, +16.0, +17.0, +18.0, +19.0}, }; multi::array neg = { { -0.0, -1.0, -2.0, -3.0, -4.0}, { -5.0, -6.0, -7.0, -8.0, -9.0}, {-10.0, -11.0, -12.0, -13.0, -14.0}, - {-15.0, -16.0, -17.0, -18.0, -19.0} + {-15.0, -16.0, -17.0, -18.0, -19.0}, }; - auto&& negd_arr = 
arr.static_array_cast>(); + // auto&& negd_arr = arr.static_array_cast>(); // not compile, ok, read only + auto&& negd_arr = arr.static_array_cast>(); BOOST_REQUIRE( negd_arr[1][1] == neg[1][1] ); + BOOST_REQUIRE( negd_arr[1][1] == -6.0 ); + // negd_arr2[1][1] = 3.0; // can't compile, ok, read-only } { - #if defined(__cpp_deduction_guides) - double zee[4][5] { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : testing legacy types - { 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - {10, 11, 12, 13, 14}, - {15, 16, 17, 18, 19} +#if defined(__cpp_deduction_guides) + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : testing legacy types + double zee[4][5]{ + { 0.0, 1.0, 2.0, 3.0, 4.0}, + { 5.0, 6.0, 7.0, 8.0, 9.0}, + {10.0, 11.0, 12.0, 13.0, 14.0}, + {15.0, 16.0, 17.0, 18.0, 19.0}, }; auto&& d2DC = multi::make_array_ref(test::involuter{test::neg, &zee[0][0]}, {4, 5}); - d2DC[1][1] = -66.; - BOOST_REQUIRE( zee[1][1] == 66 ); - #endif + d2DC[1][1] = -66.0; + BOOST_REQUIRE( zee[1][1] == 66.0 ); +#endif { using complex = std::complex; + multi::array d2D = { - { { 0., 3.}, { 1., 9.}, { 2., 4.}, { 3., 0.}, { 4., 0.} }, - { { 5., 0.}, { 6., 3.}, { 7., 5.}, { 8., 0.}, { 9., 0.} }, - { { 1., 4.}, { 9., 1.}, {12., 0.}, {13., 0.}, {14., 0.} }, - { {15., 0.}, {16., 0.}, {17., 0.}, {18., 0.}, {19., 0.} } + { {0.0, 3.0}, {1.0, 9.0}, {2.0, 4.0}, {3.0, 0.0}, {4.0, 0.0}}, + { {5.0, 0.0}, {6.0, 3.0}, {7.0, 5.0}, {8.0, 0.0}, {9.0, 0.0}}, + { {1.0, 4.0}, {9.0, 1.0}, {12.0, 0.0}, {13.0, 0.0}, {14.0, 0.0}}, + {{15.0, 0.0}, {16.0, 0.0}, {17.0, 0.0}, {18.0, 0.0}, {19.0, 0.0}}, }; auto&& d2Dreal = d2D.reinterpret_array_cast(); - BOOST_REQUIRE( d2Dreal[2][1] == 9. ); + BOOST_REQUIRE( d2Dreal[2][1] == 9.0 ); - d2Dreal[2][1] = 12.; - BOOST_REQUIRE( d2D[2][1] == complex(12., 1.) 
); + d2Dreal[2][1] = 12.0; + BOOST_REQUIRE( d2D[2][1] == complex(12.0, 1.0) ); auto&& d2DrealT = rotated(d2D).reinterpret_array_cast(); - BOOST_REQUIRE( d2DrealT[2][1] == 7. ); + BOOST_REQUIRE( d2DrealT[2][1] == 7.0 ); - multi::array d2Dreal_copy = d2D.template reinterpret_array_cast(); + multi::array const d2Dreal_copy = d2D.template reinterpret_array_cast(); BOOST_REQUIRE( d2Dreal_copy == d2Dreal ); } { using complex = std::complex; - constexpr auto const I = complex{0., 1.}; // NOLINT(readability-identifier-length) imaginary unit + + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) imaginary unit + multi::array arr = { - { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I}, - { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I} + {1.0 + 3.0 * I, 3.0 - 2.0 * I, 4.0 + 1.0 * I}, + {9.0 + 1.0 * I, 7.0 - 8.0 * I, 1.0 - 3.0 * I}, }; auto conjd_arr = arr.static_array_cast>(); BOOST_REQUIRE( conjd_arr[1][2] == conj(arr[1][2]) ); } } } + +#if !defined(__NVCC__) && (__GNUC_MINOR__ > 7) +BOOST_AUTO_TEST_CASE(transformed_to_string) { + namespace multi = boost::multi; + + multi::array const AA = { + {1, 2}, + {3, 4}, + }; + multi::array BB = AA.element_transformed([](int ee) noexcept { return std::to_string(ee); }); + + BOOST_REQUIRE( BB[1][1] == "4" ); +} +#endif + +#undef BOOST_MULTI_DECLRETURN diff --git a/external_codes/boost_multi/multi/test/utility.cpp b/external_codes/boost_multi/multi/test/utility.cpp index 3b04ddfef9..9cdccab573 100644 --- a/external_codes/boost_multi/multi/test/utility.cpp +++ b/external_codes/boost_multi/multi/test/utility.cpp @@ -1,21 +1,36 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi utility" -#include - -#include "multi/array.hpp" -#include "multi/detail/tuple_zip.hpp" - -//#include -//#include - -//#include - -//#include "../adaptors/serialization/xml_archive.hpp" - -#include -#include // for iota +// Copyright 2018-2024 Alfredo A. 
Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include +#include + +#include +#include // for iota + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; @@ -24,22 +39,29 @@ namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(std_array_extensions_3d) { std::array, 4>, 3> arr = {}; - static_assert(std::is_same::element, double>{}, "!"); + static_assert(std::is_same::element, double>{}); BOOST_REQUIRE( multi::dimensionality(arr) == 3 ); - BOOST_REQUIRE( multi::extension(arr) == 3 ); + // BOOST_REQUIRE( multi::extension(arr) == 3 ); BOOST_REQUIRE(( multi::extensions(arr) == decltype(multi::extensions(arr)){3, 4, 5} )); +#ifndef _MSC_VER // problem with 14.3 c++17 using multi::data_elements; BOOST_REQUIRE( data_elements(arr) == &arr[0][0][0] ); // NOLINT(readability-container-data-pointer) BOOST_REQUIRE( data_elements(arr) == arr[0][0].data() ); using multi::num_elements; BOOST_REQUIRE( num_elements(arr) == 60 ); - - multi::array marr({3, 4, 5}); +#endif + + multi::array const marr( +#ifdef _MSC_VER // problem with 14.3 c++17 + multi::extensions_t<3> +#endif + {3, 4, 5} + ); using multi::layout; BOOST_REQUIRE( layout(arr) == layout(marr) ); @@ -49,13 +71,13 @@ 
BOOST_AUTO_TEST_CASE(std_array_extensions_3d) { BOOST_AUTO_TEST_CASE(std_array_extensions_2d) { std::array, 3> arr = {}; - static_assert( std::is_same::element, double>{}, "!" ); + static_assert(std::is_same::element, double>{}); using multi::dimensionality; BOOST_REQUIRE( dimensionality(arr) == 2 ); - using multi::extension; - BOOST_REQUIRE( extension(arr) == 3 ); + // using multi::extension; + // BOOST_REQUIRE( extension(arr) == 3 ); using multi::extensions; BOOST_REQUIRE(( extensions(arr) == decltype(extensions(arr)){3, 4} )); @@ -68,7 +90,7 @@ BOOST_AUTO_TEST_CASE(std_array_extensions_2d) { using multi::num_elements; BOOST_REQUIRE( num_elements(arr) == 12 ); - multi::array marr({3, 4}); + multi::array const marr({3, 4}); using multi::layout; BOOST_REQUIRE( layout(arr) == layout(marr) ); @@ -78,13 +100,13 @@ BOOST_AUTO_TEST_CASE(std_array_extensions_2d) { BOOST_AUTO_TEST_CASE(std_array_extensions_1d) { std::array arr = {}; - static_assert( std::is_same::element, double>{}, "!" ); + static_assert(std::is_same::element, double>{}); using multi::dimensionality; BOOST_REQUIRE( dimensionality(arr) == 1 ); - using multi::extension; - BOOST_REQUIRE( extension(arr) == 4 ); + // using multi::extension; + // BOOST_REQUIRE( extension(arr) == 4 ); using multi::extensions; BOOST_REQUIRE(( extensions(arr) == decltype(extensions(arr)){multi::iextension{4}} )); @@ -98,11 +120,16 @@ BOOST_AUTO_TEST_CASE(std_array_extensions_1d) { } BOOST_AUTO_TEST_CASE(test_utility_1d) { - std::array carr = {{0., 1., 2., 3., 4., 5., 6., 7., 8., 9.}}; + // clang-format off + std::array carr = {{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}}; + // clang-format on + multi::array_ref marr(carr.data(), {multi::iextension{10}}); -// boost::multi_array_ref Marr(&carr[0], boost::extents[10]); - std::vector varr(10); std::iota(begin(varr), end(varr), 0); - std::array aarr{}; std::iota(begin(aarr), end(aarr), 0); + + std::vector varr(10); // NOLINT(fuchsia-default-arguments-calls) + 
std::iota(begin(varr), end(varr), 0.0); + std::array aarr{}; + std::iota(begin(aarr), end(aarr), 0.0); BOOST_REQUIRE( size(marr) == 10 ); @@ -117,7 +144,7 @@ BOOST_AUTO_TEST_CASE(test_utility_1d) { using multi::num_elements; BOOST_REQUIRE( num_elements(carr) == num_elements(marr) ); - BOOST_REQUIRE( num_elements(varr) == num_elements(marr) ); + // BOOST_REQUIRE( num_elements(varr) == num_elements(marr) ); BOOST_REQUIRE( num_elements(aarr) == num_elements(aarr) ); using multi::data_elements; @@ -136,11 +163,13 @@ BOOST_AUTO_TEST_CASE(test_utility_1d) { } BOOST_AUTO_TEST_CASE(test_utility_2d) { + // clang-format off std::array, 3> carr{{ - {{ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.}}, - {{10., 11., 12., 13., 14., 15., 16., 17., 18., 19.}}, - {{20., 21., 22., 23., 24., 25., 26., 27., 28., 29.}}, + {{ 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}}, + {{10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0}}, + {{20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0}}, }}; + // clang-format on multi::array_ref marr(&carr[0][0], {3, 10}); // NOLINT(readability-container-data-pointer) tests access BOOST_REQUIRE( static_cast(carr.size()) == size(marr) ); @@ -157,55 +186,55 @@ BOOST_AUTO_TEST_CASE(test_utility_2d) { } BOOST_AUTO_TEST_CASE(multi_utility_test) { - static_assert( std::is_same::value_type, double>{}, "!"); + static_assert(std::is_same::value_type, double>{}, "!"); using multi::corigin; using multi::dimensionality; using multi::extension; using multi::extensions; -// using multi::origin; + using multi::num_elements; using multi::size; using multi::sizes; - using multi::num_elements; -{ - double arr[4] = {1., 2., 3., 4.}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy types - BOOST_REQUIRE( dimensionality(arr) == 1 ); - BOOST_REQUIRE( extension(arr).first() == 0 ); - BOOST_REQUIRE( extension(arr).last() == 4 ); - - BOOST_REQUIRE( size(arr) == 4 ); - - using boost::multi::detail::get; - 
BOOST_REQUIRE( get<0>(sizes(arr)) == size(arr) ); - using multi::get_allocator; - - static_assert(std::is_same >{}, "!"); - - using std::addressof; - - using multi::data_elements; - static_assert( std::is_same{} , "!"); -// BOOST_REQUIRE( data(A) == addressof(A[0]) ); - BOOST_REQUIRE( data_elements(arr) == addressof(arr[0]) ); -} { - double arr[2][3] = {{1., 2., 3.}, {4., 5., 6.}}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : test legacy types - BOOST_REQUIRE( dimensionality(arr) == 2 ); - BOOST_REQUIRE( extension(arr).first() == 0 ); - BOOST_REQUIRE( extension(arr).last() == 2 ); -// int a = extensions(A); - -// BOOST_REQUIRE( origin(A) == &A[0][0] ); -// *origin(A) = 99.; - arr[0][0] = 99.; - - BOOST_REQUIRE( arr[0][0] == 99. ); - BOOST_REQUIRE( corigin(arr) == &arr[0][0] ); - BOOST_REQUIRE( size(arr) == 2 ); - - using multi::detail::get; - BOOST_REQUIRE( get<0>(sizes(arr)) == size(arr) ); - BOOST_REQUIRE( num_elements(arr) == 6 ); - - static_assert( num_elements(arr) == 6 , "!" 
); -} + { + double arr[4] = {1.0, 2.0, 3.0, 4.0}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy types + BOOST_REQUIRE( dimensionality(arr) == 1 ); + BOOST_REQUIRE( extension(arr).first() == 0 ); + BOOST_REQUIRE( extension(arr).last() == 4 ); + + BOOST_REQUIRE( size(arr) == 4 ); + + using boost::multi::detail::get; + BOOST_REQUIRE( get<0>(sizes(arr)) == size(arr) ); + using multi::get_allocator; + + static_assert(std::is_same>{}); + + using std::addressof; + + using multi::data_elements; + static_assert(std::is_same{}); + // BOOST_REQUIRE( data(A) == addressof(A[0]) ); + BOOST_REQUIRE(data_elements(arr) == addressof(arr[0])); + } + { + double arr[2][3] = { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : test legacy types + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + }; + BOOST_REQUIRE( dimensionality(arr) == 2 ); + BOOST_REQUIRE( extension(arr).first() == 0 ); + BOOST_REQUIRE( extension(arr).last() == 2 ); + + arr[0][0] = 99.0; + + BOOST_REQUIRE( arr[0][0] == 99.0 ); + BOOST_REQUIRE( corigin(arr) == &arr[0][0] ); + BOOST_REQUIRE( size(arr) == 2 ); + + using multi::detail::get; + BOOST_REQUIRE( get<0>(sizes(arr)) == size(arr) ); + BOOST_REQUIRE( num_elements(arr) == 6 ); + + static_assert(num_elements(arr) == 6); + } } diff --git a/external_codes/boost_multi/multi/test/zero_dimensionality.cpp b/external_codes/boost_multi/multi/test/zero_dimensionality.cpp index 4e0f7a142d..460c0128f3 100644 --- a/external_codes/boost_multi/multi/test/zero_dimensionality.cpp +++ b/external_codes/boost_multi/multi/test/zero_dimensionality.cpp @@ -1,81 +1,115 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi zero dimensionality" -#include - -#include "multi/array.hpp" - -#include +// Copyright 2019-2024 Alfredo A. 
Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(zero_dimensionality_part1) { { - std::vector v1 = {1., 2., 3.}; + std::vector v1 = {10, 20, 30}; // NOLINT(fuchsia-default-arguments-calls) - multi::array_ref m1(v1.data(), multi::extensions_t<1>{multi::iextension{3}}); + multi::array_ref m1(v1.data(), multi::extensions_t<1>{multi::iextension{3}}); BOOST_REQUIRE( size(m1) == 3 ); BOOST_REQUIRE( &m1[1] == &v1[1] ); BOOST_REQUIRE( num_elements(m1) == 3 ); - multi::array_ref m0(v1.data(), {}); -// BOOST_REQUIRE(( &m0 == multi::array_ptr(v1.data(), {}) )); + multi::array_ref m0(v1.data(), {}); +// BOOST_REQUIRE(( &m0 == multi::array_ptr(v1.data(), {}) )); BOOST_REQUIRE( data_elements(m0) == v1.data() ); BOOST_REQUIRE( num_elements(m0) == 1 ); - m0 = 5.1; - BOOST_REQUIRE( v1[0] == 5.1 ); + m0 = 51; + BOOST_REQUIRE( v1[0] == 51 ); - double const& doub = std::move(m0); - BOOST_REQUIRE( doub == 5.1 ); + int const& doub = std::move(m0); + BOOST_REQUIRE( doub == 51 ); } { - multi::static_array a0 = multi::static_array{45.}; // TODO(correaa) this might trigger a compiler crash with g++ 7.5 
because of operator&() && overloads + multi::static_array a0 = multi::static_array{45.0}; // TODO(correaa) this might trigger a compiler crash with g++ 7.5 because of operator&() && overloads BOOST_REQUIRE( num_elements(a0) == 1 ); - BOOST_REQUIRE( a0 == 45. ); + BOOST_REQUIRE( a0 == 45.0 ); - a0 = multi::static_array{60.}; - BOOST_REQUIRE( a0 == 60. ); + a0 = multi::static_array{60.0}; + BOOST_REQUIRE( a0 == 60.0 ); } { - std::allocator alloc; - multi::static_array a0(45., alloc); + std::allocator const alloc; + multi::static_array a0(45.0, alloc); BOOST_REQUIRE( num_elements(a0) == 1 ); - BOOST_REQUIRE( a0 == 45. ); + BOOST_REQUIRE( a0 == 45.0 ); - a0 = multi::static_array{60.}; - BOOST_REQUIRE( a0 == 60. ); + a0 = multi::static_array{60.0}; + BOOST_REQUIRE( a0 == 60.0 ); } } BOOST_AUTO_TEST_CASE(zero_dimensionality_part2) { { - multi::array, 2> arr({1, 2}, std::allocator>{}); - BOOST_REQUIRE( size(arr) == 1 ); + multi::array, 2> const arr( + #ifdef _MSC_VER // problem with 14.3 c++17 + multi::extensions_t<2> + #endif + {1, 2}, + std::allocator>{} + ); + BOOST_REQUIRE( arr.size() == 1 ); } { - double doub = 2.; + double doub = 2.0; + multi::array_ref arr(doub); + double const& the_doub = static_cast(arr); + BOOST_REQUIRE( the_doub == doub ); + BOOST_REQUIRE( &the_doub == &doub ); + } + { + double doub = 2.0; double dd{multi::array_ref(&doub, {})}; BOOST_REQUIRE( dd == doub ); - multi::array_ptr ap1(&doub, multi::extensions_t<1>{{0, 1}}); + multi::array_ptr const ap1(&doub, multi::extensions_t<1>{{0, 1}}); BOOST_REQUIRE( ap1->base() == &doub ); BOOST_REQUIRE( (*ap1).base() == &doub ); - multi::array_ptr ap0(&doub, {}); + multi::array_ptr const ap0(&doub, {}); BOOST_REQUIRE(( ap0 == multi::array_ptr(&doub, {}) )); BOOST_REQUIRE(( ap0 != multi::array_ptr(&dd, {}) )); BOOST_REQUIRE( ap0->base() == &doub ); BOOST_REQUIRE( (*ap0).base() == &doub ); - multi::array_ptr ap0dd{&dd}; + multi::array_ptr const ap0dd{&dd}; BOOST_REQUIRE( ap0dd != ap0 ); BOOST_REQUIRE( *ap0 == 
*ap0dd ); - double d3 = M_PI; - BOOST_REQUIRE(( *multi::array_ptr(&d3, {}) == M_PI )); + double d3 = 3.14159265358979323846; + BOOST_REQUIRE(( *multi::array_ptr(&d3, {}) == 3.14159265358979323846 )); } } - diff --git a/external_codes/boost_multi/multi/test_adaptors/CMakeLists.txt b/external_codes/boost_multi/multi/test_adaptors/CMakeLists.txt new file mode 100644 index 0000000000..835b90f75b --- /dev/null +++ b/external_codes/boost_multi/multi/test_adaptors/CMakeLists.txt @@ -0,0 +1,207 @@ +# Copyright 2018-2024 Alfredo A. Correa +# Copyright 2024 Matt Borland +# Distributed under the Boost Software License, Version 1.0. +# https://www.boost.org/LICENSE_1_0.txt + +cmake_minimum_required(VERSION 3.16) + +# Library doesn't require installation, to still install this project: +# ~~~ +# $ cmake .. --install-prefix=$HOME && cmake --build . --config Release --target test --target install -- -j $(nproc) +# ~~~ +# to use this library in another CMake project +# ~~~ +# project("Your project") +# find_package(boost-multi CONFIG REQUIRED) +# add_executable(${PROJECT_NAME} src/your_main.cpp) +# target_link_libraries(${PROJECT_NAME} boost-multi::boost-multi) +# ~~~ + +if (DEFINED BOOST_SUPERPROJECT_VERSION AND NOT DEFINED BOOST_MULTI_STANDALONE) + + project(boost_multi VERSION "${BOOST_SUPERPROJECT_VERSION}" LANGUAGES CXX) + + add_library(boost_multi INTERFACE) + + add_library(Boost::multi ALIAS boost_multi) + + target_include_directories(boost_multi INTERFACE include) + + target_compile_features(boost_multi INTERFACE cxx_std_14) + + message(STATUS "Boost.Multi: standalone mode OFF") + + if(BUILD_TESTING AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt") + + add_subdirectory(test) + + endif() + +else() + + project( + multi + HOMEPAGE_URL "https://gitlab.com/correaa/boost-multi" + DESCRIPTION "A header-only modern C++ library that provides access and manipulation of data in multidimensional arrays." 
+ VERSION 0.80.1 + LANGUAGES CXX) + + message(STATUS "Boost.Multi: standalone mode ON") + + message("current binary directory: ${CMAKE_CURRENT_BINARY_DIR}") + + include_directories(${PROJECT_SOURCE_DIR}/include) # workaround for vscode to detect headers https://stackoverflow.com/a/68139743/225186 + + include(CMakePackageConfigHelpers) + include(CMakeDependentOption) + include(GNUInstallDirs) + + find_program(MEMORYCHECK_COMMAND valgrind) + set(VALGRIND_COMMAND_OPTIONS "-q --tool=memcheck --leak-check=yes --num-callers=51 --trace-children=yes --leak-check=full --track-origins=yes --gen-suppressions=all") + + # must go before `include(CTest)` + set(MEMORYCHECK_COMMAND_OPTIONS "-q --tool=memcheck --leak-check=yes --num-callers=52 --trace-children=yes --leak-check=full --track-origins=yes --gen-suppressions=all") + set(MEMORYCHECK_SUPPRESSIONS_FILE "${PROJECT_SOURCE_DIR}/.valgrind-suppressions") + + include(CTest) + + option(MULTI_BUILD_PACKAGE "Build package files as well" ON) + + cmake_dependent_option(MULTI_BUILD_TESTS "Enable multi tests" ON "BUILD_TESTING" OFF) + + cmake_dependent_option(MULTI_BUILD_PACKAGE_DEB "Create a DEB" ON "MULTI_BUILD_PACKAGE" OFF) + + add_library(multi INTERFACE) + #target_compile_features(multi PUBLIC cxx_std_17) + + target_include_directories(multi INTERFACE $ $) + target_compile_options(multi INTERFACE $<$: --expt-relaxed-constexpr --extended-lambda>) + + if(NOT CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) + add_library(correaa::multi ALIAS multi) + endif() + + # https://youtrack.jetbrains.com/issue/CPP-25608 + target_compile_features(${PROJECT_NAME} INTERFACE $<$>:cxx_std_17>) + target_compile_options(${PROJECT_NAME} INTERFACE $<$:-std=c++17>) + + # this makes CM FetchContent friendly https://www.foonathan.net/2022/06/cmake-fetchcontent/ + if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + return() + endif() + + # Installation help + configure_package_config_file("${PROJECT_SOURCE_DIR}/cmake/multi-config.cmake.in" 
"${PROJECT_BINARY_DIR}/multi-config.cmake" INSTALL_DESTINATION "share/cmake/multi") + + write_basic_package_version_file("${PROJECT_BINARY_DIR}/multi-config-version.cmake" COMPATIBILITY SameMajorVersion ARCH_INDEPENDENT) + + message("current install prefix directory: ${CMAKE_INSTALL_PREFIX}") + + install( + TARGETS multi + EXPORT multi-targets + INCLUDES + DESTINATION "${CMAKE_INSTALL_DATADIR}") + + install( + EXPORT ${PROJECT_NAME}-targets + DESTINATION "${CMAKE_INSTALL_DATADIR}/cmake/${PROJECT_NAME}" + NAMESPACE multi:: + FILE "${PROJECT_NAME}-targets.cmake") + + install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake" "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" DESTINATION "${CMAKE_INSTALL_DATADIR}/cmake/${PROJECT_NAME}") + + install(DIRECTORY "include/" TYPE INCLUDE) + + add_subdirectory(test) + + # if(MULTI_BUILD_TESTS) + find_package(Boost 1.65 COMPONENTS unit_test_framework) # 1.65 needed for BOOST_TEST_GLOBAL_FIXTURE, you can use your own Boost and use `cmake -DBOOST_ROOT=$HOME/local` + if(NOT Boost_FOUND) + message(WARNING "Cannot find Boost 1.65+, Multi library will not be fully tested.") + else() + enable_testing() + + add_subdirectory(include/boost/multi/adaptors/blas) + add_subdirectory(include/boost/multi/adaptors/complex) + add_subdirectory(include/boost/multi/adaptors/cuda) + add_subdirectory(include/boost/multi/adaptors/fftw) + + find_package(LAPACK) + if(LAPACK_FOUND) + add_subdirectory(include/boost/multi/adaptors/lapack) + endif() + + add_subdirectory(include/boost/multi/adaptors/thrust) + if(ENABLE_CUDA) + add_subdirectory(include/boost/multi/adaptors/cufft) + endif() + if(ENABLE_HIP) + add_subdirectory(include/boost/multi/adaptors/hipfft) + add_subdirectory(include/boost/multi/adaptors/hipthrust/test) + endif() + endif() + # endif() + + if(MULTI_BUILD_PACKAGE) + list(APPEND source-generators TBZ2 TGZ TXZ ZIP) + + if(CMAKE_HOST_WIN32) + list(APPEND binary-generators "WIX") + endif() + + if(MULTI_BUILD_PACKAGE_DEB) + 
list(APPEND binary-generators "DEB") + endif() + + if(MULTI_BUILD_RPM) + list(APPEND binary-generators "RPM") + endif() + + set(CPACK_PACKAGE_NAME ${PROJECT_NAME} + CACHE STRING "The resulting package name" + ) + set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "all") + set(CPACK_PACKAGE_ARCHITECTURE "all") + set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Multidimensional arrays for C++" + CACHE STRING "Package description for the package metadata" + ) + + set(CPACK_PACKAGE_VENDOR "alfredo.correa@gmail.com") + # set(CPACK_PACKAGE_INSTALL_DIRECTORY ${CPACK_PACKAGE_NAME}) + # SET(CPACK_OUTPUT_FILE_PREFIX "${CMAKE_SOURCE_DIR}/_packages") + # set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/some")#/${CMAKE_PROJECT_VERSION}") + + # set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR}) + # set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR}) + # set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH}) + + set(CPACK_PACKAGE_CONTACT "alfredo.correa@gmail.com") + set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Alfredo A. Correa") + + set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") + set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md") + + set(CPACK_DEBIAN_FILE_NAME "multi_all.deb") + # set(CPACK_COMPONENTS_GROUPING ALL_COMPONENTS_IN_ONE)#ONE_PER_GROUP) + # set(CPACK_DEB_COMPONENT_INSTALL YES) + + set(CPACK_SOURCE_GENERATOR ${source-generators}) + set(CPACK_GENERATOR ${binary-generators}) + + # set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION}") + # set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CPACK_PACKAGE_FILE_NAME}") + + list( + APPEND + CPACK_SOURCE_IGNORE_FILES + /.git/ + /.build*/ + /build/ + .gitignore + .DS_Store) + + include(CPack) + endif() + +endif() diff --git a/external_codes/catch/Readme b/external_codes/catch/Readme index f69508fc07..114764bca8 100644 --- a/external_codes/catch/Readme +++ b/external_codes/catch/Readme @@ -3,7 +3,7 @@ C++ Unit test framework https://github.com/catchorg/Catch2 The single include file is downloaded from here: 
-https://github.com/catchorg/Catch2/releases/download/v2.13.9/catch.hpp +https://github.com/catchorg/Catch2/releases/download/v2.13.10/catch.hpp This patch was applied to eliminate warnings from sprintf https://github.com/catchorg/Catch2/commit/0e2895934cf778f3bd7e84b3df10dac81d4ef7e3 diff --git a/external_codes/catch/catch.hpp b/external_codes/catch/catch.hpp index 277b7b80ca..21b491b577 100644 --- a/external_codes/catch/catch.hpp +++ b/external_codes/catch/catch.hpp @@ -1,6 +1,6 @@ /* - * Catch v2.13.9 - * Generated: 2022-04-12 22:37:23.260201 + * Catch v2.13.10 + * Generated: 2022-10-16 11:01:23.452308 * ---------------------------------------------------------- * This file has been merged from multiple headers. Please don't edit it directly * Copyright (c) 2022 Two Blue Cubes Ltd. All rights reserved. @@ -15,7 +15,7 @@ #define CATCH_VERSION_MAJOR 2 #define CATCH_VERSION_MINOR 13 -#define CATCH_VERSION_PATCH 9 +#define CATCH_VERSION_PATCH 10 #ifdef __clang__ # pragma clang system_header @@ -7399,8 +7399,6 @@ namespace Catch { template struct ObjectStorage { - using TStorage = typename std::aligned_storage::value>::type; - ObjectStorage() : data() {} ObjectStorage(const ObjectStorage& other) @@ -7443,7 +7441,7 @@ namespace Catch { return *static_cast(static_cast(&data)); } - TStorage data; + struct { alignas(T) unsigned char data[sizeof(T)]; } data; }; } @@ -7953,7 +7951,7 @@ namespace Catch { #if defined(__i386__) || defined(__x86_64__) #define CATCH_TRAP() __asm__("int $3\n" : : ) /* NOLINT */ #elif defined(__aarch64__) - #define CATCH_TRAP() __asm__(".inst 0xd4200000") + #define CATCH_TRAP() __asm__(".inst 0xd43e0000") #endif #elif defined(CATCH_PLATFORM_IPHONE) @@ -13562,7 +13560,7 @@ namespace Catch { // Handle list request if( Option listed = list( m_config ) ) - return static_cast( *listed ); + return (std::min) (MaxExitCode, static_cast(*listed)); TestGroup tests { m_config }; auto const totals = tests.execute(); @@ -15395,7 +15393,7 @@ namespace Catch { 
} Version const& libraryVersion() { - static Version version( 2, 13, 9, "", 0 ); + static Version version( 2, 13, 10, "", 0 ); return version; } @@ -17532,12 +17530,20 @@ namespace Catch { #ifndef __OBJC__ +#ifndef CATCH_INTERNAL_CDECL +#ifdef _MSC_VER +#define CATCH_INTERNAL_CDECL __cdecl +#else +#define CATCH_INTERNAL_CDECL +#endif +#endif + #if defined(CATCH_CONFIG_WCHAR) && defined(CATCH_PLATFORM_WINDOWS) && defined(_UNICODE) && !defined(DO_NOT_USE_WMAIN) // Standard C/C++ Win32 Unicode wmain entry point -extern "C" int wmain (int argc, wchar_t * argv[], wchar_t * []) { +extern "C" int CATCH_INTERNAL_CDECL wmain (int argc, wchar_t * argv[], wchar_t * []) { #else // Standard C/C++ main entry point -int main (int argc, char * argv[]) { +int CATCH_INTERNAL_CDECL main (int argc, char * argv[]) { #endif return Catch::Session().run( argc, argv ); diff --git a/external_codes/mpi_wrapper/mpi3/.cppcheck-suppressions b/external_codes/mpi_wrapper/mpi3/.cppcheck-suppressions index 0f9cf7386c..b3ce726b04 100644 --- a/external_codes/mpi_wrapper/mpi3/.cppcheck-suppressions +++ b/external_codes/mpi_wrapper/mpi3/.cppcheck-suppressions @@ -3,3 +3,5 @@ missingIncludeSystem #unmatchedSuppression #preprocessorErrorDirective +unusedFunction +# ^^^ cppcheck 2.13 is very picky diff --git a/external_codes/mpi_wrapper/mpi3/.gitlab-ci.yml b/external_codes/mpi_wrapper/mpi3/.gitlab-ci.yml index 4f77c2d270..94d2f0bab4 100644 --- a/external_codes/mpi_wrapper/mpi3/.gitlab-ci.yml +++ b/external_codes/mpi_wrapper/mpi3/.gitlab-ci.yml @@ -1,7 +1,11 @@ # -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;-*- # Copyright 2020-2023 Alfredo A. 
Correa -image: correaadock/gnudev:v2 +image: debian:testing + +workflow: + auto_cancel: + on_new_commit: interruptible variables: GIT_SUBMODULE_STRATEGY: recursive @@ -9,7 +13,7 @@ variables: openmpi: stage: build script: - - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make g++ git libboost-serialization-dev + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make g++ git libboost-serialization-dev - cd .. && ln -s boost-mpi3 mpi3 && cd mpi3 - cd test - mkdir build && cd build @@ -18,25 +22,61 @@ openmpi: - cmake --build . --parallel 2 || make VERBOSE=1 - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure -icpc-intelmpi: +openmpi-g++-7: stage: build - image: intel/oneapi-hpckit:latest - allow_failure: true + image: debian:oldoldstable # default is gcc 8 as of Dec 2023 script: - - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates cmake curl g++ git make libboost-test-dev libboost-serialization-dev + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make g++-7 git libboost-serialization-dev + - apt-get -qq -y install wget + - wget https://github.com/Kitware/CMake/releases/download/v3.27.0-rc3/cmake-3.27.0-rc3-linux-x86_64.sh --no-verbose + - sh ./cmake-3.27.0-rc3-linux-x86_64.sh --skip-license --prefix=/usr + - cmake --version + - g++-7 --version - cd .. && ln -s boost-mpi3 mpi3 && cd mpi3 - cd test - mkdir build && cd build - - icpc --version - - CXX=icpc CXXFLAGS="-diag-disable=593,2196,1786,1478" cmake .. -DCMAKE_BUILD_TYPE=Debug #https://community.intel.com/t5/Intel-C-Compiler/Inline-and-no-inline-warning/td-p/1216764 - - cmake --build . --parallel 2 || cmake --build . 
--verbose - - ctest --output-on-failure + - export MPI_OVERSUBSCRIBE="--oversubscribe" + - export MPI_ALLOW_RUN_AS_ROOT="--allow-run-as-root" + - cmake .. -DCMAKE_CXX_COMPILER=g++-7 -DCMAKE_BUILD_TYPE=Debug + - cmake --build . --parallel 2 || make VERBOSE=1 + - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure needs: ["openmpi"] -icpx-intelmpi: +exampi: + allow_failure: false stage: build - image: intel/oneapi-hpckit:latest - allow_failure: true + script: + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates cmake g++ git libboost-serialization-dev make python3 ssh strace # ca-certificates libopenmpi-dev openmpi-bin cmake make g++ git libboost-serialization-dev + - git clone https://correaa:${EXATOKEN}@github.com/tonyskjellum/ExaMPI.git + - cd ExaMPI + - mkdir build && cd build + - cmake .. --install-prefix=$HOME/exa + - make -j 4 + - make install + - export PATH=$HOME/exa/bin:$PATH + - export PATH=$HOME/exa/runtime:$PATH + - export LD_LIBRARY_PATH=$HOME/exa/lib:$LD_LIBRARY_PATH + - export MPI_PATH=$HOME/exa/bin + - export MPI_LIB=$HOME/exa/lib + - export MPI_INC=$HOME/exa/include + - export MPI_HOME=$HOME/exa + - which mpicxx + - which mpirun + - mpirun -n 2 --separate_rank --print_to_file --loghead --logfault --logexec --logcontrol tests/integration_tests/pingpong tests/integration_tests/allreduce + - mpirun -n 2 --separate_rank --print_to_file --loghead --logfault --logexec --logcontrol tests/integration_tests/pingpong tests/integration_tests/alltoall + - ctest --output-on-failure + - cd ../.. + - mkdir build && cd build + - which mpicxx + - mpicxx --version + - cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DUSE_EXAMPI=1 -DMPI_HOME=$HOME/exa + - make -j 2 || make VERBOSE=1 + - ls + - ctest --output-on-failure + +oneapi: + stage: build + image: intel/oneapi-hpckit:2023.0.0-devel-ubuntu22.04 script: - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates cmake curl g++ git make libboost-test-dev libboost-serialization-dev - cd .. && ln -s boost-mpi3 mpi3 && cd mpi3 @@ -48,10 +88,27 @@ icpx-intelmpi: - ctest --output-on-failure needs: ["openmpi"] +nvhpc-24.5 c++20: + stage: build + image: nvcr.io/nvidia/nvhpc:24.5-devel-cuda12.4-ubuntu22.04 # https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nvhpc/tags + # tags: + # - non-shared + # - large-disk-space + interruptible: true + script: + - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates cmake curl g++ git make libboost-test-dev libboost-serialization-dev + - /opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvc++ --version + - mkdir build && cd build + - CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvc++ cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_STANDARD=20 + - cmake --build . --parallel 2 || cmake --build . --verbose + - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure + needs: ["openmpi"] + openmpi-clang: stage: build + image: debian:testing script: - - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang g++ git libstdc++-12-dev libboost-serialization-dev + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang g++ git libstdc++-12-dev libboost-serialization-dev - cd .. 
&& ln -s boost-mpi3 mpi3 && cd mpi3 - cd test - mkdir build && cd build @@ -65,8 +122,9 @@ openmpi-clang: openmpi-clang20: stage: build + image: debian:stable script: - - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang git libstdc++-12-dev libboost-serialization-dev + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang git libstdc++-12-dev libboost-serialization-dev - cd test - mkdir build && cd build - export MPI_OVERSUBSCRIBE="--oversubscribe" @@ -80,7 +138,7 @@ openmpi-clang20: openmpi-clang-tidy: stage: build script: - - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin clang libstdc++-12-dev clang-tidy cmake git make libboost-serialization-dev + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin clang libstdc++-12-dev clang-tidy cmake git make libboost-serialization-dev - mkdir build && cd build - export MPI_OVERSUBSCRIBE="--oversubscribe" - clang++ --version @@ -93,12 +151,12 @@ openmpi-clang-tidy: openmpi-cppcheck: stage: build script: - - apt update -qq && apt install -qq -y --no-install-recommends libopenmpi-dev openmpi-bin g++ libstdc++-12-dev ca-certificates cmake cppcheck git make libboost-serialization-dev + - apt-get update -qq && apt-get install -qq -y --no-install-recommends libopenmpi-dev openmpi-bin g++ libstdc++-12-dev ca-certificates cmake cppcheck git make libboost-serialization-dev pkg-config libfftw3-dev libfftw3-mpi-dev - mkdir build && cd build - export MPI_OVERSUBSCRIBE="--oversubscribe" - g++ --version - cppcheck --version - - cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_CPPCHECK="cppcheck;--force;--enable=all;--inline-suppr;--language=c++;--suppress=missingIncludeSystem;--suppress=syntaxError;--suppress=unmatchedSuppression;--std=c++17;--error-exitcode=666;-UEXCLUDE_CPPCHECK" + - cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_CPPCHECK="cppcheck;--force;--enable=all;--inline-suppr;--language=c++;--check-level=exhaustive;--suppress=missingIncludeSystem;--suppress=syntaxError;--suppress=unmatchedSuppression;--suppress=unusedFunction;--std=c++17;--error-exitcode=666;-UEXCLUDE_CPPCHECK" - make --jobs=2 || make VERBOSE=1 - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure needs: ["openmpi"] @@ -106,10 +164,12 @@ openmpi-cppcheck: mpich-debug: stage: build script: - - apt update -qq && apt install -qq -y --no-install-recommends libmpich-dev mpich + - apt-get update -qq && apt-get install -qq -y --no-install-recommends libopenmpi-dev openmpi-bin g++ libstdc++-12-dev ca-certificates cmake cppcheck git make libboost-serialization-dev - cd test - mkdir build && cd build - export MPI_OVERSUBSCRIBE="" + - export OMPI_ALLOW_RUN_AS_ROOT=1 + - export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 - cmake .. -DCMAKE_BUILD_TYPE=Debug - cmake --build . --parallel 2 || cmake --build . 
--verbose - ctest --output-on-failure @@ -118,7 +178,7 @@ mpich-valgrind: stage: build allow_failure: true script: - - apt update -qq && apt-get install -qq -y --no-install-recommends libmpich-dev mpich + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates cmake git libboost-test-dev libboost-serialization-dev libmpich-dev make mpich valgrind - mpirun --version - mkdir build && cd build - export MPI_OVERSUBSCRIBE="" @@ -132,8 +192,10 @@ mpich-valgrind: qmcpack-openmpi: stage: test + allow_failure: true + image: debian:testing script: - - apt-get -qq update && apt-get -qq install --no-install-recommends -y libblas-dev liblapack-dev libfftw3-dev libboost-serialization-dev libopenmpi-dev gfortran g++ cmake make git ca-certificates numdiff python3 python3-numpy python3-h5py python3-mpi4py python3-scipy libxml2-dev libhdf5-dev + - apt-get -qq update && apt-get -qq install --no-install-recommends -y libblas-dev liblapack-dev libfftw3-dev libboost-serialization-dev libopenmpi-dev gfortran g++ cmake make git ca-certificates numdiff python3 python3-numpy python3-h5py python3-mpi4py python3-scipy libxml2-dev libhdf5-dev valgrind - git clone https://github.com/QMCPACK/qmcpack.git - cd qmcpack - git config --global user.email "alfredo.correa@gmail.com" && git config --global user.name "Alfredo Correa" @@ -142,11 +204,13 @@ qmcpack-openmpi: - cd build - cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DBUILD_PPCONVERT=1 -DQMC_MIXED_PRECISION=1 -DCMAKE_BUILD_TYPE=Debug -DMPIEXEC_PREFLAGS="--allow-run-as-root;--bind-to;none" .. 
#-DCMAKE_CXX_FLAGS="-Werror" - make --jobs=2 || make VERBOSE=1 # afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance - - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest -R afqmc --output-on-failure + - export VALGRIND_EXE="valgrind --leak-check=full --track-origins=yes --show-leak-kinds=all --suppressions=.valgrind_suppressions --gen-suppressions=all --error-exitcode=1 " + - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest -L deterministic -j 2 --output-on-failure -T memcheck # -R afqmc + - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest -R afqmc -j 2 --output-on-failure -T memcheck # -R afqmc needs: ["openmpi"] qmcpack-cuda-runner: - allow_failure: true + allow_failure: false image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04 tags: - nvidia-docker @@ -161,25 +225,26 @@ qmcpack-cuda-runner: - git subtree add --squash -P external_codes/mpi3 $CI_REPOSITORY_URL $CI_COMMIT_BRANCH # e.g. https://gitlab.com/correaa/boost-multi.git - cd ../qmcpack - cd build - - CUDACXX=/usr/local/cuda/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_HOST_COMPILER=g++ -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" .. + - CUDACXX=/usr/local/cuda/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DQMC_GPU_ARCHS=sm_61 -DCMAKE_CUDA_HOST_COMPILER=g++ -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" .. 
- make -j4 afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest -R afqmc --output-on-failure needs: ["openmpi-cuda-11", "qmcpack-openmpi"] inq-openmpi: stage: test + image: debian:testing tags: - cpu script: - - apt-get update && apt-get install --no-install-recommends -y --quiet libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-serialization-dev libopenmpi-dev libhdf5-dev gfortran g++ cmake pkg-config make git ca-certificates wget + - apt-get update && apt-get install --no-install-recommends -y --quiet libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-serialization-dev libboost-iostreams-dev libopenmpi-dev libhdf5-dev gfortran g++ cmake pkg-config python3-dev make git ca-certificates wget - cmake --version - git clone https://gitlab.com/npneq/inq.git --recurse-submodules - cd inq - cd external_libs/mpi3 - - git checkout $CI_COMMIT_BRANCH + - git checkout $CI_COMMIT_BRANCH # check that multi repo is mirrored correctly from this repo to the submodule repo (npneq) - cd ../.. - mkdir build && cd build - - ../configure --prefix=$HOME --disable-debug + - cmake .. 
--install-prefix=$HOME -DCMAKE_BUILD_TYPE=Release - make --jobs=2 || make VERBOSE=1 - make install - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure @@ -215,14 +280,14 @@ openmpi-cuda-11: # - sh cmake-3.21.3-linux-x86_64.sh --skip-license --prefix=/opt/cmake # - ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake -inq-cuda-11-openmpi-compileonly: +inq-cuda-11-openmpi: stage: build allow_failure: true image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04 tags: - nvidia-docker script: - - apt-get update && apt-get install --no-install-recommends -y --quiet cmake libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-serialization-dev libopenmpi-dev libhdf5-dev gfortran g++ pkg-config make git ca-certificates wget + - apt-get update && apt-get install --no-install-recommends -y --quiet cmake git libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-iostreams-dev libboost-serialization-dev libopenmpi-dev libhdf5-dev ca-certificates gfortran g++ make pkg-config python3-dev wget - cmake --version - git clone https://gitlab.com/npneq/inq.git --recurse-submodules - cd inq @@ -231,9 +296,11 @@ inq-cuda-11-openmpi-compileonly: - cd ../.. - mkdir build && cd build - /usr/local/cuda-11/bin/nvcc -V - - CUDA_ARCH_OVERRIDE=1 ../configure --prefix=$HOME --enable-cuda --with-cuda-prefix=/usr/local/cuda --pass-thru -DCMAKE_CUDA_COMPILER=/usr/local/cuda-11/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=61 - - make silicon --jobs=2 + - export MPI_OVERSUBSCRIBE="--oversubscribe" + - CUDACXX=/usr/local/cuda/bin/nvcc cmake .. 
--install-prefix=$HOME -DENABLE_CUDA=1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=80 + - make --jobs=2 # silicon - make install - - ctest -R silicon + - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure # --jobs=2 # -R silicon + needs: - openmpi-cuda-11 diff --git a/external_codes/mpi_wrapper/mpi3/CMakeLists.txt b/external_codes/mpi_wrapper/mpi3/CMakeLists.txt index 74f0a7cbd5..7387aa2f6b 100644 --- a/external_codes/mpi_wrapper/mpi3/CMakeLists.txt +++ b/external_codes/mpi_wrapper/mpi3/CMakeLists.txt @@ -1,30 +1,49 @@ -# -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- cmake_minimum_required(VERSION 3.16) project( - mpi3 - VERSION 0.79.0 - DESCRIPTION "B-MPI3 is a C++ library wrapper for version 3.1 of the MPI standard interface that simplifies the utilization and maintenance of MPI code." - HOMEPAGE_URL "https://gitlab.com/correaa/boost-mpi3" - LANGUAGES CXX -) + bmpi3 + VERSION 0.79.0 + DESCRIPTION + "B-MPI3 is a C++ library wrapper for version 3.1 of the MPI standard interface that simplifies the utilization and maintenance of MPI code." 
+ HOMEPAGE_URL "https://gitlab.com/correaa/boost-mpi3" + LANGUAGES CXX) -include(GNUInstallDirs) +find_package(MPI REQUIRED COMPONENTS CXX) # C) # might need to `module load + # mpi` add_library(${PROJECT_NAME} INTERFACE) -target_include_directories(${PROJECT_NAME} INTERFACE $ $ $) - target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_17) -#set_target_properties(${PROJECT_NAME} PROPERTIES CXX_EXTENSIONS OFF) - -# this makes CM FetchContent friendly https://www.foonathan.net/2022/06/cmake-fetchcontent/ +target_include_directories( + ${PROJECT_NAME} + INTERFACE $ + $ + $) +target_link_libraries(${PROJECT_NAME} INTERFACE MPI::MPI_CXX) + +# ~~~ +# to use this project directly from CMake +# FetchContent_Declare( +# bmpi3 +# GIT_REPOSITORY git@gitlab.com:correaa/boost-mpi3.git # https://gitlab.com/correaa/boost-mpi3.git +# GIT_TAG master) +# FetchContent_MakeAvailable(bmpi3) +# add_executable(main main.cpp) +# target_link_libraries(main PUBLIC bmpi3) +# ~~~ + +# this makes CM FetchContent friendly +# https://www.foonathan.net/2022/06/cmake-fetchcontent/ if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) - return() + return() endif() +include(GNUInstallDirs) + include(CTest) enable_testing() +add_subdirectory(include/mpi3/adaptors/fftw) + add_subdirectory(test) diff --git a/external_codes/mpi_wrapper/mpi3/README.md b/external_codes/mpi_wrapper/mpi3/README.md index e949010eaa..d820372ee1 100644 --- a/external_codes/mpi_wrapper/mpi3/README.md +++ b/external_codes/mpi_wrapper/mpi3/README.md @@ -18,8 +18,7 @@ B-MPI3 also provides allocators and facilities to manipulate MPI-mediated Remote For example, pointers are not utilized directly and it is replaced by an iterator-based interface and most data, in particular custom type objects are serialized automatically into messages by the library. B-MPI3 interacts well with the C++ standard library, containers and custom data types (classes). 
-B.MPI3 is written from [scratch](https://octo-repo-visualization.vercel.app/?repo=llnl%2Fb-mpi3) in C++17 and it has been tested with many MPI library implementations and compilers, OpenMPI +1.9, MPICH +3.2.1, MVAPICH or Spectrum MPI, using the following compilers gcc +5.4.1, clang +6.0, PGI 18.04. -(Any standard compliant MPI library can be used.) +B.MPI3 is written from [scratch](https://octo-repo-visualization.vercel.app/?repo=llnl%2Fb-mpi3) in C++17 and it has been tested with many standard compliant MPI library implementations and compilers, OpenMPI +1.9, MPICH +3.2.1, MVAPICH, Spectrum MPI, and [ExaMPI](https://github.com/tonyskjellum/ExaMPI), using the following compilers gcc +5.4.1, clang +6.0, PGI 18.04. B.MPI3 is not an official Boost library, but is designed following the principles of Boost and the STL. B.MPI3 is not a derivative of Boost.MPI and it is unrelated to the, [now deprecated](https://web.archive.org/web/20170421220544/http://blogs.cisco.com/performance/the-mpi-c-bindings-what-happened-and-why/), official MPI-C++ interface. @@ -81,35 +80,46 @@ It turns out that this interface was a very minimal change over the C version, a The B.MPI3 library was designed to use simultaneously (interleaved) with the standard C interface of MPI. In this way, changes to existing code can be made incrementally. -## Installation +## Usage + +The library is "header-only"; no separate compilation or configuration is necessary after downloading the library. + + +```bash +git clone https://gitlab.com/correaa/boost-mpi3.git +``` + +It requires an MPI distribution (e.g. OpenMPI or MPICH2), a C++17 compiler and Boost libraries installed. +In a system such as Ubuntu or Fedora, the dependencies can be installed by `sudo apt install g++ libmpich-dev libboost-test-dev ` or `sudo dnf install gcc-c++ boost-devel openmpi-devel mpich-devel`. -The library is "header-only"; no separate compilation is necessary. -In order to compile it requires an MPI distribution (e.g.
OpenMPI or MPICH2) and the corresponding compiler-wrapper (`mpic++` or `mpicxx`). -This library requires C++14 and the Boost library installed. A typical compilation/run command looks like this: ```bash -$ mpic++ -std=c++14 -O3 mpi3/test/communicator_send.cpp -o communicator_send.x -lboost_serialization +$ mpic++ communicator_send.cpp -o communicator_send.x -lboost_serialization $ mpirun -n 8 ./communicator_send.x ``` -In a system such as Red Hat, the dependencies can by installed by +Alternatively, the library can be fetched on demand by the CMake project: -```bash -dnf install gcc-c++ boost-devel openmpi-devel mpich-devel +```cmake +include(FetchContent) +FetchContent_Declare(bmpi3 GIT_REPOSITORY https://gitlab.com/correaa/boost-mpi3.git) # or git@gitlab.com:correaa/boost-mpi3.git +FetchContent_MakeAvailable(bmpi3) + +target_link_libraries(your_executable PRIVATE bmpi3) ``` -Some systems require loading the MPI module before compiling and using MPI programs, `module load mpi/mpich`. +Some systems require loading the MPI module before compiling or using MPI programs, `module load mpi` (or `mpich`). -The library is tested frequently against `openmpi` and `mpich`, and less frequently with `mvapich2`. +The library is tested frequently against `openmpi` and `mpich` implementations of MPI. ## Testing The library has a basic `ctest` based testing system. ```bash -# module load mpi/mpich # or mpi/openmpi , needed in systems like Fedora -cd mpi3/test +# module load mpi/mpich # or mpi/openmpi # needed in systems like Fedora +cd mpi3 mkdir build && cd build cmake .. cmake --build .. @@ -119,7 +129,7 @@ ctest ## Initialization Like MPI, B.MPI3 requires some global library initialization. -The library includes a convenience `mpi3/main.hpp` which wraps around this initialization steps and *simulates* a main function. +The library includes a convenience header `mpi3/main.hpp`, which provides a "main" function that does this initialization. 
In this way, a parallel program looks very much like normal programs, except that the main function has a third argument with the default global communicator passed in. ```cpp @@ -129,10 +139,9 @@ In this way, a parallel program looks very much like normal programs, except tha #include namespace mpi3 = boost::mpi3; -using std::cout; -int mpi3::main(int argc, char* argv[], mpi3::communicator world){ - if(world.rank() == 0) cout << mpi3::version() << '\n'; +int mpi3::main(int argc, char** argv, mpi3::communicator world) { + if(world.rank() == 0) {std::cout << mpi3::version() << '\n';} return 0; } ``` diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw.hpp new file mode 100644 index 0000000000..395f4a2271 --- /dev/null +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw.hpp @@ -0,0 +1,455 @@ +// Copyright 2019-2024 Alfredo A. Correa + +#include + +#include + +#include + +#include +#include + +namespace boost::mpi3::fftw { + +struct environment { + environment() { fftw_mpi_init(); } + + environment(environment const&) = delete; + environment(environment&&) = delete; + + environment& operator=(environment const&) = delete; + environment& operator=(environment&&) = delete; + + ~environment() { fftw_mpi_cleanup(); } + + template + static auto local_size_2d(Args... args) { return fftw_mpi_local_size_2d(args...); } + + template + static auto local_size_many(Args... 
args) { return fftw_mpi_local_size_many(args...); } +}; + +template +using default_allocator = + // std::allocator + boost::multi::fftw::allocator + // boost::mpi3::allocator + ; + +struct local_2d { + private: + std::ptrdiff_t n0_ = -1; + std::ptrdiff_t start0_ = -1; + std::ptrdiff_t count_; + boost::mpi3::communicator* handle_; + multi::extensions_t<2> exts_; + + public: + local_2d(multi::extensions_t<2> const& exts, boost::mpi3::communicator& comm) + : count_{environment::local_size_2d(std::get<0>(exts).size(), std::get<1>(exts).size(), &comm, &n0_, &start0_)}, handle_{&comm}, exts_{exts} {} + + auto count() const { return count_; } + auto extension() const { return multi::extension_t{start0_, start0_ + n0_}; } + auto comm() const -> communicator& { return *handle_; } + auto global_extensions() const { return exts_; } + auto static block() {return FFTW_MPI_DEFAULT_BLOCK;} +}; + +struct local_2d_many { + private: + std::ptrdiff_t n0_ = -1; + std::ptrdiff_t start0_ = -1; + std::ptrdiff_t count_; + + boost::mpi3::communicator* handle_; + multi::extensions_t<2> exts_; + std::ptrdiff_t block_; + + public: + local_2d_many(multi::extensions_t<2> const& exts, boost::mpi3::communicator& comm) : handle_{&comm}, exts_{exts}, block_{FFTW_MPI_DEFAULT_BLOCK} { + count_ = environment::local_size_many(2, std::array{std::get<0>(exts).size(), std::get<1>(exts).size()}.data(), /*howmany*/ 1, FFTW_MPI_DEFAULT_BLOCK /*std::get<0>(ext).size()*/, &comm, &n0_, &start0_); + } + local_2d_many(multi::extensions_t<2> const& exts, boost::mpi3::communicator& comm, std::ptrdiff_t block) : handle_{&comm}, exts_{exts}, block_{block} { + count_ = environment::local_size_many(2, std::array{std::get<0>(exts).size(), std::get<1>(exts).size()}.data(), /*howmany*/ 1, block, &comm, &n0_, &start0_); + } + + auto count() const { return count_; } + auto extension() const { return multi::extension_t{start0_, start0_ + n0_}; } + auto comm() const -> communicator& { return *handle_; } + auto 
global_extensions() const { return exts_; } + auto block() const {return block_;} +}; + +template< + class T, + boost::multi::dimensionality_type D, + class LocalLayout = local_2d, + class Alloc = default_allocator> +class array; + +template< + class T, + boost::multi::dimensionality_type D, + class Alloc = default_allocator> +class unbalanced_array; + +// namespace bmpi3 = boost::mpi3; + +template +class array { + LocalLayout local_layout_; + + Alloc alloc_; + + boost::multi::array_ptr::pointer> local_ptr_; + + public: + using element_type = T; + + + array(multi::extensions_t<2> exts, boost::mpi3::communicator& comm, Alloc alloc = Alloc{}) + : alloc_{alloc}, + local_layout_(exts, comm), + local_ptr_{alloc_.allocate(local_layout_.count()), multi::extensions_t<2>(local_layout_.extension(), std::get<1>(exts))} {} + + array(multi::extensions_t<2> exts, element_type const& e, boost::mpi3::communicator& comm, Alloc alloc = Alloc{}) + : alloc_{alloc}, + local_layout_(exts, comm), + local_ptr_{alloc_.allocate(local_layout_.count()), multi::extensions_t<2>(local_layout_.extension(), std::get<1>(exts))} { + std::uninitialized_fill_n(local_ptr_.base(), local_ptr_->num_elements(), e); + } + + array(array const&) = delete; + array(array&&) = delete; + + auto operator=(array const&) -> array& = delete; + auto operator=(array&&) -> array& = delete; + + boost::multi::array_ref local_cutout() & { return *local_ptr_; } + boost::multi::array_cref local_cutout() const& { return *local_ptr_; } + + auto local_layout() const { return local_layout_; } + + ptrdiff_t local_count() const& { return local_layout_.count(); } + + // auto extensions() const& { return multi::extensions_t<2>{local_layout_.n0_, std::get<1>(local_cutout().extensions())}; } + + // ptrdiff_t num_elements() const& { return multi::layout_t<2>(extensions()).num_elements(); } + + template + static auto from_scatter(Array const& snd) -> array { + array ret(snd.extensions()); + ret.scatter(snd); + return ret; + } + + // 
template + // void scatter(Array const& snd) & { + // auto& comm = reinterpret_cast(handle_); + + // auto const sendcounts = comm |= static_cast(local_cutout().num_elements()); + // auto const displs = comm |= static_cast(snd[local_cutout().extension().front()].base() - snd.base()); + + // MPI_Scatterv( + // snd.base(), sendcounts.data(), displs.data(), MPI_DOUBLE_COMPLEX, + // local_cutout().base(), local_cutout().num_elements(), MPI_DOUBLE_COMPLEX, + // 0, &comm + // ); + // } + + // auto communicator() const -> boost::mpi3::communicator& { + // return const_cast(reinterpret_cast(handle_)); + // } + + // template + // void all_gather(Array&& rcv) const& { + // assert(rcv.extensions() == extensions()); + + // auto& comm = const_cast(reinterpret_cast(handle_)); + + // auto const recvcounts = comm |= static_cast(local_cutout().num_elements()); + // auto const displs = comm |= static_cast(rcv[local_cutout().extension().front()].base() - rcv.base()); + + // MPI_Allgatherv( + // local_cutout().base(), local_cutout().num_elements(), MPI_DOUBLE_COMPLEX, + // rcv.base(), + // recvcounts.data(), displs.data(), MPI_DOUBLE_COMPLEX, + // handle_ + // ); + // } + + // template + // explicit operator multi::array() const& { + // multi::array ret(extensions()); + // all_gather(ret); + // return ret; + // } + + auto extensions() const { return local_layout_.global_extensions(); } + + template + array& operator=(array const& other) { + int P = -1; + MPI_Comm_size(&local_layout_.comm(), &P); + fftw_plan plan = fftw_mpi_plan_many_transpose( + 6, 6, /*howmany*/ 2 /*2 for complex*/, 1, 1, + const_cast(reinterpret_cast(other.local_cutout().base())), // NOLINT(cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-pro-type-reinterpret-cast) + reinterpret_cast(this->local_cutout().base()), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + &local_layout_.comm(), + FFTW_ESTIMATE + ); + fftw_execute(plan); + fftw_destroy_plan(plan); + return *this; + } + + // array& 
operator=(multi::array const& other) & { + // if(other.extensions() == extensions()) + // local_cutout() = other.stenciled(std::get<0>(local_cutout().extensions()), std::get<1>(local_cutout().extensions())); + // else { + // array tmp{other}; + // std::swap(*this, tmp); + // } + // return *this; + // } + // bool operator==(multi::array const& other) const&{ + // if(other.extensions() != extensions()) return false; + // return comm_&=(local_cutout() == other.stenciled(std::get<0>(local_cutout().extensions()), std::get<1>(local_cutout().extensions()))); + // } + // friend bool operator==(multi::array const& other, array const& self){ + // return self.operator==(other); + // } + // bool operator==(array const& other) const&{assert(comm_==other.comm_); + // return comm_&=(local_cutout() == other.local_cutout()); + // } + // array& operator=(array const& other)&{ + // if(other.extensions() == this->extensions() and other.comm_ == other.comm_) + // local_cutout() = other.local_cutout(); + // else assert(0); + // return *this; + // } + ~array() { alloc_.deallocate(local_cutout().base(), local_layout_.count()); } + + template + void all_gather(Array&& rcv) const& { + assert(rcv.extensions() == extensions()); + + auto const recvcounts = local_layout_.comm() |= static_cast(local_cutout().num_elements()); + auto const displs = local_layout_.comm() |= static_cast(rcv[local_cutout().extension().front()].base() - rcv.base()); + + MPI_Allgatherv( + local_cutout().base(), local_cutout().num_elements(), MPI_DOUBLE_COMPLEX, + rcv.base(), + recvcounts.data(), displs.data(), MPI_DOUBLE_COMPLEX, + &local_layout_.comm() + ); + } + + template + explicit operator multi::array() const& { + multi::array ret(extensions()); + all_gather(ret); + return ret; + } +}; + +template +auto scatter(Array const& arr) { + return array::from_scatter(arr); +} + +template +auto dft_forward(MPIArrayIn const& A, MPIArrayOut& B) -> MPIArrayOut& { + assert( &A.local_layout().comm() == &B.local_layout().comm() 
); + + // fftw_plan p = fftw_mpi_plan_dft_2d( + // std::get<0>(A.extensions()).size(), std::get<1>(A.extensions()).size(), + // (fftw_complex*)A.local_cutout().base(), (fftw_complex*)B.local_cutout().base(), + // &A.local_layout().comm(), + // FFTW_FORWARD, FFTW_ESTIMATE + // ); + + fftw_plan p = fftw_mpi_plan_many_dft( + 2, std::array{std::get<0>(A.extensions()).size(), std::get<1>(A.extensions()).size()}.data(), + 1, + A.local_layout().block(), B.local_layout().block(), // FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + const_cast(reinterpret_cast(A.local_cutout().base())), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-pro-type-const-cast) + reinterpret_cast(B.local_cutout().base()) , // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + &A.local_layout().comm(), + FFTW_FORWARD, FFTW_ESTIMATE + ); + fftw_execute(p); + fftw_destroy_plan(p); + + // // B = BT; + + // fftw_plan t = fftw_mpi_plan_transpose( + // std::get<0>(A.extensions()).size(), std::get<1>(A.extensions()).size(), + // (double*)(fftw_complex*)BT.local_cutout().data_elements(), (double*)(fftw_complex*)B.local_cutout().data_elements(), + // &A.communicator(), FFTW_ESTIMATE); + // fftw_execute(t); + // fftw_destroy_plan(t); + + return B; +} + +#if 0 +template +class array { + boost::mpi3::communicator* commhandle_; + + Alloc alloc_; + + class local_2d_type { + std::ptrdiff_t n0_ = -1; + std::ptrdiff_t start0_ = -1; + std::ptrdiff_t count_; + + public: + local_2d_type(multi::extensions_t<2> const& ext, boost::mpi3::communicator& comm) + : count_{environment::local_size_2d(std::get<0>(ext).size(), std::get<1>(ext).size(), &comm, &n0_, &start0_)} {} + + auto count() const { return count_; } + auto extension() const { return multi::extension_t{start0_, start0_ + n0_}; } + } local_; + + public: + using element_type = T; + using element_ptr = typename std::allocator_traits::pointer; + + private: + boost::multi::array_ptr local_ptr_; + + public: + auto local_count() const 
{return local_.count();} + + array(multi::extensions_t<2> ext, element_type const& e, boost::mpi3::communicator& comm, Alloc alloc = Alloc{}) + : commhandle_{&comm}, + alloc_{alloc}, + local_{ext, comm}, + local_ptr_{alloc_.allocate(local_.count()), multi::extensions_t<2>(local_.extension(), std::get<1>(ext))} { + std::uninitialized_fill(local_ptr_->elements().begin(), local_ptr_->elements().end(), e); // TODO(correaa) use adl_uninit_fill or uninitialized_fill member + // std::uninitialized_fill_n(local_ptr_->base(), local_ptr_->num_elements(), e); + } + + array(array const&); + + boost::multi::array_ref local() & { return *local_ptr_; } + boost::multi::array_cref local() const& { return *local_ptr_; } + + boost::multi::array_cref clocal() const { return local(); } + + // template + // void scatter(Array const& snd) & { + // auto& comm = reinterpret_cast(handle_); + + // auto const sendcounts = comm |= static_cast(local_cutout().num_elements()); + // auto const displs = comm |= static_cast(snd[local_cutout().extension().front()].base() - snd.base()); + + // MPI_Scatterv( + // snd.base(), sendcounts.data(), displs.data(), MPI_DOUBLE_COMPLEX, + // local_cutout().base(), local_cutout().num_elements(), MPI_DOUBLE_COMPLEX, + // 0, &comm + // ); + // } + + // auto communicator() const -> boost::mpi3::communicator& { + // return const_cast(reinterpret_cast(handle_)); + // } + + // template + // void all_gather(Array&& rcv) const& { + // assert(rcv.extensions() == extensions()); + + // auto& comm = const_cast(reinterpret_cast(handle_)); + + // auto const recvcounts = comm |= static_cast(local_cutout().num_elements()); + // auto const displs = comm |= static_cast(rcv[local_cutout().extension().front()].base() - rcv.base()); + + // MPI_Allgatherv( + // local_cutout().base(), local_cutout().num_elements(), MPI_DOUBLE_COMPLEX, + // rcv.base(), + // recvcounts.data(), displs.data(), MPI_DOUBLE_COMPLEX, + // handle_ + // ); + // } + + // template + // explicit operator 
multi::array() const& { + // multi::array ret(extensions()); + // all_gather(ret); + // return ret; + // } + + // array& operator=(multi::array const& other) & { + // if(other.extensions() == extensions()) + // local_cutout() = other.stenciled(std::get<0>(local_cutout().extensions()), std::get<1>(local_cutout().extensions())); + // else { + // array tmp{other}; + // std::swap(*this, tmp); + // } + // return *this; + // } + // // bool operator==(multi::array const& other) const&{ + // // if(other.extensions() != extensions()) return false; + // // return comm_&=(local_cutout() == other.stenciled(std::get<0>(local_cutout().extensions()), std::get<1>(local_cutout().extensions()))); + // // } + // // friend bool operator==(multi::array const& other, array const& self){ + // // return self.operator==(other); + // // } + // // bool operator==(array const& other) const&{assert(comm_==other.comm_); + // // return comm_&=(local_cutout() == other.local_cutout()); + // // } + // // array& operator=(array const& other)&{ + // // if(other.extensions() == this->extensions() and other.comm_ == other.comm_) + // // local_cutout() = other.local_cutout(); + // // else assert(0); + // // return *this; + // // } + ~array() { alloc_.deallocate(local().base(), local_.count()); } +}; +#endif + +template +class unbalanced_array { + boost::mpi3::communicator* commhandle_; + + Alloc alloc_; + + class local_2d_type { + std::ptrdiff_t n0_ = -1; + std::ptrdiff_t start0_ = -1; + std::ptrdiff_t count_; + + public: + local_2d_type(multi::extensions_t<2> const& ext, boost::mpi3::communicator& comm) + : count_{environment::local_size_2d(std::get<0>(ext).size(), std::get<1>(ext).size(), &comm, &n0_, &start0_)} {} + + auto count() const { return count_; } + auto extension() const { return multi::extension_t{start0_, start0_ + n0_}; } + } local_; + + public: + using element_type = T; + using element_ptr = typename std::allocator_traits::pointer; + + private: + boost::multi::array_ptr local_ptr_; + + 
public: + unbalanced_array(multi::extensions_t<2> ext, element_type const& e, boost::mpi3::communicator& comm, Alloc alloc = Alloc{}) + : commhandle_{&comm}, + alloc_{alloc}, + local_{ext, *commhandle_}, + local_ptr_{alloc_.allocate(local_.count()), multi::extensions_t<2>(local_.extension(), std::get<1>(ext))} { + std::uninitialized_fill(local_ptr_->elements().begin(), local_ptr_->elements().end(), e); // TODO(correaa) use adl_uninit_fill or uninitialized_fill member + // std::uninitialized_fill_n(local_ptr_->base(), local_ptr_->num_elements(), e); + } + + boost::multi::array_ref local() & { return *local_ptr_; } + boost::multi::array_cref local() const& { return *local_ptr_; } + + boost::multi::array_cref clocal() const { return local(); } +}; + +} // namespace boost::mpi3::fftw diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw/CMakeLists.txt b/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw/CMakeLists.txt new file mode 100644 index 0000000000..981ab57808 --- /dev/null +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw/CMakeLists.txt @@ -0,0 +1,18 @@ +if(PKG_CONFIG_FOUND) + pkg_search_module( + FFTW + fftw3 + IMPORTED_TARGET + ) + if(FFTW_FOUND) + add_subdirectory(test) + else() + message(WARNING "Cannot find FFTW, FFTW-adaptor will not be tested. If you want this feature install MPI FFTW, for example please run:" + "\n sudo apt install pkg-config libfftw3-dev libfftw3-mpi-dev" + "\n sudo dnf install fftw-devel # in Fedora") + endif() +else() + message(WARNING "Cannot find FFTW, FFTW-adaptor will not be tested. 
If you want this feature install MPI FFTW, for example please run:" + "\n sudo apt install pkg-config libfftw3-dev libfftw3-mpi-dev" + "\n sudo dnf install fftw-devel # in Fedora") +endif() diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw/test/CMakeLists.txt b/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw/test/CMakeLists.txt new file mode 100644 index 0000000000..ef3f0e498e --- /dev/null +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw/test/CMakeLists.txt @@ -0,0 +1,42 @@ +find_package(PkgConfig) + +set(CMAKE_CXX_EXTENSIONS OFF) + +if(PKG_CONFIG_FOUND) + pkg_search_module( + FFTW + fftw3 + IMPORTED_TARGET + ) + if(FFTW_FOUND) + # include_directories(PkgConfig::FFTW) this seems to be always incorrect + link_libraries(PkgConfig::FFTW) + + include_directories(${CMAKE_BINARY_DIR}) + + #add_subdirectory(test) + else() + message(WARNING "Cannot find FFTW, FFTW-adaptor will not be tested. If you want this feature install FFTW, for example please run:" + "\n sudo apt install pkg-config libfftw3-dev" + "\n sudo dnf install fftw-devel # in Fedora") + endif() +else() + message(WARNING "Cannot find PkgConfig and/or FFTW, FFTW-adaptor will not be tested. 
If you want this feature install PkgConfig and FFTW, for example please run:" + "\n sudo apt install pkg-config libfftw3-dev" + "\n sudo dnf install fftw-devel # in Fedora") +endif() + +include(FetchContent) +FetchContent_Declare(multi GIT_REPOSITORY https://gitlab.com/correaa/boost-multi.git) +FetchContent_MakeAvailable(multi) + +set(TEST_SRCS + array_2d.cpp +) + +foreach(TEST_FILE ${TEST_SRCS}) + set(TEST_EXE "${TEST_FILE}.x") + add_executable(${TEST_EXE} ${TEST_FILE}) + target_link_libraries(${TEST_EXE} PRIVATE bmpi3 multi PkgConfig::FFTW fftw3_mpi) + add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) +endforeach() diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw/test/array_2d.cpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw/test/array_2d.cpp new file mode 100644 index 0000000000..7c54b8927f --- /dev/null +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/adaptors/fftw/test/array_2d.cpp @@ -0,0 +1,167 @@ +// Copyright 2019-2024 Alfredo A. Correa + +#include +#include +#include + +#include // for std::norm + +template auto power(M const& elem) -> decltype(std::norm(elem)) { return std::norm(elem); } + +template= 1)>> // DELETE((M::rank::value < 1))> +auto power(M const& array) { + return accumulate(begin(array), end(array), 0.0, [](auto const& alpha, auto const& omega) { return alpha + power(omega); }); +} + +struct sum_power { + template auto operator()(A const& alpha, B const& omega) const { return alpha + power(omega); } +}; + +// template +// void chop(Array&& arr) { +// std::replace_if(arr.elements().begin(), arr.elements().end(), [](auto const& e) {std::fabs(e) < 1.0e-30}, 0.0); +// // for(auto& e : arr.elements()) { +// // if(std::fabs(e) < 1.0e-30) { +// // e = 0.0; +// // } +// // } +// } + +template +void mpi_fill(Array&& arr) { + auto [is, js] = arr.local_cutout().extensions(); + std::for_each(is.begin(), is.end(), [&, js = js](auto i) { + std::for_each(js.begin(), js.end(), [&](auto j) { + arr.local_cutout()[i][j] 
= std::complex{static_cast(i + j), static_cast(i + 2 * j)}; + }); + }); +} + +template +void mpi_print(Array const& arr, boost::mpi3::communicator& comm, std::string const& /*msg*/ = "") { + boost::mpi3::ostream ccout{comm, std::cout}; + + ccout << "rank=" << comm.rank() << " count=" << arr.local_count() << '\n'; + auto [is, js] = arr.local_cutout().extensions(); + std::for_each(is.begin(), is.end(), [&, js = js](auto i) { + std::for_each(js.begin(), js.end(), [&](auto j) { + ccout << arr.local_cutout()[i][j] << " "; + }); + ccout << '\n'; + }); +} + +namespace mpi3 = boost::mpi3; + +auto mpi3::main(int /*argc*/, char** /*argv*/, boost::mpi3::environment& env) -> int try { + auto world = env.world(); + + boost::mpi3::fftw::environment fftwenv; + + boost::mpi3::fftw::array, 2, boost::mpi3::fftw::local_2d> G({6, 6}, 0.0, world); + mpi_fill(G); + + boost::mpi3::fftw::array, 2, boost::mpi3::fftw::local_2d> F({6, 6}, 0.0, world); + dft_forward(G, F); + + boost::multi::array, 2> const g{G}; + boost::multi::array, 2> f(g.extensions()); + + boost::multi::fftw::dft_forward({true, true}, g, f); + + boost::multi::array, 2> const ff{F}; + assert( ff == f ); + + + // boost::mpi3::fftw::array, 2, boost::mpi3::fftw::local_2d_many> G_many({6, 6}, 0.0, world); + + // mpi_print(G_many, world); + + // dft(G, G_many); + + // mpi_print(G_many, world); + +// G_many = G; + + if(world.rank() == 0) { + // assert( G_many.local_cutout()[2][2] == std::complex(4.0, 6.0) ); + } + + // multi::array, 2> g{G}; + + // if(world.rank() == 0) { + // std::cout << "gathered power " << power(g) << std::endl; + // auto [is, js] = g.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // std::cout << g[i][j] << ","; + // } + // std::cout << std::endl; + // } + // } + world.barrier(); + + // boost::mpi3::fftw::array, 2> F({6, 6}, 0.0, world); + // F.scatter(g); + + // multi::array, 2> f{F}; + // assert(g == f); + + // auto F2 = boost::mpi3::fftw::array, 2>::from_scatter(g); +// auto F3 = 
boost::mpi3::scatter(g); + +// multi::array, 2> f3{F3}; + + // assert(f3 == g); + + // multi::array, 2> g_transformed(g.extensions()); + // boost::multi::fftw::dft_forward({true, true}, g, g_transformed); + + // if(world.rank() == 0) { + // std::cout << "g_transformed power " << power(g_transformed) / g_transformed.num_elements() << std::endl; + // auto [is, js] = g_transformed.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // std::cout << g_transformed[i][j] << ","; + // } + // std::cout << std::endl; + // } + // } + // world.barrier(); + + // multi::fftw::mpi::array, 2> G_transformed({6, 6}, 0.0, &world); + // boost::multi::fftw::mpi::dft_forward(G, G_transformed); + // chop(G_transformed); + + // boost::multi::fftw::mpi::dft_forward(G, G); + // chop(G); + + // mpi_print(G, world, "G_transformed"); + + // multi::array, 2> g_mpi_transformed{G}; + // chop(g_mpi_transformed); + + // if(world.rank() == 0) { + // std::cout << "g_mpi_transformed power " << power(g_mpi_transformed) / g_mpi_transformed.num_elements() << std::endl; + // auto [is, js] = g_mpi_transformed.extensions(); + // for(auto i : is) { + // for(auto j : js) { + // std::cout << g_mpi_transformed[i][j] << ","; + // } + // std::cout << std::endl; + // } + // } + // world.barrier(); + + // assert(g_mpi_transformed == g_transformed); + + // world.barrier(); + // mpi_print(G, world, "G transformed already"); + + // world.barrier(); + // mpi_print(G_transformed, world, "G_transformed"); + + return 0; +} catch(...) { + return 1; +} diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/allocator.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/allocator.hpp index 9fd4a52208..fe8348ff5e 100644 --- a/external_codes/mpi_wrapper/mpi3/include/mpi3/allocator.hpp +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/allocator.hpp @@ -1,4 +1,4 @@ -/* -*- indent-tabs-mode: t -*- */ +// Copyright 2018-2024 Alfredo A. 
Correa #ifndef BOOST_MPI3_ALLOCATOR_HPP #define BOOST_MPI3_ALLOCATOR_HPP @@ -17,13 +17,21 @@ struct /*__attribute__((aligned(0)))*/ bad_alloc : std::bad_alloc{using std::bad inline void* malloc(mpi3::size_t size) { void* ret; // NOLINT(cppcoreguidelines-init-variables) delayed init +#if not defined(EXAMPI) int const s = MPI_Alloc_mem(size, MPI_INFO_NULL, &ret); if(s != MPI_SUCCESS) {return nullptr;} //s throw bad_alloc();//"cannot allocate " + std::to_string(size) + " bytes"); +#else + ret = std::malloc(size); +#endif return ret; } inline void free(void* ptr){ +#if not defined(EXAMPI) MPI_(Free_mem)(ptr); +#else + std::free(ptr); +#endif } template @@ -37,14 +45,14 @@ struct /*__attribute__((aligned(0)))*/ allocator{ // cppcheck-suppress noExplicitConstructor template allocator(allocator const&/*other*/) {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : allocator convention - auto allocate(size_type n) { + inline auto allocate(size_type n) { if(void* ptr = mpi3::malloc(n * static_cast(sizeof(T)))) { return static_cast(ptr); } throw bad_alloc(); } void deallocate(pointer p, std::size_t /*size*/) { mpi3::free(p); } - static size_type max_size() { return std::numeric_limits::max(); } + static inline size_type max_size() { return std::numeric_limits::max(); } }; template @@ -87,42 +95,42 @@ constexpr std::add_const_t& as_const(T& t) noexcept{return t;} //int mpi3::main(int argc, char* argv[], mpi3::communicator world){ -// std::vector> v(1000000); -// std::vector> uv(1000000); -// std::iota(v.begin(), v.end(), 0.); -// using boost::mpi3::data; -// assert( data(uv.begin()) == &*uv.begin() ); -// assert( std::accumulate(v.begin(), v.end(), 0.) 
== (v.size()*(v.size() - 1))/2 ); -// return 0; -// -// { -// boost::container::flat_set, mpi3::allocator > fs; -// fs.insert(5.); -// fs.insert(3.); -// auto it = fs.begin(); -// assert(*it == 3.); -// ++it; -// assert(*it == 5.); -// } -// { -// boost::container::flat_set, std::allocator_traits>::rebind_alloc> fs; -// fs.insert(5); -// fs.insert(3); -// auto it = fs.begin(); -// assert(*it == 3); -// ++it; -// assert(*it == 5); -// } -// { -// boost::container::flat_set, std::less>, mpi3::allocator>> fsp; -// fsp.insert({1.,2.}); -// fsp.insert({3.,4.}); -// auto it = fsp.begin(); -// assert(*it == std::make_pair(1.,2.)); -// ++it; -// assert(*it == std::make_pair(3.,4.)); -// } -// return 0; +// std::vector> v(1000000); +// std::vector> uv(1000000); +// std::iota(v.begin(), v.end(), 0.); +// using boost::mpi3::data; +// assert( data(uv.begin()) == &*uv.begin() ); +// assert( std::accumulate(v.begin(), v.end(), 0.) == (v.size()*(v.size() - 1))/2 ); +// return 0; +// +// { +// boost::container::flat_set, mpi3::allocator > fs; +// fs.insert(5.); +// fs.insert(3.); +// auto it = fs.begin(); +// assert(*it == 3.); +// ++it; +// assert(*it == 5.); +// } +// { +// boost::container::flat_set, std::allocator_traits>::rebind_alloc> fs; +// fs.insert(5); +// fs.insert(3); +// auto it = fs.begin(); +// assert(*it == 3); +// ++it; +// assert(*it == 5); +// } +// { +// boost::container::flat_set, std::less>, mpi3::allocator>> fsp; +// fsp.insert({1.,2.}); +// fsp.insert({3.,4.}); +// auto it = fsp.begin(); +// assert(*it == std::make_pair(1.,2.)); +// ++it; +// assert(*it == std::make_pair(3.,4.)); +// } +// return 0; //} //#endif diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/cartesian_communicator.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/cartesian_communicator.hpp index 8ace96f784..55667de61e 100644 --- a/external_codes/mpi_wrapper/mpi3/include/mpi3/cartesian_communicator.hpp +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/cartesian_communicator.hpp 
@@ -1,4 +1,3 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- // Copyright 2018-2023 Alfredo A. Correa #ifndef BOOST_MPI3_CARTESIAN_COMMUNICATOR_HPP @@ -32,7 +31,7 @@ struct cartesian_communicator : communicator { assert(s.size() == p.size()); using dimensionality_type = int; MPI_(Cart_create)(comm_old.get(), static_cast(s.size()), s.data(), p.data(), /*reorder*/ true, &impl_); - // assert(impl_ != MPI_COMM_NULL); // null communicator is a valid outcome + // assert(impl_ != MPI_COMM_NULL); // null communicator is a valid outcome // TODO(correaa) try with mpich, WAS: there is an bug in mpich, in which if the remaining dim are none then the communicator is not well defined. } @@ -46,25 +45,31 @@ struct cartesian_communicator : communicator { cartesian_communicator(communicator& comm_old, std::initializer_list shape, std::initializer_list period) : cartesian_communicator(comm_old, std::vector(shape), std::vector(period)) {} - [[deprecated("use dimensionality() instead of dimension")]] int dimension() const { - int ret; // NOLINT(cppcoreguidelines-init-variables) delayed init - MPI_Cartdim_get(impl_, &ret); + +#if !defined(EXAMPI) + [[deprecated("use dimensionality() instead of dimension")]] + int dimension() const { + int ret; // NOLINT(cppcoreguidelines-init-variables) delayed init // TODO(correaa) + MPI_(Cartdim_get)(impl_, &ret); return ret; } +#endif cartesian_communicator& operator=(cartesian_communicator const&) = delete; - cartesian_communicator& operator=(cartesian_communicator&&) = default; + cartesian_communicator& operator=(cartesian_communicator&&) = default; // NOLINT(clang-diagnostic-deprecated-declarations) TODO(correaa) // vvv nvcc 11 workaround, needs explicit definition of duplicate assigment [[deprecated]] cartesian_communicator& operator=(cartesian_communicator& other) { // NOLINT(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) "duplicate" assignment if(this == std::addressof(other)) { 
return *this; } // lints cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator - communicator::operator=(other); + if(not (compare(other) == boost::mpi3::detail::congruent)) {throw std::logic_error{"assignment is going to be deprecated"};} + // communicator::operator=(other); return *this; } ~cartesian_communicator() = default; +#if not defined(EXAMPI) int dimensionality() const { int ret; // NOLINT(cppcoreguidelines-init-variables) delayed init MPI_(Cartdim_get)(impl_, &ret); @@ -100,10 +105,13 @@ struct cartesian_communicator : communicator { } std::vector shape() const { return topology().dimensions(); } + std::vector periods() const { auto ps = topology().periods(); return {ps.begin(), ps.end()}; } +#endif + auto num_elements() const { return size(); } template @@ -111,8 +119,10 @@ struct cartesian_communicator : communicator { int rank = -1; MPI_(Cart_rank)(impl_, coord.data(), &rank); return (*this)[rank]; - // return operator[](rank); + // return operator[](rank); } + +#if not defined(EXAMPI) // int MPI_Cart_map not implemented cartesian_communicator sub_aux(std::vector const& remain_dims) { assert(static_cast(remain_dims.size()) == dimensionality()); @@ -131,6 +141,7 @@ struct cartesian_communicator : communicator { remain[0] = 0 /*false*/; return sub_aux(remain); } +#endif }; enum fill_t { @@ -150,24 +161,45 @@ struct cartesian_communicator : cartesian_communicator<> { ~cartesian_communicator() = default; +// #if not defined(EXAMPI) static std::array division(int nnodes, std::array suggest = {}) { MPI_(Dims_create)(nnodes, D, suggest.data()); return suggest; } +// #endif + constexpr static dimensionality_type dimensionality = D; - explicit cartesian_communicator( + cartesian_communicator( communicator& other, - std::array dims = {}, - std::array periods = std::apply([](auto... 
e) { return std::array{(static_cast(e), true)...}; }, std::array{}) - ) try : cartesian_communicator - <>{other, division(other.size(), dims), std::apply([](auto... e) { return std::array{e...}; }, periods)} {} + std::array dims, + std::array periods + ) + try + : cartesian_communicator<>{ + other, + division(other.size(), dims), + std::apply([](auto... e) { return std::array{e...}; }, periods) + } {} catch(std::runtime_error& e) { std::ostringstream ss; std::copy(dims.begin(), dims.end(), std::ostream_iterator{ss, " "}); throw std::runtime_error{"cannot create cartesian communicator with constrains " + ss.str() + " from communicator of size " + std::to_string(other.size()) + " because " + e.what()}; } + cartesian_communicator( + communicator& other, + std::array dims + ) : cartesian_communicator( + other, + dims, + std::apply([](auto... e) { return std::array{(static_cast(e), true)...}; }, std::array{}) + ) {} + + explicit cartesian_communicator( + communicator& other + ) : cartesian_communicator(other, std::array{}) {} + auto topology() const { struct topology_t { std::array dimensions, periods, coordinates; @@ -182,13 +214,14 @@ struct cartesian_communicator : cartesian_communicator<> { constexpr auto dimensions() const { return topology().dimensions; } cartesian_communicator& operator=(cartesian_communicator const&) = delete; - cartesian_communicator& operator=(cartesian_communicator&&) noexcept = default; + cartesian_communicator& operator=(cartesian_communicator&&) noexcept = default; // NOLINT(clang-diagnostic-deprecated-declarations) TODO(correaa) // vvv nvcc 11 workaround, needs explicit definition of duplicate assigment [[deprecated]] cartesian_communicator& operator=(cartesian_communicator& other) { // NOLINT(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) duplicate assignment if(this == std::addressof(other)) { return *this; } // lints cert-oop54-cpp - cartesian_communicator<>::operator=(other); // 
NOLINT(clang-diagnostic-deprecated-declarations) + if(not (compare(other) == boost::mpi3::detail::congruent)) {throw std::logic_error{"assignment is going to be deprecated"};} + // cartesian_communicator<>::operator=(other); // NOLINT(clang-diagnostic-deprecated-declarations) return *this; } @@ -249,6 +282,7 @@ struct cartesian_communicator : cartesian_communicator<> { using coordinates_type = std::array; using cartesian_communicator<>::rank; +#if not defined(EXAMPI) auto rank(coordinates_type cs) const -> int { auto const ps = periods(); auto const s = shape(); @@ -260,6 +294,7 @@ struct cartesian_communicator : cartesian_communicator<> { } return MPI_(Cart_rank)(impl_, cs.data()); } +#endif auto coordinates(int r) const -> coordinates_type { coordinates_type ret; MPI_(Cart_coords)(impl_, r, D, ret.data()); @@ -309,7 +344,8 @@ struct circular_communicator : cartesian_communicator<1> { if(this == std::addressof(other)) { return *this; } // lints cert-oop54-cpp - cartesian_communicator<1>::operator=(other); // NOLINT(clang-diagnostic-deprecated-declarations) + if(not (compare(other) == boost::mpi3::detail::congruent)) {throw std::logic_error{"assignment is going to be deprecated"};} + // cartesian_communicator<1>::operator=(other); // NOLINT(clang-diagnostic-deprecated-declarations) return *this; } @@ -317,7 +353,10 @@ struct circular_communicator : cartesian_communicator<1> { auto coordinate(int rank) const { return std::get<0>(this->coordinates(rank)); } using cartesian_communicator<1>::rank; + +#if not defined(EXAMPI) auto rank(int coordinate) const { return cartesian_communicator<1>::rank({coordinate}); } +#endif template auto rotate(As... 
as, int displacement) { return this->send_receive(as..., this->shift<0>(-displacement)); } diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/communicator.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/communicator.hpp index 7e70869042..d8bd515ca8 100644 --- a/external_codes/mpi_wrapper/mpi3/include/mpi3/communicator.hpp +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/communicator.hpp @@ -1,4 +1,3 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- // Copyright 2018-2023 Alfredo A. Correa #ifndef MPI3_COMMUNICATOR_HPP @@ -108,39 +107,114 @@ namespace mpi3 { #endif // https://www.open-mpi.org/doc/v4.0/man3/MPI_Comm_split_type.3.php#toc8 -enum class communicator_type : int { - shared = MPI_COMM_TYPE_SHARED ,/*synomym*/ node = OMPI_COMM_TYPE_NODE, - hw_thread = OMPI_COMM_TYPE_HWTHREAD, - core = OMPI_COMM_TYPE_CORE , - l1_cache = OMPI_COMM_TYPE_L1CACHE , - l2_cache = OMPI_COMM_TYPE_L2CACHE , - l3_cache = OMPI_COMM_TYPE_L3CACHE , - socket = OMPI_COMM_TYPE_SOCKET , - numa = OMPI_COMM_TYPE_NUMA , - board = OMPI_COMM_TYPE_BOARD , - host = OMPI_COMM_TYPE_HOST , - cu = OMPI_COMM_TYPE_CU ,/*synomym*/ cpu = OMPI_COMM_TYPE_CU , - cluster = OMPI_COMM_TYPE_CLUSTER + +// enum class communicator_type : int { +// shared = MPI_COMM_TYPE_SHARED ,/*synomym*/ node = OMPI_COMM_TYPE_NODE, +// hw_thread = OMPI_COMM_TYPE_HWTHREAD, +// core = OMPI_COMM_TYPE_CORE , +// l1_cache = OMPI_COMM_TYPE_L1CACHE , +// l2_cache = OMPI_COMM_TYPE_L2CACHE , +// l3_cache = OMPI_COMM_TYPE_L3CACHE , +// socket = OMPI_COMM_TYPE_SOCKET , +// numa = OMPI_COMM_TYPE_NUMA , +// board = OMPI_COMM_TYPE_BOARD , +// host = OMPI_COMM_TYPE_HOST , +// cu = OMPI_COMM_TYPE_CU ,/*synomym*/ cpu = OMPI_COMM_TYPE_CU , +// cluster = OMPI_COMM_TYPE_CLUSTER +// }; + +class communicator_type { + int value_; + + public: + constexpr explicit communicator_type(int v) noexcept : value_{v} {} + constexpr explicit operator int() const noexcept { return value_; } + + constexpr bool 
operator==(communicator_type const& o) const noexcept { return value_ == o.value_; } + constexpr bool operator!=(communicator_type const& o) const noexcept { return value_ != o.value_; } + + static communicator_type const shared ; static communicator_type const /*synomym*/ node; + static communicator_type const hw_thread; + static communicator_type const core ; + static communicator_type const l1_cache ; + static communicator_type const l2_cache ; + static communicator_type const l3_cache ; + static communicator_type const socket ; + static communicator_type const numa ; + static communicator_type const board ; + static communicator_type const host ; + static communicator_type const cu ; static communicator_type const /*synomym*/ cpu; + static communicator_type const cluster ; }; -enum constant { - undefined = MPI_UNDEFINED , - process_null = MPI_PROC_NULL , - any_source = MPI_ANY_SOURCE +inline communicator_type const communicator_type::shared {MPI_COMM_TYPE_SHARED }; inline communicator_type const /*synomym*/ node{OMPI_COMM_TYPE_NODE}; +inline communicator_type const communicator_type::hw_thread{OMPI_COMM_TYPE_HWTHREAD}; +inline communicator_type const communicator_type::core {OMPI_COMM_TYPE_CORE }; +inline communicator_type const communicator_type::l1_cache {OMPI_COMM_TYPE_L1CACHE }; +inline communicator_type const communicator_type::l2_cache {OMPI_COMM_TYPE_L2CACHE }; +inline communicator_type const communicator_type::l3_cache {OMPI_COMM_TYPE_L3CACHE }; +inline communicator_type const communicator_type::socket {OMPI_COMM_TYPE_SOCKET }; +inline communicator_type const communicator_type::numa {OMPI_COMM_TYPE_NUMA }; +inline communicator_type const communicator_type::board {OMPI_COMM_TYPE_BOARD }; +inline communicator_type const communicator_type::host {OMPI_COMM_TYPE_HOST }; +inline communicator_type const communicator_type::cu {OMPI_COMM_TYPE_CU }; inline communicator_type const& /*synomym*/ cpu = communicator_type::cu; +inline communicator_type const 
communicator_type::cluster {OMPI_COMM_TYPE_CLUSTER }; + +// enum constant { +// undefined = MPI_UNDEFINED , +// process_null = MPI_PROC_NULL , +// any_source = MPI_ANY_SOURCE +// }; + +class constant { + int value_; + + public: + constexpr explicit constant(int v) noexcept : value_{v} {} + constexpr operator int() const noexcept { return value_; } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + + constexpr bool operator==(constant const& o) const noexcept { return value_ == o.value_; } + constexpr bool operator!=(constant const& o) const noexcept { return value_ != o.value_; } + + // static constant const undefined ; + // static constant const process_null; + // static constant const any_source ; }; -enum key { // for attributes - tag_ub = MPI_TAG_UB, - host = MPI_HOST, - io = MPI_IO, - wtime_is_global = MPI_WTIME_IS_GLOBAL, - application_number = MPI_APPNUM, - universe_size = MPI_UNIVERSE_SIZE, - last_used_code = MPI_LASTUSEDCODE +inline constant const undefined {MPI_UNDEFINED }; +inline constant const process_null{MPI_PROC_NULL }; +inline constant const any_source {MPI_ANY_SOURCE}; + +// enum key { // for attributes +// tag_ub = MPI_TAG_UB, +// host = MPI_HOST, +// io = MPI_IO, +// wtime_is_global = MPI_WTIME_IS_GLOBAL, +// application_number = MPI_APPNUM, +// universe_size = MPI_UNIVERSE_SIZE, +// last_used_code = MPI_LASTUSEDCODE +// }; + +class key { // for attributes + int value_; + + public: + explicit key(int v) noexcept : value_{v} {} + + constexpr bool operator==(key const& o) const noexcept { return value_ == o.value_; } + constexpr bool operator!=(key const& o) const noexcept { return value_ != o.value_; } }; +inline key const tag_ub {MPI_TAG_UB}; // NOLINT(fuchsia-statically-constructed-objects) MPI_TAG_UB, etc are not constants in ExaMPI +// inline key const host {MPI_HOST}; // NOLINT(fuchsia-statically-constructed-objects) MPI_TAG_UB, etc are not constants in ExaMPI +// inline key const io {MPI_IO}; // 
NOLINT(fuchsia-statically-constructed-objects) MPI_TAG_UB, etc are not constants in ExaMPI +// inline key const wtime_is_global {MPI_WTIME_IS_GLOBAL}; // NOLINT(fuchsia-statically-constructed-objects) MPI_TAG_UB, etc are not constants in ExaMPI +// inline key const application_number{MPI_APPNUM}; // NOLINT(fuchsia-statically-constructed-objects) MPI_TAG_UB, etc are not constants in ExaMPI +// inline key const universe_size {MPI_UNIVERSE_SIZE}; // NOLINT(fuchsia-statically-constructed-objects) MPI_TAG_UB, etc are not constants in ExaMPI +// inline key const last_used_code {MPI_LASTUSEDCODE}; // NOLINT(fuchsia-statically-constructed-objects) MPI_TAG_UB, etc are not constants in ExaMPI + template struct overload_priority : overload_priority{ -// using overload_priority::overload_priority; +// using overload_priority::overload_priority; }; template<> struct overload_priority<0>{}; @@ -217,14 +291,19 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com [[deprecated]] auto operator=(communicator& other) -> communicator& { // NOLINT(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) duplicate assigment communicator tmp{other}; operator=(std::move(tmp)); - // swap(tmp); + // swap(tmp); return *this; } + + // [[deprecated("communicator will stop supporting move assignment")]] auto operator=(communicator && other) noexcept -> communicator& { // TODO(correaa) tidy this operator if(impl_ != MPI_COMM_NULL) { try { + #if not defined(EXAMPI) MPI_(Comm_disconnect)(&impl_); //this will wait for communications to finish communications, if it gets to this point is probably an error anyway <-- not true, it is necessary to synchronize the flow - // MPI_Comm_free(&impl_); + #else + MPI_Comm_free(&impl_); + #endif } catch(std::exception& e) { std::cerr<< e.what() < iterator_t& {++rank_; return *this;} -//// auto operator--() -> iterator_t& {--rank_; return *this;} -//// auto operator*() const -> reference; - -//// private: 
-//// communicator* commP_ = nullptr; -//// int rank_ = MPI_PROC_NULL; - -//// friend class communicator; -//// iterator_t(communicator* self, int rank) : commP_{self}, rank_{rank} {} -// }; +// struct iterator_t { +//// iterator_t() = default; +//// explicit iterator_t(std::nullptr_t n) : commP_{n} {} +//// auto operator++() -> iterator_t& {++rank_; return *this;} +//// auto operator--() -> iterator_t& {--rank_; return *this;} +//// auto operator*() const -> reference; + +//// private: +//// communicator* commP_ = nullptr; +//// int rank_ = MPI_PROC_NULL; + +//// friend class communicator; +//// iterator_t(communicator* self, int rank) : commP_{self}, rank_{rank} {} +// }; // using iterator = iterator_t; -// auto begin() -> iterator {return {this, 0 };} -// auto end () -> iterator {return {this, size()};} +// auto begin() -> iterator {return {this, 0 };} +// auto end () -> iterator {return {this, size()};} auto& handle() {return impl_;} auto get_mutable() {return impl_;} @@ -269,12 +348,15 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com communicator* ptr_; public: - explicit ptr(communicator* ptr) : ptr_{ptr} {} + explicit ptr(communicator* ptr) : ptr_{ptr} {} // cppcheck-suppress constParameterPointer ; TODO(correaa) operator MPI_Comm() const {return ptr_->get_mutable();} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) explicit operator communicator *() const {return ptr_;} - // explicit operator communicator const*() const{return ptr_;} + // explicit operator communicator const*() const{return ptr_;} friend bool operator==(ptr const& a, ptr const& b) {return a.ptr_ == b.ptr_;} friend bool operator!=(ptr const& a, ptr const& b) {return a.ptr_ != b.ptr_;} + + friend bool operator==(ptr const& a, boost::mpi3::communicator const* b) {return a.ptr_ == b;} + friend bool operator!=(ptr const& a, boost::mpi3::communicator const* b) {return a.ptr_ != b;} }; ptr operator&() & {return ptr{this};} // 
NOLINT(google-runtime-operator) @@ -284,8 +366,11 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ~communicator() { if(impl_ != MPI_COMM_WORLD and impl_ != MPI_COMM_NULL and impl_ != MPI_COMM_SELF) { try { + #if not defined(EXAMPI) MPI_(Comm_disconnect)(&impl_); //this will wait for communications to finish communications, if it gets to this point is probably an error anyway <-- not true, it is necessary to synchronize the flow - // MPI_Comm_free(&impl_); + #else + MPI_Comm_free(&impl_); + #endif } catch(std::exception& e) { std::cerr<< e.what() < class keyval { static int delete_fn(MPI_Comm /*comm*/, int /*keyval*/, void *attr_val, void */*extra_state*/){ delete static_cast(attr_val); // NOLINT(cppcoreguidelines-owning-memory) - // attr_val = nullptr; + // attr_val = nullptr; return MPI_SUCCESS; } static int copy_fn( MPI_Comm /*oldcomm*/, int /*keyval*/, - void * /*extra_state*/, void *attribute_val_in, + void * /*extra_state*/, void* attribute_val_in, // cppcheck-suppress [constParameterCallback,constParameterPointer] ; C-function callback // void *attribute_val_out, int *flag ) { *static_cast(attribute_val_out) = static_cast(new T{*(static_cast(attribute_val_in))}); @@ -348,14 +434,19 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com keyval& operator=(keyval const&) = delete; keyval& operator=(keyval &&) = delete; - ~keyval() noexcept {MPI_Comm_free_keyval(&impl_);} + ~keyval() noexcept { + MPI_Comm_free_keyval(&impl_); + } }; +#endif using detail::basic_communicator::send_receive_n; +#if not defined(EXAMPI) using detail::basic_communicator::matched_probe; +#endif template - auto send_n( + auto send_n( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It first, detail::contiguous_iterator_tag /*tag*/, detail::basic_tag /*tag*/, @@ -383,7 +474,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); } template - void 
send_n( + void send_n( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It first, detail::forward_iterator_tag /*tag*/, detail::value_unspecified_tag /*tag*/, @@ -394,7 +485,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com package_oarchive poa(p); std::copy_n(first, count, package_oarchive::iterator::value_type>(poa)); // while(count--) {poa << *first++;} - send_n(p.begin(), p.size(), dest, tag); // p.send(dest, tag); + send_n(p.begin(), p.size(), dest, tag); // p.send(dest, tag); } template auto isend_n(It first, Size count, int dest, int tag = 0){ @@ -426,7 +517,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com template struct has_dimensionality : decltype(has_dimensionality_aux(T{})) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) template{})> > - void send_n(It first, Size count, int dest, int tag = 0) { + void send_n(It first, Size count, int dest, int tag = 0) { // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class return send_n( first, detail::iterator_category_t{}, @@ -436,7 +527,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); } template - auto send( + auto send( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It first, It last, detail::random_access_iterator_tag /*tag*/, detail::value_unspecified_tag /*tag*/, @@ -445,7 +536,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com return send_n(first, std::distance(first, last), dest, tag); } template - auto send( + auto send( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It first, It last, detail::contiguous_iterator_tag /*tag*/, detail::basic_tag /*tag*/, @@ -454,7 +545,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com return send_n(first, 
std::distance(first, last), dest, tag); } template - auto send( + auto send( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It first, It last, detail::input_iterator_tag /*tag*/, detail::basic_tag /*tag*/, @@ -464,7 +555,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com return send_n(buffer.begin(), buffer.size(), dest, tag); } template - auto send( + auto send( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It first, It last, /**/ detail::input_iterator_tag /*tag*/, /**/ detail::value_unspecified_tag /*tag*/, @@ -474,7 +565,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com package_oarchive poa(p); std::copy(first, last, package_oarchive::iterator::value_type>(poa)); // while(first!=last) {poa << *first++;} - send_n(p.begin(), p.size(), dest, tag); // p.send(dest, tag); + send_n(p.begin(), p.size(), dest, tag); // p.send(dest, tag); } template @@ -497,7 +588,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com return isend_n(first, std::distance(first, last), dest, tag); } template - auto send(It first, It last, int dest, int tag = 0) { + auto send(It first, It last, int dest, int tag = 0) { // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class return send( first, last, detail::iterator_category_t{}, @@ -528,7 +619,9 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com communicator ret; MPI_(Comm_split)(impl_, color, key, &ret.impl_); if(ret) {ret.set_name(name() + std::to_string(color));} + #if not defined(EXAMPI) if(ret) {ret.attribute("color") = color;} + #endif return ret; } communicator split(int color = MPI_UNDEFINED) { @@ -544,6 +637,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com communicator reversed() {return split(0, size() - rank());} +#if not 
defined(EXAMPI) int cartesian_map(std::vector const& dims, std::vector const& periods) const { assert(dims.size() == periods.size()); return MPI_(Cart_map)(impl_, static_cast(dims.size()), dims.data(), periods.data()); // TODO(correaa) use safe cast @@ -551,6 +645,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com int cartesian_map(std::vector const& dimensions) const { return cartesian_map(dimensions, std::vector(dimensions.size(), 0)); } +#endif pointer malloc(MPI_Aint size) const; template void deallocate_shared(pointer p); @@ -572,7 +667,19 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com communicator subcomm(std::initializer_list l) const { return subcomm(std::vector(l)); } - enum class topology{undefined = MPI_UNDEFINED, graph = MPI_GRAPH, cartesian = MPI_CART}; + + class topology { + int value_; + + public: + constexpr explicit topology(int v) noexcept : value_{v} {} + + constexpr bool operator<(topology const& o) const noexcept {return value_ < o.value_;} + + static topology const undefined; + static topology const graph; + static topology const cartesian; + }; int rank() const { assert(not is_empty()); // an empty communicator doesn't have ranks @@ -598,21 +705,29 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com assert(rank() - n > 0); return rank() - n; } + +#if not defined(EXAMPI) communicator accept(port const& p, int root = 0) const { communicator ret; MPI_Comm_accept(p.name_.c_str(), MPI_INFO_NULL, root, impl_, &ret.impl_); return ret; } +#endif + [[deprecated("call non const version")]] void barrier() const { MPI_( Barrier)(get() ) ;} void barrier() { MPI_( Barrier)(handle()) ;} +#if not defined(EXAMPI) auto ibarrier() {request ret; MPI_(Ibarrier)(handle(), &ret.impl_); return ret;} +#endif +#if not defined(EXAMPI) communicator connect(port const& p, int root = 0) const { communicator ret; MPI_(Comm_connect)(p.name_.c_str(), MPI_INFO_NULL, root, 
impl_, &ret.impl_); return ret; } +#endif bool root() const {return (not empty()) and (rank() == 0);} bool is_root() const {return root();} @@ -624,6 +739,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com auto operator[](int rank) -> reference; protected: +#if not defined(EXAMPI) template void set_attribute(int kv_idx, T const& t) { MPI_(Comm_set_attr)(impl_, kv_idx, new T{t}); // NOLINT(readability-implicit-bool-conversion, cppcoreguidelines-owning-memory) TODO(correaa) } @@ -643,8 +759,10 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com MPI_Comm_get_attr(impl_, kvidx, &v, &flag); return flag != 0; } +#endif public: +#if not defined(EXAMPI) template void set_attribute(keyval const& k, TT const& t = {}) {set_attribute(k.impl_, t);} template @@ -661,7 +779,9 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com return get_attribute(kv); } mpi3::any& attribute(std::string const& s); +#endif +#if not defined(EXAMPI) void call_error_handler(int errorcode) noexcept { auto const s = MPI_Comm_call_errhandler(impl_, errorcode); (void)s; assert(s == MPI_SUCCESS); @@ -670,6 +790,8 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com auto const s = MPI_Comm_call_errhandler(impl_, static_cast(e)); (void)s; assert(s == MPI_SUCCESS); } +#endif + communicator divide_low(int n) { assert(n != 0); return split( @@ -772,8 +894,9 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com #endif #endif +#if not defined(EXAMPI) template - auto send_receive_replace_n( + auto send_receive_replace_n( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It first, Size size, int dest, int source, // = MPI_ANY_SOURCE, int sendtag = 0, int recvtag = MPI_ANY_TAG @@ -787,8 +910,11 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com dest, source, sendtag, recvtag ); } +#endif 
+ +#if not defined(EXAMPI) template - It send_receive_replace_n( + It send_receive_replace_n( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It first, detail::random_access_iterator_tag /*tag*/, detail::basic_tag /*tag*/, @@ -801,6 +927,8 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); return first + s.count::value_type>(); } +#endif + template auto send_receive_n( It1 first, Size count, int dest, @@ -834,21 +962,21 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com // private: // public: -// template -// auto isend_receive_replace_n( -// It first, Size size, -// int dest, int source, // = MPI_ANY_SOURCE, -// int sendtag = 0, int recvtag = MPI_ANY_TAG -// ) { -// using value_type = typename std::iterator_traits::value_type; -// return isend_receive_replace_n( -// first, -// detail::iterator_category_t{}, -// detail::value_category_t{}, -// size, -// dest, source, sendtag, recvtag -// ); -// } +// template +// auto isend_receive_replace_n( +// It first, Size size, +// int dest, int source, // = MPI_ANY_SOURCE, +// int sendtag = 0, int recvtag = MPI_ANY_TAG +// ) { +// using value_type = typename std::iterator_traits::value_type; +// return isend_receive_replace_n( +// first, +// detail::iterator_category_t{}, +// detail::value_category_t{}, +// size, +// dest, source, sendtag, recvtag +// ); +// } private: template @@ -891,8 +1019,9 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com return d_first + ret.count(); } +#if not defined(EXAMPI) template - auto send_receive_replace_n( + auto send_receive_replace_n( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It first, /**/ detail::forward_iterator_tag /*tag*/, /**/ detail::value_unspecified_tag /*tag*/, @@ -923,9 +1052,10 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com count, first ); } 
+#endif template - auto send_receive_replace_n( + auto send_receive_replace_n( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It first, detail::forward_iterator_tag /*tag*/, detail::basic_tag /*tag*/, @@ -939,7 +1069,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com public: template - auto send_receive_n( + auto send_receive_n( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It first, Size size, int dest, int source, // = MPI_ANY_SOURCE, int sendtag = 0, int recvtag = MPI_ANY_TAG @@ -1098,6 +1228,8 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com MPI_Recv(begin, n, MPI_PACKED, source, tag, impl_, &ret.impl_); return ret; } + +#if not defined(EXAMPI) auto receive_packed(void* begin, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { MPI_Status status; MPI_Message msg; // NOLINT(cppcoreguidelines-init-variables) delayed init @@ -1105,12 +1237,14 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com MPI_Mprobe(source, tag, impl_, &msg, &status); MPI_Get_count(&status, MPI_PACKED, &count); MPI_Mrecv(begin, count, MPI_PACKED, &msg, MPI_STATUS_IGNORE); // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) - // auto n = probe(source, tag).count(); - // receive_packed_n(begin, n, source, tag); + // auto n = probe(source, tag).count(); + // receive_packed_n(begin, n, source, tag); return static_cast(std::next(static_cast(begin), count)); } +#endif + template - auto receive_n( + auto receive_n( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It dest, detail::contiguous_iterator_tag /*tag*/, detail::basic_tag /*tag*/, @@ -1142,8 +1276,10 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); return r; } // NOLINT(clang-analyzer-optin.mpi.MPI-Checker) // MPI_Wait called on destructor of ret + +#if not 
defined(EXAMPI) template - auto receive_n( + auto receive_n( // cppcheck-suppress duplInheritedMember ; TODO(correaa) remove duplications in the base class It dest, detail::forward_iterator_tag /*tag*/, detail::value_unspecified_tag /*tag*/, @@ -1155,6 +1291,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com package_iarchive pia(p); return std::copy_n(package_iarchive::iterator::value_type>{pia}, count, dest); } +#endif template{}, int> =0// or (not detail::is_basic::value_type>{}), int> =0 // needed by intel commpiler @@ -1180,6 +1317,8 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com source, tag ); } + +#if not defined(EXAMPI) template auto receive( It dest, @@ -1192,6 +1331,9 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com m.receive_n(dest, count); return dest + count; } +#endif + +#if not defined(EXAMPI) template [[deprecated]] auto receive( It dest, @@ -1205,6 +1347,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com while(p) {pia >> *dest++;} // NOLINT(altera-unroll-loops) deprecating return dest; } + template auto receive( It dest, @@ -1214,6 +1357,8 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ) { return matched_probe(source, tag).receive_n(dest); } +#endif + template [[deprecated]] auto receive(It dest, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { return receive( @@ -1240,7 +1385,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com int source, int tag ) { return receive_n(std::addressof(*d_first), std::distance(d_first, d_last), source, tag); - // return std::copy(buffer.begin(), buffer.end(), d_first); + // return std::copy(buffer.begin(), buffer.end(), d_first); } template @@ -1254,89 +1399,89 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com receive_n(buffer.begin(), buffer.size(), source, tag); return 
std::copy(buffer.begin(), buffer.end(), d_first); } -// class ir_req{ -// boost::mpi3::status query(){ -// boost::mpi3::status ret; -// ret.set_source(MPI_UNDEFINED); -// ret.set_tag(MPI_UNDEFINED); -// ret.set_cancelled(); -// ret.set_elements(0); -// return ret; -// } -// static void free(){ -// std::cout << "free" << std::endl; -// } -// static void cancel(int complete) { -// std::cout << "cancel " << complete << std::endl; -// } -// }; -// template -// struct receive_args { -// communicator* commP; -// It d_first; -// // It d_last; -// int source; -// int tag; -// MPI_Request* requestP; -// }; -// struct receive_state{ -// int cancelled = 0; -// int source = MPI_UNDEFINED; -// int tag = MPI_UNDEFINED; -// }; -// template -// inline static void* receive_thread(void* ptr) { -// receive_args* args = (receive_args*)ptr; -// args->commP->receive(args->d_first, args->source, args->tag);//, /*args->d_last,*/ ); -// MPI_Grequest_complete(*args->requestP); -// ::free(ptr); -// return nullptr; -// } -// inline static int query_fn(void* extra_state, MPI_Status *status){ -// auto* rs = static_cast(extra_state); -// /* always send just one int */ -// MPI_Status_set_elements(status, MPI_INT, 1); -// /* can never cancel so always true */ -// MPI_Status_set_cancelled(status, rs->cancelled); -// /* choose not to return a value for this */ -// status->MPI_SOURCE = rs->source; -// /* tag has not meaning for this generalized request */ -// status->MPI_TAG = rs->tag; -// /* this generalized request never fails */ -// return MPI_SUCCESS; -// } -// inline static int free_fn(void* extra_state) { -// /* this generalized request does not need to do any freeing */ -// /* as a result it never fails here */ -// ::free(extra_state); -// return MPI_SUCCESS; -// } -// inline static int cancel_fn(void* /*extra_state*/, int complete) { -// /* This generalized request does not support cancelling. -// Abort if not already done. If done then treat as if cancel failed. 
*/ -// if(not (complete == 0)) { -// std::cerr<< "Cannot cancel generalized request - aborting program" < -// auto ireceive(It d_first, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { -// // based on http://liinwww.ira.uka.de/courses/spprakt/mpi2-html-doc/node157.html -// mpi3::request ret; /* receive_args* args = (receive_args*)::malloc(sizeof(receive_args)); args->commP = this; args->d_first = d_first; // args->d_last = d_last; args->source = source; args->tag = tag; args->requestP = &ret.impl_;*/ -// receive_state* rs = (receive_state*)::malloc(sizeof(receive_state)); -// rs->cancelled = 0; -// rs->source = source; -// rs->tag = tag; -// MPI_Grequest_start(query_fn, free_fn, cancel_fn, rs, &ret.impl_);//args->requestP); -// std::thread( // static_cast(receive_thread), args -// [this, d_first, source, tag, &ret](){ -// this->receive(d_first, source, tag); // receive_args* args = (receive_args*)ptr; // args->commP->receive(args->d_first, args->source, args->tag);//, /*args->d_last,*/ ); -// MPI_Grequest_complete(ret.impl_); // MPI_Grequest_complete(*args->requestP); // ::free(ptr); -// } -// ).detach(); // t.detach(); // pthread_t thread; // pthread_create(&thread, NULL, static_cast(receive_thread), args); // pthread_detach(thread); -// return ret; -// } +// class ir_req{ +// boost::mpi3::status query(){ +// boost::mpi3::status ret; +// ret.set_source(MPI_UNDEFINED); +// ret.set_tag(MPI_UNDEFINED); +// ret.set_cancelled(); +// ret.set_elements(0); +// return ret; +// } +// static void free(){ +// std::cout << "free" << std::endl; +// } +// static void cancel(int complete) { +// std::cout << "cancel " << complete << std::endl; +// } +// }; +// template +// struct receive_args { +// communicator* commP; +// It d_first; +// // It d_last; +// int source; +// int tag; +// MPI_Request* requestP; +// }; +// struct receive_state{ +// int cancelled = 0; +// int source = MPI_UNDEFINED; +// int tag = MPI_UNDEFINED; +// }; +// template +// inline static void* 
receive_thread(void* ptr) { +// receive_args* args = (receive_args*)ptr; +// args->commP->receive(args->d_first, args->source, args->tag);//, /*args->d_last,*/ ); +// MPI_Grequest_complete(*args->requestP); +// ::free(ptr); +// return nullptr; +// } +// inline static int query_fn(void* extra_state, MPI_Status *status){ +// auto* rs = static_cast(extra_state); +// /* always send just one int */ +// MPI_Status_set_elements(status, MPI_INT, 1); +// /* can never cancel so always true */ +// MPI_Status_set_cancelled(status, rs->cancelled); +// /* choose not to return a value for this */ +// status->MPI_SOURCE = rs->source; +// /* tag has not meaning for this generalized request */ +// status->MPI_TAG = rs->tag; +// /* this generalized request never fails */ +// return MPI_SUCCESS; +// } +// inline static int free_fn(void* extra_state) { +// /* this generalized request does not need to do any freeing */ +// /* as a result it never fails here */ +// ::free(extra_state); +// return MPI_SUCCESS; +// } +// inline static int cancel_fn(void* /*extra_state*/, int complete) { +// /* This generalized request does not support cancelling. +// Abort if not already done. If done then treat as if cancel failed. 
*/ +// if(not (complete == 0)) { +// std::cerr<< "Cannot cancel generalized request - aborting program" < +// auto ireceive(It d_first, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { +// // based on http://liinwww.ira.uka.de/courses/spprakt/mpi2-html-doc/node157.html +// mpi3::request ret; /* receive_args* args = (receive_args*)::malloc(sizeof(receive_args)); args->commP = this; args->d_first = d_first; // args->d_last = d_last; args->source = source; args->tag = tag; args->requestP = &ret.impl_;*/ +// receive_state* rs = (receive_state*)::malloc(sizeof(receive_state)); +// rs->cancelled = 0; +// rs->source = source; +// rs->tag = tag; +// MPI_Grequest_start(query_fn, free_fn, cancel_fn, rs, &ret.impl_);//args->requestP); +// std::thread( // static_cast(receive_thread), args +// [this, d_first, source, tag, &ret](){ +// this->receive(d_first, source, tag); // receive_args* args = (receive_args*)ptr; // args->commP->receive(args->d_first, args->source, args->tag);//, /*args->d_last,*/ ); +// MPI_Grequest_complete(ret.impl_); // MPI_Grequest_complete(*args->requestP); // ::free(ptr); +// } +// ).detach(); // t.detach(); // pthread_t thread; // pthread_create(&thread, NULL, static_cast(receive_thread), args); // pthread_detach(thread); +// return ret; +// } template auto ireceive( It d_first, It d_last, @@ -1401,18 +1546,21 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com auto bsend(InputIterator It1, InputIterator It2, int dest, int tag = 0){ return send(buffered_communication_mode{}, blocking_mode{}, It1, It2, dest, tag); } + +#if not defined(EXAMPI) template::value_type> auto dynamic_receive(InputIt first, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { - // auto count = probe(source, tag).count(); - // return receive(first, first + count, source, tag); + // auto count = probe(source, tag).count(); + // return receive(first, first + count, source, tag); MPI_Status status; MPI_Message msg; // 
NOLINT(cppcoreguidelines-init-variables) delayed init - int count = -1; - MPI_Mprobe(source, tag, impl_, &msg, &status); - MPI_Get_count(&status, datatype{}(), &count); - using detail::data; - MPI_Mrecv(data(first), count, datatype{}(), &msg, MPI_STATUS_IGNORE); // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) for macro + int count = -1; + MPI_Mprobe(source, tag, impl_, &msg, &status); + MPI_Get_count(&status, datatype{}(), &count); + using detail::data; + MPI_Mrecv(data(first), count, datatype{}(), &msg, MPI_STATUS_IGNORE); // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) for macro } +#endif template::iterator_category> auto breceive(Iterator It1, Iterator It2, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG){ @@ -1549,8 +1697,13 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com return d_first + count; } +#if not defined(EXAMPI) using in_place_type = decltype(MPI_IN_PLACE); // NOLINT(cppcoreguidelines-pro-type-cstyle-cast,performance-no-int-to-ptr) openmpi #defines this as (void*)1, it may not be a pointer in general +#else + using in_place_type = int; +#endif +#if not defined(EXAMPI) template auto all_to_all_n( It1 first, @@ -1578,8 +1731,10 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); return first + count*size(); } +#endif public: +#if not defined(EXAMPI) template auto all_to_all_inplace_n(It1 first, Size count) { using count_type = int; @@ -1595,6 +1750,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); return first + count; } +#endif template auto all_to_all_n(It1 first, Size count, It2 d_first) { @@ -1688,7 +1844,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com } template auto scatter_builtin_q(std::false_type, Iterator1 first, Iterator2 last, Iterator1 d_first, int root) -// { TODO implement } +// { TODO implement } ; public: @@ -1716,8 +1872,8 @@ class communicator : protected detail::basic_communicator { // 
in mpich MPI_Com detail::data(first), static_cast(count), datatype::value_type>{}(), root, impl_, &r.impl_ ); - return r; - } // NOLINT(clang-analyzer-optin.mpi.MPI-Checker) // MPI_Wait called on destructor of ret + return r; // NOLINT(clang-analyzer-optin.mpi.MPI-Checker) // MPI_Wait called on destructor of ret + } // NOLINT(clang-analyzer-optin.mpi.MPI-Checker) // MPI_Wait called on destructor of ret template auto broadcast_n( It first, @@ -1829,7 +1985,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com detail::iterator_category_t{}, detail::value_category_t::value_type>{}, op, - // predefined_operation{}, + // predefined_operation{}, root ); } @@ -1930,25 +2086,28 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com } public: + #if not defined(EXAMPI) template< class It1, class Size, class Op = std::plus<>, - class V1 = typename std::iterator_traits::value_type, class P1 = decltype(data_adl(It1{})), - class = decltype(std::declval::reference>() = std::declval()(V1{}, V1{})) + class V1 = typename std::iterator_traits::value_type, class P1 = decltype(data_adl(std::declval())), + class = std::enable_if_t()(std::declval(), std::declval()))>> > auto all_reduce_in_place_n(It1 first, Size count, Op /*op*/) { auto const in_place = MPI_IN_PLACE; // NOLINT(cppcoreguidelines-pro-type-cstyle-cast,llvm-qualified-auto,readability-qualified-auto,performance-no-int-to-ptr) openmpi #defines this as (void*)1, it may not be a pointer in general static mpi3::operation::value_type, typename std::iterator_traits::pointer> const combine{Op{}}; // will leak? 
MPI_(Allreduce)(in_place, data_adl(first), static_cast(count), datatype{}(), &combine, impl_); } +#endif template< class It1, class Size, class Op = std::plus<>, - class V1 = typename std::iterator_traits::value_type, class P1 = decltype(data_adl(It1{})) + class V1 = typename std::iterator_traits::value_type, class P1 = decltype(data_adl(std::declval())) > auto all_reduce_n(It1 first, Size count, Op op = {}) ->decltype(all_reduce_in_place_n(first, count, op)) { return all_reduce_in_place_n(first, count, op); } +#if not defined(EXAMPI) template< class It1, class Size, class Op, class V1 = typename std::iterator_traits::value_type, class P1 = decltype(data_adl(It1{})), @@ -1960,6 +2119,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com MPI_(Reduce)(data_adl(first), nullptr , count, datatype{}(), PredefinedOp{}, root, impl_) ; } +#endif template< class It1, class Size, class Op = std::plus<>, @@ -2243,7 +2403,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com std::vector counts(c.size()); std::transform( counts.begin(), counts.end(), begin(c), counts.begin(), - [](auto& /*unused*/, auto& b){return std::distance(begin(b), end(b));} + [](auto& /*unused*/, auto const& b){return std::distance(begin(b), end(b));} ); int n = scatter(counts); scatterv_n( @@ -2386,7 +2546,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); if(s != MPI_SUCCESS) {throw std::runtime_error("cannot gather");} advance(d_first, count*size(), root...); - // std::advance(d_first, count); + // std::advance(d_first, count); return d_first; } @@ -2942,29 +3102,37 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com } public: - std::string get_name() const { - std::array comm_name{}; + auto get_name() const { + // std::array comm_name{}; + std::string comm_name(MPI_MAX_OBJECT_NAME, '\0'); int len; // NOLINT(cppcoreguidelines-init-variables) : delayed initialization 
MPI_(Comm_get_name)(impl_, comm_name.data(), &len); - return {comm_name.data(), static_cast(len)}; + comm_name.resize(static_cast(len)); + return comm_name; } void set_name(std::string const& s) {MPI_(Comm_set_name)(impl_, s.c_str());} std::string name() const {return get_name();} [[deprecated]] void name(std::string const& s) {set_name(s);} +#if not defined(EXAMPI) static mpi3::communicator& parent() { static_assert(sizeof(MPI_Comm) == sizeof(mpi3::communicator), "!"); static_assert(std::is_same{}, "!"); MPI_Comm* p{}; MPI_Comm_get_parent(p); assert(p); return reinterpret_cast(*p); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : TODO(correaa) avoid reinterpret_cast } +#endif + +#if not defined(EXAMPI) static communicator spawn(std::string const& argv0, int np) { communicator intercomm; MPI_Comm_spawn(argv0.data(), MPI_ARGV_NULL, np, MPI_INFO_NULL, 0, MPI_COMM_SELF, &intercomm.impl_, MPI_ERRCODES_IGNORE ); return intercomm; } +#endif +#if not defined(EXAMPI) communicator intercommunicator_create(int local_leader, communicator const& peer, int remote_leader, int tag = 0) const{ communicator ret; int const s = MPI_Intercomm_create(impl_, local_leader, peer.impl_, remote_leader, tag, &ret.impl_); @@ -2975,10 +3143,14 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com communicator create(int local_leader, communicator const& peer, int remote_leader, int tag = 0) const{ return intercommunicator_create(local_leader, peer, remote_leader, tag); } +#endif communicator create(group const& g) const; communicator create_group(group const& g, int tag) const; + +#if not defined(EXAMPI) FILE* fopen(char const* filename, int amode = unsigned{MPI_MODE_RDWR} | unsigned{MPI_MODE_CREATE}); +#endif inline static auto name(communicator::topology const& t) -> std::string const& { static std::map const names = { @@ -2990,8 +3162,8 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com //template //friend auto 
operator,(communicator& comm, T const& t){ -// std::vector ret(comm.size()); -// comm.all_gather_n(std::addressof(t), 1, first, root); +// std::vector ret(comm.size()); +// comm.all_gather_n(std::addressof(t), 1, first, root); //} template @@ -3022,8 +3194,14 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com } }; +inline communicator::topology const communicator::topology::undefined{MPI_UNDEFINED}; +inline communicator::topology const communicator::topology::graph {MPI_GRAPH }; +inline communicator::topology const communicator::topology::cartesian{MPI_CART }; + inline void barrier(communicator& self) { self. barrier();} +#if not defined(EXAMPI) inline auto ibarrier(communicator& self) {return self.ibarrier();} +#endif inline communicator::communicator(group const& g, int tag){ MPI_(Comm_create_group)(MPI_COMM_WORLD, &const_cast(g), tag, &impl_); // NOLINT(cppcoreguidelines-pro-type-const-cast) : TODO(correaa) consider using non-const argument to begin with @@ -3059,16 +3237,16 @@ inline communicator communicator::create_group(class group const& g, int tag = 0 template inline void communicator::deallocate_shared(pointer /*unused*/){ -// MPI_Free_mem(p.base_ptr(rank())); +// MPI_Free_mem(p.base_ptr(rank())); } template inline void communicator::deallocate(pointer& /*p*/, MPI_Aint /*size*/) { // TODO(correaa) should be called free? 
-// p.pimpl_->fence(); -// MPI_Free_mem(p.local_ptr()); -// MPI_Win_free(&p.pimpl_->impl_); -// delete p.pimpl_; -// p.pimpl_ == nullptr; +// p.pimpl_->fence(); +// MPI_Free_mem(p.local_ptr()); +// MPI_Win_free(&p.pimpl_->impl_); +// delete p.pimpl_; +// p.pimpl_ == nullptr; } #if 0 @@ -3134,100 +3312,99 @@ inline mpi3::communicator& grip_communicator(MPI_Comm const& handle) { //namespace mpi3 = boost::mpi3; //class V{ -// mpi3::communicator comm_; -// public: -// V(mpi3::communicator const& c) : comm_(c){} -// V(mpi3::communicator&& c) : comm_(std::move(c)){} +// mpi3::communicator comm_; +// public: +// V(mpi3::communicator const& c) : comm_(c){} +// V(mpi3::communicator&& c) : comm_(std::move(c)){} //}; //int mpi3::main(int, char*[], mpi3::communicator world){ -// std::cout << mpi3::undefined << std::endl; +// std::cout << mpi3::undefined << std::endl; -// static_assert(std::is_nothrow_constructible::value, "MyType should be noexcept MoveConstructible"); +// static_assert(std::is_nothrow_constructible::value, "MyType should be noexcept MoveConstructible"); -//// auto worldcopy1 = world; -//// auto worldcopy2 = std::move(worldcopy1); -//// V v(worldcopy); -//// V v2(std::move(v)); +//// auto worldcopy1 = world; +//// auto worldcopy2 = std::move(worldcopy1); +//// V v(worldcopy); +//// V v2(std::move(v)); -// if(world.rank() == 0) cout << "MPI version " << mpi3::version() << '\n'; -//// if(world.rank() == 0) cout << "Topology: " << name(world.topo()) << '\n'; +// if(world.rank() == 0) cout << "MPI version " << mpi3::version() << '\n'; +//// if(world.rank() == 0) cout << "Topology: " << name(world.topo()) << '\n'; -// cout << "MPI_ERR_COMM = " << MPI_ERR_COMM << '\n'; +// cout << "MPI_ERR_COMM = " << MPI_ERR_COMM << '\n'; -// mpi3::communicator comm; -// assert(!comm); -//// cout << comm.rank() << '\n'; +// mpi3::communicator comm; +// assert(!comm); +//// cout << comm.rank() << '\n'; -// mpi3::communicator comm2 = world; -// assert(comm2); -// assert(comm2.size() 
== world.size()); -// assert(comm2 == world); -// assert(&comm2 != &world); +// mpi3::communicator comm2 = world; +// assert(comm2); +// assert(comm2.size() == world.size()); +// assert(comm2 == world); +// assert(&comm2 != &world); -// mpi3::communicator comm3 = world;//.duplicate(); -// assert(comm3); -// assert(comm3 == world); -// assert(&comm3 != &world); -// comm = comm2; -// assert(&comm != &comm2); +// mpi3::communicator comm3 = world;//.duplicate(); +// assert(comm3); +// assert(comm3 == world); +// assert(&comm3 != &world); +// comm = comm2; +// assert(&comm != &comm2); -//// world2 = world; +//// world2 = world; -// return 0; +// return 0; //#if 0 -//// boost::mpi3::communicator newcomm = world; -// { -// int color = world.rank()/3; -// communicator row_comm; -// row_comm = world.split(color); -// world.barrier(); -// std::cout << std::to_string(world.rank()) + " " + std::to_string(row_comm.rank()) + "\n";// << std::endl; -// world.barrier(); -// } -// { -// communicator row_comm = world/3; -// world.barrier(); -// std::cout << std::to_string(world.rank()) + " " + std::to_string(row_comm.rank()) + "\n";// << std::endl; -// world.barrier(); -// } - -// world.barrier(); -// if(world.rank() == 0) cout << "prime communicator" << '\n'; -// world.barrier(); - -// { -// // group world_group(world); -// // const int ranks[4] = {2, 3, 5, 7}; -// // group prime = world_group.include(ranks, ranks + 4); -// // communicator prime_comm(world, prime); -// auto prime_comm = world.subcomm({2,3,5,7}); -// cout << world.rank() << " -> " << prime_comm.rank() << "/" << prime_comm.size() << '\n'; +//// boost::mpi3::communicator newcomm = world; +// { +// int color = world.rank()/3; +// communicator row_comm; +// row_comm = world.split(color); +// world.barrier(); +// std::cout << std::to_string(world.rank()) + " " + std::to_string(row_comm.rank()) + "\n";// << std::endl; +// world.barrier(); +// } +// { +// communicator row_comm = world/3; +// world.barrier(); +// std::cout 
<< std::to_string(world.rank()) + " " + std::to_string(row_comm.rank()) + "\n";// << std::endl; +// world.barrier(); +// } + +// world.barrier(); +// if(world.rank() == 0) cout << "prime communicator" << '\n'; +// world.barrier(); + +// { +// // group world_group(world); +// // const int ranks[4] = {2, 3, 5, 7}; +// // group prime = world_group.include(ranks, ranks + 4); +// // communicator prime_comm(world, prime); +// auto prime_comm = world.subcomm({2,3,5,7}); +// cout << world.rank() << " -> " << prime_comm.rank() << "/" << prime_comm.size() << '\n'; //#if 0 -// if(communicator::null != prime_comm){ -// cout << world.rank() << " -> " << prime_comm.rank() << "/" << prime_comm.size() << '\n'; -// }else{ -// cout << world.rank() << " not in prime comm\n"; -// } +// if(communicator::null != prime_comm){ +// cout << world.rank() << " -> " << prime_comm.rank() << "/" << prime_comm.size() << '\n'; +// }else{ +// cout << world.rank() << " not in prime comm\n"; +// } //#endif -// } - -// world.barrier(); -// if(world.rank() == 0) cout << "prime communicator" << '\n'; -// world.barrier(); - -// if(0){ -// auto prime = world.subcomm({2,3,5,7}); -// if(prime.is_empty()){ -// // if (communicator::null != prime){ -// cout << world.rank() << " -> " << prime.rank() << "/" << prime.size() << '\n'; -// }else{ -// cout << world.rank() << " not in prime comm\n"; -// } -// } +// } + +// world.barrier(); +// if(world.rank() == 0) cout << "prime communicator" << '\n'; +// world.barrier(); + +// if(0){ +// auto prime = world.subcomm({2,3,5,7}); +// if(prime.is_empty()){ +// // if (communicator::null != prime){ +// cout << world.rank() << " -> " << prime.rank() << "/" << prime.size() << '\n'; +// }else{ +// cout << world.rank() << " not in prime comm\n"; +// } +// } //#endif //} //#endif #endif - diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/basic_communicator.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/basic_communicator.hpp index 
cc42b027f7..ec3569082a 100644 --- a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/basic_communicator.hpp +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/basic_communicator.hpp @@ -1,8 +1,7 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- // Copyright 2018-2023 Alfredo A. Correa -#ifndef MPI3_DETAIL_BASIC_COMMUNICATOR_HPP -#define MPI3_DETAIL_BASIC_COMMUNICATOR_HPP +#ifndef BMPI3_DETAIL_BASIC_COMMUNICATOR_HPP +#define BMPI3_DETAIL_BASIC_COMMUNICATOR_HPP #include "../../mpi3/vector.hpp" @@ -154,8 +153,8 @@ class basic_communicator{ ) { std::for_each(first, last, [&b, &pos, this](auto& e) {pos = unpack_n(b, pos, std::addressof(e), 1);}); // while(first != last){ - // pos = unpack_n(b, pos, std::addressof(*first), 1); - // ++first; + // pos = unpack_n(b, pos, std::addressof(*first), 1); + // ++first; // } return pos; } @@ -186,7 +185,7 @@ class basic_communicator{ } template auto unpack_n(detail::buffer& b, It first, Size count) { - // assert(0); + // assert(0); b.pos = unpack_n(b, b.pos, first, count); return b.pos; } @@ -246,11 +245,7 @@ class basic_communicator{ auto send(uvector const& p, int dest, int tag = 0) { return send_n(p.data(), p.size(), dest, tag); } - match matched_probe(int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) const { - match m; - MPI_(Mprobe)(source, tag, impl_, &m.message::impl_, &m.status::impl_); - return m; - } + template auto receive_n( It dest, @@ -263,6 +258,14 @@ class basic_communicator{ receive_n(buffer.data(), buffer.size(), source, tag); return std::copy_n(buffer.begin(), n, dest); } + + #if not defined(EXAMPI) + match matched_probe(int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) const { + match m; + MPI_(Mprobe)(source, tag, impl_, &m.message::impl_, &m.status::impl_); + return m; + } + auto receive(uvector& b, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) const { match m = matched_probe(source, tag); auto const count = static_cast(m.count()); @@ -270,9 +273,13 @@ class 
basic_communicator{ b.resize(b.size() + count); return m.receive_n(std::next(b.data(), size), count); } + #endif + + #if not defined(EXAMPI) auto receive(detail::buffer& b, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) const { return receive(static_cast&>(b), source, tag); } + #endif template auto send_receive_replace_n( It first, diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/call.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/call.hpp index 7edb563370..b3e81844cb 100644 --- a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/call.hpp +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/call.hpp @@ -26,11 +26,12 @@ int call() { template std::string call() { int len = -1; - std::array name{}; + std::string name(MPI_MAX_PROCESSOR_NAME, '\0'); // std::array name{}; auto const e = static_cast((*F)(name.data(), &len)); assert(len >= 0); + name.resize(static_cast(len)); if(e != mpi3::error::success) {throw std::system_error{e, "cannot call function " + std::string{__PRETTY_FUNCTION__}};} - return {name.data(), static_cast(len)}; + return name; } template((*F)(std::declval()...)))* = nullptr> diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/datatype.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/datatype.hpp index adf95dd702..933b459efc 100644 --- a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/datatype.hpp +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/datatype.hpp @@ -1,5 +1,4 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2017-2023 Alfredo A. Correa +// Copyright 2017-2024 Alfredo A. 
Correa #ifndef BOOST_MPI3_DETAIL_DATATYPE_HPP #define BOOST_MPI3_DETAIL_DATATYPE_HPP @@ -7,7 +6,7 @@ // #define OMPI_SKIP_MPICXX 1 // https://github.com/open-mpi/ompi/issues/5157 #include -#if defined(__NVCC__) +#if defined(__NVCC__) || defined(__HIPCC__) #include #endif @@ -77,17 +76,30 @@ class packed { template struct basic_datatype; + +#if defined(MPI_DOUBLE_COMPLEX) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define MPI3_DECLARE_DATATYPE(TypE, MpiiD) \ template<> struct basic_datatype { \ -/* constexpr*/ operator MPI_Datatype() const { \ +/* constexpr*/ operator MPI_Datatype() const { \ assert(MPI_DOUBLE_COMPLEX != MPI_DATATYPE_NULL ); /* NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert)*/ \ assert( (MpiiD) != MPI_DATATYPE_NULL ); /* NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) in some MPI distros this is not constexpr */ /*this system doesn't support this type*/ \ return MpiiD; \ } \ auto get() const -> MPI_Datatype {return MpiiD;} \ -/* static constexpr MPI_Datatype value = MpiiD;*/ \ +/* static constexpr MPI_Datatype value = MpiiD;*/ \ } +#else +#define MPI3_DECLARE_DATATYPE(TypE, MpiiD) \ +template<> struct basic_datatype { \ +/* constexpr*/ operator MPI_Datatype() const { \ + assert( (MpiiD) != MPI_DATATYPE_NULL ); /* NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) in some MPI distros this is not constexpr */ /*this system doesn't support this type*/ \ + return MpiiD; \ + } \ + auto get() const -> MPI_Datatype {return MpiiD;} \ +/* static constexpr MPI_Datatype value = MpiiD;*/ \ +} +#endif // basic data types http://beige.ucs.indiana.edu/I590/node100.html MPI3_DECLARE_DATATYPE(char , MPI_CHAR); @@ -111,30 +123,55 @@ MPI3_DECLARE_DATATYPE(long long int , MPI_LONG_LONG_INT); MPI3_DECLARE_DATATYPE(bool , MPI_C_BOOL); // C++ binding not used MPI_CXX_BOOL); -// MPI_INT8_T int8_t -// MPI_INT16_T int16_t -// MPI_INT32_T int32_t -// MPI_INT64_T int64_t -// MPI_UINT8_T uint8_t -// MPI_UINT16_T uint16_t -// MPI_UINT32_T 
uint32_t -// MPI_UINT64_T uint64_t +// MPI_INT8_T int8_t +// MPI_INT16_T int16_t +// MPI_INT32_T int32_t +// MPI_INT64_T int64_t +// MPI_UINT8_T uint8_t +// MPI_UINT16_T uint16_t +// MPI_UINT32_T uint32_t +// MPI_UINT64_T uint64_t +#if defined(MPI_C_FLOAT_COMPLEX) MPI3_DECLARE_DATATYPE(cxx_float_complex , MPI_C_FLOAT_COMPLEX); +#else +MPI3_DECLARE_DATATYPE(cxx_float_complex , MPI_CXX_FLOAT_COMPLEX); +#endif + +#if defined(MPI_C_DOUBLE_COMPLEX) MPI3_DECLARE_DATATYPE(cxx_double_complex , MPI_C_DOUBLE_COMPLEX); +#else +MPI3_DECLARE_DATATYPE(cxx_double_complex , MPI_CXX_DOUBLE_COMPLEX); +#endif + +#if defined(MPI_C_LONG_DOUBLE_COMPLEX) MPI3_DECLARE_DATATYPE(cxx_long_double_complex, MPI_C_LONG_DOUBLE_COMPLEX); +#else +MPI3_DECLARE_DATATYPE(cxx_long_double_complex, MPI_CXX_LONG_DOUBLE_COMPLEX); +#endif // MPI3_DECLARE_DATATYPE(cxx_2double_complex , MPI_2DOUBLE_COMPLEX); // not available in mpich // TODO(correaa) these types below probably don't behave correctly for reductions with multiplication +#if defined(MPI_COMPLEX) MPI3_DECLARE_DATATYPE(float_float , MPI_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); +#else +MPI3_DECLARE_DATATYPE(float_float , MPI_CXX_FLOAT_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); +#endif + +#if defined(MPI_DOUBLE_COMPLEX) MPI3_DECLARE_DATATYPE(double_double , MPI_DOUBLE_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); -MPI3_DECLARE_DATATYPE(decltype(std::tuple{}), MPI_DOUBLE_COMPLEX); +MPI3_DECLARE_DATATYPE(decltype(std::tuple{}), MPI_DOUBLE_COMPLEX); // TODO(correaa) is this correct? 
reduce (specially multiplication) will not give correct result MPI3_DECLARE_DATATYPE(long_double_long_double, MPI_DOUBLE_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); +#else +MPI3_DECLARE_DATATYPE(double_double , MPI_CXX_DOUBLE_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); +MPI3_DECLARE_DATATYPE(decltype(std::tuple{}), MPI_CXX_DOUBLE_COMPLEX); // TODO(correaa) is this correct? reduce (specially multiplication) will not give correct result +MPI3_DECLARE_DATATYPE(long_double_long_double, MPI_CXX_DOUBLE_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); +#endif -#if defined(__NVCC__) -MPI3_DECLARE_DATATYPE(thrust::complex, MPI_DOUBLE_COMPLEX); +#if defined(__NVCC__) || defined(__HIPCC__) +MPI3_DECLARE_DATATYPE(::thrust::complex, MPI_DOUBLE_COMPLEX); #endif MPI3_DECLARE_DATATYPE(float_int , MPI_FLOAT_INT); @@ -189,6 +226,14 @@ auto datatype_detect(...) -> default_datatype; template auto datatype_detect(U const&) -> default_datatype>; +// support enums +template< + class T, class U, + class = std::enable_if_t >, // this is necessary for libstdc++ gcc 7, otherwise underlying_type gives a hard error below + class UL = std::underlying_type_t +> +auto datatype_detect(U const&) -> default_datatype

()))* =nullptr> + constexpr explicit transform_ptr(Other const& other) : p_{other.p_}, f_{other.f_} {} constexpr auto functor() const -> UF {return f_;} constexpr auto base() const -> Ptr const& {return p_;} constexpr auto operator*() const -> reference { // NOLINT(readability-const-return-type) in case synthesis reference is a `T const` + // invoke allows for example to use .transformed( &member) instead of .transformed( std::mem_fn(&member) ) return std::invoke(f_, *p_); // NOLINT(readability-const-return-type) in case synthesis reference is a `T const` // return f_(*p_); // NOLINT(readability-const-return-type) in case synthesis reference is a `T const` } @@ -101,19 +117,18 @@ struct transform_ptr { constexpr auto operator-(transform_ptr const& other) const -> difference_type {return p_ - other.p_;} - constexpr auto operator[](difference_type n) const -> reference {return *((*this) + n);} + constexpr auto operator[](difference_type n) const -> reference {return *((*this) + n);} // NOLINT(readability-const-return-type) transformed_view might return by const value. 
constexpr auto operator==(transform_ptr const& other) const -> bool {return p_ == other.p_;} constexpr auto operator!=(transform_ptr const& other) const -> bool {return p_ != other.p_;} private: Ptr p_; - UF f_; + UF f_; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) technically this type can be const template friend struct transform_ptr; }; - template struct array_traits; @@ -142,14 +157,6 @@ constexpr auto rank_aux(T const&) -> std::integral_constant struct rank : decltype(rank_aux(std::declval())) {}; -#if not defined(__cpp_lib_nonmember_container_access) or __cpp_lib_nonmember_container_access < 201411 -template -constexpr auto size(Container const& con) --> std::make_signed_t { - return static_cast>(con.size());} -#else -#endif - template{}, int> = 0> // special sfinae trick constexpr auto stride(Pointer /*ptr*/) -> std::ptrdiff_t {return 1;} @@ -168,7 +175,7 @@ template inline auto has_get_allocator_aux(... ) -> std::false_type; template -constexpr auto get_allocator(T(&/*array*/)[N]) noexcept -> std::allocator::type>> {return {};} // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backwards compatibility +constexpr auto get_allocator(T(&/*array*/)[N]) noexcept -> std::allocator>> {return {};} // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backwards compatibility template constexpr auto get_allocator(T* const& /*t*/) @@ -202,7 +209,7 @@ auto common(T1 const& val1, T2 const& val2) -> Ret { template auto has_num_elements_aux(T const& /*array*/)->decltype(std::declval().num_elements() + 1, std::true_type {}); inline auto has_num_elements_aux(... 
)->decltype( std::false_type{}); -template struct has_num_elements : decltype(has_num_elements_aux(std::declval())) {}; +template struct has_num_elements : decltype(has_num_elements_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) template{}> > constexpr auto num_elements(A const& arr) @@ -211,32 +218,36 @@ constexpr auto num_elements(A const& arr) } template - auto has_size_aux(T const& cont) -> decltype(cont.size(), std::true_type {}); -inline auto has_size_aux(... ) -> decltype( std::false_type{}); -template struct has_size : decltype(has_size_aux(std::declval())) {}; + auto has_size_aux(T const& cont) -> decltype(std::size(cont), std::true_type {}); +inline auto has_size_aux(... ) -> decltype( std::false_type{}); +template struct has_size : decltype(has_size_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) template auto has_data_elements_aux(T&& array)->decltype(array.data_elements() + 1, std::true_type {}); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) TODO(correaa) why +1? inline auto has_data_elements_aux(... )->decltype( std::false_type{}); -template struct has_data_elements : decltype(has_data_elements_aux(std::declval())) {}; +template struct has_data_elements : decltype(has_data_elements_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) + +template + auto has_base_aux(T&& array)->decltype(array.base() + 1, std::true_type {}); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) TODO(correaa) why +1? +inline auto has_base_aux(... 
)->decltype( std::false_type{}); +template struct has_base : decltype(has_base_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) namespace detail { template - auto has_data_aux(T&& cont) -> decltype(cont.data() + 1, std::true_type {}); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) TODO(correaa) why +1? -inline auto has_data_aux(... ) -> decltype( std::false_type{}); + auto has_data_aux(T&& cont) -> decltype(cont.data_elements() + 1, std::true_type {}); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) TODO(correaa) why +1? +inline auto has_data_aux(... ) -> decltype( std::false_type{}); } // end namespace detail -template struct has_data : decltype(detail::has_data_aux(std::declval())) {}; +template struct has_data : decltype(detail::has_data_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) template>::value && !has_data_elements>::value, int> =0> -auto data_elements(Array& arr) {return arr.data();} +auto data_elements(Array& arr) {return std::data(arr);} template>::value && !has_data_elements>::value, int> =0> -auto data_elements(Array const& arr) {return arr.data();} +auto data_elements(Array const& arr) {return std::data(arr);} // .data();} template::value && has_size::value && has_data::value, int> =0> -constexpr auto num_elements(A const& arr) -> std::make_signed_t { - - return static_cast>(arr.size()); +constexpr auto num_elements(A const& arr) -> std::make_signed_t { + return static_cast>(std::size(arr)); // (arr.size()); } template{}, int> =0> @@ -244,8 +255,8 @@ constexpr auto data_elements(A const& arr) ->decltype(arr.data_elements()) { return arr.data_elements(); } -template> and not has_data_elements>::value && !has_data>::value, int> =0> -constexpr auto data_elements(T& value) {return &value;} +template> && ! 
has_data_elements>::value && !has_data>::value, int> =0> +constexpr auto data_elements(T& value) -> decltype(&value) {return &value;} template struct num_elements_t: std::integral_constant {}; @@ -256,7 +267,7 @@ template struct num_elements_t : num_elements_t template constexpr auto num_elements(const T(&/*array*/)[N]) noexcept {return num_elements_t{};} // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backwards compatibility -template().data())>>> +template().data())>>, class = decltype(Vector{}.resize(1))> auto data_elements(Vector const& vec) ->decltype(vec.data()) { return vec.data(); } @@ -280,26 +291,26 @@ constexpr auto data_elements(T(&array)[N]) noexcept {return data_elements(array[ template auto has_dimensionality_aux(T const& /*array*/)->decltype(T::rank_v, std::true_type {}); inline auto has_dimensionality_aux(... )->decltype( std::false_type{}); -template struct has_dimensionality : decltype(has_dimensionality_aux(std::declval())) {}; +template struct has_dimensionality : decltype(has_dimensionality_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) template{}, int> =0> constexpr auto dimensionality(Container const& /*container*/) -->std::decay_t { +->std::decay_t { return Container::rank_v;} template auto has_dimensionaliy_member_aux(T const& /*array*/) -> decltype(static_cast(static_cast(T::rank_v)), std::true_type {}); inline auto has_dimensionaliy_member_aux(... 
) -> decltype( std::false_type{}); -template struct has_dimensionality_member : decltype(has_dimensionaliy_member_aux(std::declval())){}; +template struct has_dimensionality_member : decltype(has_dimensionaliy_member_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) -template{}>> +template{}>> constexpr auto dimensionality(T const&/*, void* = nullptr*/) {return 0;} template constexpr auto dimensionality(T const(&array)[N]) {return 1 + dimensionality(array[0]);} // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backwards compatibility template -inline constexpr auto sizes(T const& /*unused*/) noexcept -> tuple<> {return {};} +constexpr auto sizes(T const& /*unused*/) noexcept -> tuple<> {return {};} template constexpr auto sizes(const T(&array)[N]) noexcept { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) for backwards compatibility @@ -317,13 +328,13 @@ constexpr auto base(T(&array)[N]) noexcept { // NOLINT(cppcoreguidelines-avoid- template constexpr auto base(T(*&array)[N]) noexcept {return base(*array);} // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) for backwards compatibility -template>> +template>> constexpr auto base(T const* ptr) noexcept {return ptr;} -template>> +template>> constexpr auto base(T* ptr) noexcept {return ptr;} -template and std::is_trivial_v, int> =0> +template && std::is_trivial_v, int> =0> auto base(T& array) {return &array;} template @@ -335,27 +346,27 @@ constexpr auto corigin(const T(&array)[N]) noexcept {return corigin(array[0]);} template().extension())> auto has_extension_aux(T const&) -> std::true_type; inline auto has_extension_aux(... 
) -> std::false_type; -template struct has_extension : decltype(has_extension_aux(std::declval())){}; +template struct has_extension : decltype(has_extension_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) template::value>> auto extension(Container const& cont) // TODO(correaa) consider "extent" ->decltype(multi::extension_t>(0, static_cast>(size(cont)))) { return multi::extension_t>(0, static_cast>(size(cont))); } -template().shape())> - auto has_shape_aux(T const&) -> std::true_type; -inline auto has_shape_aux(... ) -> std::false_type; +// template().shape())> +// auto has_shape_aux(T const&) -> std::true_type; +// inline auto has_shape_aux(... ) -> std::false_type; -template struct has_shape : decltype(has_shape_aux(std::declval())) {}; +template struct has_shape : decltype(has_shape_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) trick template().extensions())> auto has_extensions_aux(T const&) -> std::true_type; inline auto has_extensions_aux(... 
) -> std::false_type; -template struct has_extensions : decltype(has_extensions_aux(std::declval())) {}; +template struct has_extensions : decltype(has_extensions_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) trick template::value, int> =0> -[[nodiscard]] auto extensions(T const& array) -> std::decay_\ +[[nodiscard]] constexpr auto extensions(T const& array) -> std::decay_\ t { return array.extensions(); } @@ -366,12 +377,23 @@ constexpr auto extensions_aux2(BoostMultiArray const& arr, std::index_sequence::value && !has_extensions::value, int> =0> -constexpr auto extensions(BoostMultiArray const& array) { - return extensions_aux2(array, std::make_index_sequence{}); +template::value, int> =0> +[[nodiscard]] auto extensions_of(T const& array) { + if constexpr(std::is_convertible_v) { + return boost::multi::extensions_t<0>{}; + } + if constexpr(std::is_convertible_v) { + return boost::multi::extensions_t<1>{array.extension()}; + } } -template::value && !has_shape::value, int> =0> + +// template::value && !has_extensions::value, int> =0> +// constexpr auto extensions(BoostMultiArray const& array) { +// return extensions_aux2(array, std::make_index_sequence{}); +// } + +template::value /*&& !has_shape::value*/, int> =0> constexpr auto extensions(T const& /*unused*/) -> multi::layout_t<0>::extensions_type {return {};} template @@ -413,14 +435,14 @@ template().layout())> inline auto has_layout_member_aux(... 
) -> std::false_type; template -struct has_layout_member : decltype(has_layout_member_aux(std::declval())){}; +struct has_layout_member : decltype(has_layout_member_aux(std::declval())) {}; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) template{}> > auto layout(T const& array) ->decltype(array.layout()) { return array.layout(); } -template{}> > +template{}> > auto layout(T const& /*unused*/) -> layout_t<0> {return {};} template @@ -459,10 +481,10 @@ template constexpr auto data_elements(std template constexpr auto data_elements(std::array const& arr) noexcept {return arr.data();} template constexpr auto data_elements(std::array, N> const& arr) noexcept {return data_elements(arr[0]);} -template constexpr auto data_elements(std::array && arr) noexcept {return arr.data();} +template constexpr auto data_elements(std::array && arr) noexcept {return std::move(arr).data();} template -constexpr auto data_elements(std::array, N>&& arr) noexcept {return data_elements(arr[0]);} +constexpr auto data_elements(std::array, N>&& arr) noexcept {return data_elements(std::move(arr)[0]);} template constexpr auto num_elements(std::array const& /*unused*/) noexcept -> std::ptrdiff_t{return N;} @@ -499,7 +521,7 @@ auto extensions(std::array, M> const& arr) { template constexpr auto stride(std::array const& /*arr*/) { - return static_cast(1); // multi::stride_type? + return static_cast(1U); // multi::stride_type? 
} template @@ -513,4 +535,7 @@ constexpr auto layout(std::array const& arr) { } } // end namespace boost::multi -#endif + +#undef BOOST_MULTI_HD + +#endif // BOOST_MULTI_UTILITY_HPP diff --git a/external_codes/boost_multi/multi/include/multi b/external_codes/boost_multi/multi/include/multi new file mode 120000 index 0000000000..f91c3d23ce --- /dev/null +++ b/external_codes/boost_multi/multi/include/multi @@ -0,0 +1 @@ +boost/multi \ No newline at end of file diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas.hpp deleted file mode 100644 index c812cccb16..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas.hpp +++ /dev/null @@ -1,80 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXXX $CXXFLAGS $0 -o $0x `pkg-config --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2018-2021 - -#ifndef MULTI_ADAPTORS_BLAS_HPP -#define MULTI_ADAPTORS_BLAS_HPP - -#include "../adaptors/blas/asum.hpp" -#include "../adaptors/blas/axpy.hpp" -#include "../adaptors/blas/copy.hpp" -#include "../adaptors/blas/dot.hpp" -#include "../adaptors/blas/gemm.hpp" -#include "../adaptors/blas/gemv.hpp" -//#include "../adaptors/blas/ger.hpp" -#include "../adaptors/blas/herk.hpp" -#include "../adaptors/blas/iamax.hpp" -#include "../adaptors/blas/nrm2.hpp" -#include "../adaptors/blas/scal.hpp" -#include "../adaptors/blas/swap.hpp" -#include "../adaptors/blas/syrk.hpp" -#include "../adaptors/blas/trsm.hpp" - -#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS" -#define BOOST_TEST_DYN_LINK -#include - -#include "../array.hpp" -#include "../utility.hpp" - -#include -#include -#include // iota -#include // transform - -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(multi_blas_herk_complex){ - using complex = std::complex; complex const I{0, 
1}; - using multi::blas::herk; - { - multi::array const A = { - {1. + 3.*I, 9. + 1.*I}, - {3. - 2.*I, 7. - 8.*I}, - {4. + 1.*I, 1. - 3.*I} - }; - multi::array C({3, 3}, 9999.); - herk(1., A, C); // herk(A, C); // C†=C=AA†=(A†A)† - BOOST_REQUIRE( C[1][2] == complex(41., 2.) ); - BOOST_REQUIRE( C[2][1] == conj(C[1][2]) ); - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_asum_complex){ - using complex = std::complex; - multi::array arr(1000, 0.); -// std::iota(begin(arr), end(arr), -700.); -// std::transform(cbegin(arr), cend(arr), begin(arr), [](auto&& a){return sqrt(a);}); - { - using multi::blas::asum; - BOOST_REQUIRE( asum(arr) == 0 ); - // std::cout << asum(arr) << std::endl; - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_nrm2_complex){ - multi::array arr(1000, 0.); -// std::iota(begin(arr), end(arr), -700.); -// std::transform(cbegin(arr), cend(arr), begin(arr), [](auto&& a){return sqrt(a);}); - { - using multi::blas::nrm2; - BOOST_REQUIRE( nrm2(arr) == 0. ); - } -} - -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/CMakeLists.txt b/external_codes/boost_multi/multi/include/multi/adaptors/blas/CMakeLists.txt deleted file mode 100644 index 75847dee95..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/CMakeLists.txt +++ /dev/null @@ -1,78 +0,0 @@ -# -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;autowrap:nil;-*- -cmake_minimum_required(VERSION 3.16) # for reference Ubuntu 20.04 uses 3.16, 3.18 for BLAS::BLAS - -set(CMAKE_VERBOSE_MAKEFILE ON) - -#project( -# boost-multi-adaptors-blas -# VERSION 0.1 -# LANGUAGES CXX -#) - -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - -if((NOT - CMAKE_CXX_COMPILER_ID - STREQUAL - "PGI" - ) - AND (NOT - DART_COMPILER_NAME - STREQUAL - "nvcc" - ) -) - set(BLA_VENDOR Intel10_64lp) - find_package(BLAS) - if(BLAS_FOUND) # in some systems with MKL, regular BLAS headers need to be found for it to work - message("Multi/BLAS: MKL environment detected") - 
add_definitions(-DFORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID) - SET(CMAKE_SKIP_BUILD_RPATH FALSE) - SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) - SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib64") - SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) - SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib64") - else() - message("Multi/BLAS: MKL environment not detected, looking for other BLAS") - unset(BLA_VENDOR) - find_package(BLAS) - find_path( - BLAS_INCLUDE_DIRS - cblas.h - /usr/include - /usr/local/include - $ENV{BLAS_HOME}/include - ) - if(BLAS_FOUND) - message("Some BLAS found") - endif() - endif() - - link_libraries(${BLAS_LIBRARIES}) - # link_libraries(BLAS::BLAS) // needs 3.18 - - # include_directories(BLAS::BLAS) # does not including a real path so far - find_path( - BLAS_INCLUDE_DIRS - cblas.h - /usr/include - /usr/local/include - $ENV{BLAS_HOME}/include - ) -else() - link_libraries("-lblas") # cmake cannot detect BLAS with pgi/nvc++ but it ships with its own version -endif() - -if(CMAKE_CXX_COMPILER_ID STREQUAL "PGI") - add_definitions(-DFORTRAN_COMPLEX_FUNCTIONS_RETURN_VOID) - add_definitions(-DRETURN_BY_STACK) # TODO(correaa) check -endif() - -include_directories(${CMAKE_BINARY_DIR}) - -if(BLAS_FOUND) - add_subdirectory(test) -else() - message(WARNING "BLAS not found, BLAS-adaptor tests will not be compiled and run. If you want this feature install BLAS, for example please run:\n sudo apt install libblas-dev") -endif() diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/README.md b/external_codes/boost_multi/multi/include/multi/adaptors/blas/README.md deleted file mode 100644 index 006d8bb803..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/README.md +++ /dev/null @@ -1,71 +0,0 @@ - -# [Boost.]Multi BLAS Adaptor - -(not an official Boost library) - -_© Alfredo A. Correa, 2018-2021_ - -The BLAS Adaptor provides an interface for BLAS-like libraries. 
- -## Contents -[[_TOC_]] - -## Numeric Arrays, Conjugation Real and Imaginary parts - -This functions produce views (not copies) related to conjugation, real and imaginary parts. - -```cpp - using complex = std::complex; - complex const I{0, 1}; - multi::array B = { - {1. - 3.*I, 6. + 2.*I}, - {8. + 2.*I, 2. + 4.*I}, - {2. - 1.*I, 1. + 1.*I} - }; - - namespace blas = multi::blas; - multi::array conjB = blas::conj(B); - - assert( blas::conj(B)[2][1] == std::conj(B[2][1]) ); - - assert( blas::transposed(B)[1][2] == B[2][1] ); - assert( blas::transposed(B) == ~B ); - - assert( blas::hermitized(B)[2][1] == blas::conj(B)[1][2] ); - assert( blas::hermitized(B) == blas::conj(blas::transposed(B)) ); - - assert( blas::real(B)[2][1] == std::real(B[2][1]) ); - assert( blas::imag(B)[2][1] == std::imag(B[2][1]) ); - - multi::array B_real_doubled = { - { 1., -3., 6., 2.}, - { 8., 2., 2., 4.}, - { 2., -1., 1., 1.} - }; - assert( blas::real_doubled(B) == B_real_doubled ); -``` - -Usage: -```cpp - multi::array const a_real = { - { 1., 3., 1.}, - { 9., 7., 1.}, - }; - - multi::array const b = { - { 11.+1.*I, 12.+1.*I, 4.+1.*I, 8.-2.*I}, - { 7.+8.*I, 19.-2.*I, 2.+1.*I, 7.+1.*I}, - { 5.+1.*I, 3.-1.*I, 3.+8.*I, 1.+1.*I} - }; - - multi::array c({2, 4}); - - blas::real_doubled(c) = blas::gemm(1., a_real, blas::real_doubled(b)); // c = a_real*b -``` - -## Installation and Tests - -... - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/asum.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/asum.hpp deleted file mode 100644 index 1198375ff3..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/asum.hpp +++ /dev/null @@ -1,81 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -// Copyright 2019-2021 Alfredo A. 
Correa - -// TODO(correaa) : make it work with thrust complex - -#ifndef MULTI_ADAPTORS_BLAS_ASUM_HPP -#define MULTI_ADAPTORS_BLAS_ASUM_HPP - -#include "../blas/core.hpp" - -namespace boost::multi::blas { - -template -auto asum_n(It first, Size n) -->decltype(asum(n, base(first), stride(first))) { - return asum(n, base(first), stride(first)); } - -using std::distance; - -template -auto asum(It f, It last) -->decltype(asum_n(f, distance(f, last))) {assert(stride(f) == stride(last)); - return asum_n(f, distance(f, last)); } - -using std::begin; using std::end; - -template -auto asum(X1D const& x) -->decltype(asum(begin(x), end(x))) {assert( not offset(x) ); - return asum(begin(x), end(x)); } - -} // end namespace boost::multi::blas - -//#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -//#define BOOST_TEST_MODULE "C++ Unit Tests for Multi.BLAS asum" -//#define BOOST_TEST_DYN_LINK -//#include -////#include - -//#include "../../array.hpp" -////#include "../../utility.hpp" - -//#include // accumulate - -//namespace multi = boost::multi; -//using multi::blas::asum; - -//BOOST_AUTO_TEST_CASE(multi_blas_asum_double){ -// multi::array const A = { -// {1., 2., 3., 4.}, -// {-5., 6., -7., 8.}, -// {9., 10., 11., 12.} -// }; -// BOOST_REQUIRE(asum(A[1]) == std::accumulate(begin(A[1]), end(A[1]), 0., [](auto&& a, auto&& b){return a+std::abs(b);})); -//} - -//BOOST_AUTO_TEST_CASE(multi_blas_asum_complex){ - -// using complex = std::complex; complex const I{0, 1}; -// multi::array const A = { -// { 1. + 1.*I, 2., 3., 4.}, -// {-5. + 3.*I, 6., -7., 8.}, -// { 9. - 2.*I, 10., 11., 12.} -// }; -// BOOST_REQUIRE(asum(rotated(A)[0]) == 1.+1. + 5.+3. 
+ 9.+2.); - -//} - -//BOOST_AUTO_TEST_CASE(multi_blas_asum_double_carray){ -//// double A[3][4] = { -//// {1., 2., 3., 4.}, -//// {-5., 6., -7., 8.}, -//// {9., 10., 11., 12.} -//// }; (void)A; -//// using std::begin; using std::end; -//// BOOST_REQUIRE(asum(A[1]) == std::accumulate(begin(A[1]), end(A[1]), 0., [](auto&& a, auto&& b){return a+abs(b);})); -//} - -//#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/axpy.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/axpy.hpp deleted file mode 100644 index 444c90e7e5..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/axpy.hpp +++ /dev/null @@ -1,106 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2021 Alfredo A. Correa - -#ifndef MULTI_ADAPTORS_BLAS_AXPY_HPP -#define MULTI_ADAPTORS_BLAS_AXPY_HPP - -#include "../../array_ref.hpp" - -#include "../../adaptors/blas/core.hpp" - -#include "../../config/NODISCARD.hpp" - -namespace boost::multi::blas { - -using core::axpy; - -template -auto axpy_n(typename It1::value_type alpha, It1 first, Size n, OutIt d_first) -->decltype(axpy(n, &alpha, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n) { - return axpy(n, &alpha, base(first) , stride(first) , base(d_first) , stride(d_first) ), d_first + n; } - -template{}>> -auto axpy_n(Context&& ctxt, typename It1::value_type alpha, It1 first, Size n, OutIt d_first) -->decltype(std::forward(ctxt).axpy(n, &alpha, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n) { - return std::forward(ctxt).axpy(n, &alpha, base(first) , stride(first) , base(d_first) , stride(d_first)) , d_first + n; } - -template()[0] = 0. 
)> -auto axpy(typename X1D::element alpha, X1D const& x, Y1D&& y) // NOLINT(readability-identifier-length) conventional BLAS names -->decltype(/*axpy_n(alpha, x.begin(), x.size(), y.begin()),*/ axpy_n(alpha, x.begin(), size(x), y.begin()), std::forward(y)) { - assert(size(x)==size(y)); // intel doesn't like ADL in deduced/sfinaed return types // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : bug in clang-tidy https://reviews.llvm.org/D31130 - return axpy_n(alpha, begin(x), size(x), begin(y)), std::forward(y); -} - -template()[0] = 0. )> -auto axpy(Context&& ctxt, typename X1D::element alpha, X1D const& x, Y1D&& y) // NOLINT(readability-identifier-length) conventional BLAS names -->decltype(/*axpy_n(std::forward(ctxt), alpha, x.begin( ), x.size( ), y.begin( )),*/ std::forward(y)) { - assert(size(x)==size(y)); // intel doesn't like ADL in deduced/sfinaed return types // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : bug in clang-tidy https://reviews.llvm.org/D31130 - return axpy_n(std::forward(ctxt), alpha, begin(x), size(x), begin(y)), std::forward(y); } - -template -auto axpy(X1D const& x, Y1D&& y) -> Y1D&& { // NOLINT(readability-identifier-length) conventional BLAS names - return axpy(+1., x, std::forward(y)); -} - -template{}> > -auto axpy(Context&& ctxt, X1D const& x, Y1D&& y) -> Y1D&& { // NOLINT(readability-identifier-length) conventional BLAS names - return axpy(std::forward(ctxt), +1., x, std::forward(y)); -} - -template -class axpy_range { - Context ctxt_; - Scale alpha_; - ItX x_begin_; - size_type count_; - - public: - axpy_range(axpy_range const&) = delete; - axpy_range(axpy_range&&) noexcept = delete; - ~axpy_range() = default; - auto operator=(axpy_range const&) -> axpy_range& = delete; - auto operator=(axpy_range&&) noexcept -> axpy_range& = delete; - - axpy_range(Context ctxt, Scale alpha, ItX x_first, ItX x_last) - : ctxt_{ctxt}, alpha_{alpha}, x_begin_{x_first}, count_{x_last 
- x_first} {} - - template - friend auto operator+=(Other&& other, axpy_range const& self) -> Other&& { - assert(other.size() == self.count_); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : bug in clang-tidy https://reviews.llvm.org/D31130 - blas::axpy_n(std::forward(self.ctxt_), +self.alpha_, self.x_begin_, self.count_, other.begin()); - return std::forward(other); - } - template - friend auto operator-=(Other&& other, axpy_range const& self) -> Other&& { - assert(other.size() == self.count_); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : bug in clang-tidy https://reviews.llvm.org/D31130 - blas::axpy_n(std::forward(self.ctxt_), -self.alpha_, self.x_begin_, self.count_, other.begin()); - return std::forward(other); - } - auto operator*=(Scale s) & -> axpy_range& {alpha_ *= s; return *this;} // NOLINT(readability-identifier-length) conventional BLAS naming -}; - -template{}>> -auto axpy(Context&& ctxt, Scalar a, X1D const& x) // NOLINT(readability-identifier-length) conventional BLAS naming --> axpy_range { // NOLINT(readability-identifier-length) conventional BLAS naming - return {std::forward(ctxt), a, begin(x), end(x)}; -} - -template -auto axpy(Scalar a, X1D const& x) // NOLINT(readability-identifier-length) conventional BLAS naming --> axpy_range { - static blas::context ctxt{}; - return {ctxt, a, begin(x), end(x)}; // TODO(correaa) fix temporary -} - -namespace operators { - -template auto operator+=(X1D&& x, Y1D const& other) DECLRETURN(axpy(+1., other, std::forward(x))) // NOLINT(readability-identifier-length) conventional name in BLAS -template auto operator-=(X1D&& x, Y1D const& other) DECLRETURN(axpy(-1., other, std::forward(x))) // NOLINT(readability-identifier-length) conventional name in BLAS - -template auto operator+(X1D const& x, Y1D const& y) -> std::decay_t {auto X = x.decay(); X += y; return X;} // NOLINT(readability-identifier-length) conventional name in BLAS 
-template auto operator-(X1D const& x, Y1D const& y) -> std::decay_t {auto X = x.decay(); X -= y; return X;} // NOLINT(readability-identifier-length) conventional name in BLAS - -} // end namespace operators - -} // end namespace boost::multi::blas -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/copy.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/copy.hpp deleted file mode 100644 index 41ec7e51c8..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/copy.hpp +++ /dev/null @@ -1,134 +0,0 @@ - // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. Correa - -#ifndef MULTI_ADAPTORS_BLAS_COPY_HPP -#define MULTI_ADAPTORS_BLAS_COPY_HPP - -#include "../blas/core.hpp" -#include "../blas/operations.hpp" - -#include "../../config/NODISCARD.hpp" - -#include - -namespace boost::multi::blas { - -using core::copy; - -template -auto copy_n(It first, Size n, OutIt d_first) -->decltype(copy(n, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n) { - return copy(n, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n; } - -template{}>> -auto copy_n(Context&& ctxt, It first, Size n, OutIt d_first) -->decltype(copy(std::forward(ctxt), n, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n) { - return copy(std::forward(ctxt), n, first.base(), first.stride(), d_first.base(), d_first.stride()), d_first + n; } - -template -auto copy(It first, It last, OutIt d_first) -->decltype(copy_n(first, last - first, d_first)) { - return copy_n(first, last - first, d_first); } - -template{}>> -auto copy(Context&& ctxt, It first, It last, OutIt d_first) -->decltype(copy_n(std::forward(ctxt), first, last - first, d_first)) { - return copy_n(std::forward(ctxt), first, last - first, d_first); } - -template -auto copy(X1D const& x, Y1D&& y) // NOLINT(readability-identifier-length) BLAS naming 
-->decltype(blas::copy_n(x.begin(), size(x), y.begin()), std::forward(y)) { - assert( (x.size() == y.size()) ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : assert - return blas::copy_n(x.begin(), x.size(), y.begin()), std::forward(y); } - -template -auto copy(Context&& ctxt, X1D const& x, Y1D&& y) // NOLINT(readability-identifier-length) BLAS naming -->decltype(blas::copy_n(std::forward(ctxt), x.begin(), size(x), y.begin()), std::forward(y)) { - assert(x.size()==y.size()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr assert - return blas::copy_n(std::forward(ctxt), x.begin(), x.size(), y.begin()), std::forward(y); -} - -template -class copy_iterator{ - ContextPtr ctxt = {}; - It1D it_; -public: - using difference_type = typename std::iterator_traits::difference_type; - using value_type = typename std::iterator_traits::value_type; - using pointer = void; - using reference = void; - using iterator_category = std::output_iterator_tag; - using iterator_type = It1D; - using context_type = ContextPtr; - constexpr explicit copy_iterator(It1D it) : it_{it}{} - constexpr copy_iterator(ContextPtr cp, It1D it) : ctxt{cp}, it_{it}{} - constexpr auto base() const -> iterator_type{return it_;} - template - friend constexpr auto copy_n(copy_iterator first, difference_type count, It1DOut result) -> It1DOut{ - return blas::copy_n(first.ctxt, first.base(), count, result); - } - template - friend constexpr auto copy(copy_iterator first, copy_iterator last, It1DOut d_first) -> It1DOut{ - return copy_n(first, distance(first, last), d_first); - } - template - friend constexpr auto uninitialized_copy(copy_iterator first, copy_iterator last, It1DOut d_first) -> It1DOut{ - return copy_n(first, distance(first, last), d_first); - } - friend constexpr auto distance(copy_iterator const& self, copy_iterator const& other) -> difference_type{ - assert(stride(other.it_) == stride(self.it_)); // 
NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr assert - return other.it_ - self.it_; - } - constexpr auto operator*() const -> value_type{return *it_;} -}; - -template::difference_type> -class copy_range { - ContextPtr ctxp_ = {}; - It1D begin_, end_; - - public: - using difference_type = DiffType; - using iterator = copy_iterator; - using decay_type = DecayType; - - copy_range(copy_range const&) = delete; - copy_range(copy_range&&) noexcept = default; - auto operator=(copy_range const&) -> copy_range& = delete; - auto operator=(copy_range&&) -> copy_range& = delete; - ~copy_range() = default; - - constexpr copy_range(It1D first, It1D last) : begin_{first}, end_{last} {} - constexpr copy_range(ContextPtr ctxp, It1D first, It1D last) : ctxp_{ctxp}, begin_{first}, end_{last} {} - constexpr auto size() const -> difference_type {return end_ - begin_;} - constexpr auto begin() const {return iterator{ctxp_, begin_};} - constexpr auto end() const {return iterator{ctxp_, end_ };} - constexpr auto extensions() const -> typename decay_type::extensions_type {return {multi::iextension{size()}};} - template(), std::declval()))> - operator Other() const{return Other(begin(), end());} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax - friend auto operator+(copy_range const& self) {return self.operator decay_type();} -}; - -template NODISCARD() -auto copy(It const& first, It const& last) -->decltype(copy_range{first, last}){ - return copy_range{first, last};} - -template NODISCARD() -auto copy(Context&& ctxt, It const& first, It const& last) -->decltype(copy_range{ctxt, first, last}){ - return copy_range{ctxt, first, last};} - -template NODISCARD() -auto copy(A const& array) // need to specify templates (instead of deduced for intel) -->decltype(copy(array.begin(), array.end())){ - return copy(array.begin(), array.end());} - -template{}>> NODISCARD() -auto copy(Context&& ctxt, A 
const& a) // NOLINT(readability-identifier-length) conventional name in BLAS -->decltype(copy(std::forward(ctxt), a.begin(), a.end())){ - return copy(std::forward(ctxt), a.begin(), a.end());} - -} // end namespace boost::multi::blas - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/cuda/tests/iamax.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/cuda/tests/iamax.cpp deleted file mode 100644 index 52f49c2881..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/cuda/tests/iamax.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS -$CXXX $CXXFLAGS $0 -o $0x `pkg-config --libs blas` -Wno-deprecated-declarations `pkg-config --cflags --libs cudart-11.0 cublas-11.0 blas` -lboost_unit_test_framework&&$0x&&rm $0x; exit -#endif -// © Alfredo A. Correa 2019-2020 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS iamax" -#define BOOST_TEST_DYN_LINK -#include - -#include "../../../../adaptors/blas.hpp" -#include "../../../../adaptors/cuda.hpp" -#include "../../../../adaptors/blas/cuda.hpp" - -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_cuda_iamax){ - using complex = std::complex; complex const I{0, 1}; - { - multi::array const A = {1. + 2.*I, 2., 3. + 3.*I, 4.}; - using multi::blas::iamax; - BOOST_REQUIRE( iamax(A) == 2 ); - } - { - multi::cuda::array const A = {1. + 2.*I, 2., 3. + 3.*I, 4.}; - using multi::blas::iamax; - BOOST_REQUIRE( iamax(A) == 2 ); - } - { - multi::cuda::managed::array const A = {1. + 2.*I, 2., 3. 
+ 3.*I, 4.}; - using multi::blas::iamax; - BOOST_REQUIRE( iamax(A) == 2 ); - } -} - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/filling.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/filling.hpp deleted file mode 100644 index a5e9cf95d6..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/filling.hpp +++ /dev/null @@ -1,81 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#ifndef MULTI_ADAPTORS_BLAS_FILLING_HPP -#define MULTI_ADAPTORS_BLAS_FILLING_HPP - -#include "multi/array_ref.hpp" - -#include "../blas/core.hpp" -#include "../blas/operations.hpp" - -namespace boost::multi::blas { - -enum class filling : char { - lower = 'U', - upper = 'L' -}; - -[[maybe_unused]] static constexpr filling U = filling::upper; // NOLINT(readability-identifier-length) BLAS naming -[[maybe_unused]] static constexpr filling L = filling::lower; // NOLINT(readability-identifier-length) BLAS naming - -inline auto flip(filling side) -> filling { - switch(side) { - case filling::lower: return filling::upper; - case filling::upper: return filling::lower; - } __builtin_unreachable(); -} - -inline auto operator-(filling side) -> filling {return flip(side);} -inline auto operator+(filling side) -> filling {return side;} - -template{}, int> =0> -auto detect_triangular_aux(A2D const& array, std::false_type /*false*/) -> filling { - { - for(auto i = size(array); i != 0; --i) { // NOLINT(altera-id-dependent-backward-branch) - auto const asum_up = blas::asum(begin(array[i-1])+i, end(array[i-1])); - if(std::isnan(asum_up)) {return filling::lower;} - if(asum_up !=0. ) {return filling::upper;} - - auto const asum_lo = blas::asum(begin(rotated(array)[i-1])+i, end(rotated(array)[i-1])); - if(std::isnan(asum_lo)) {return filling::upper;} - if(asum_lo != 0. 
) {return filling::lower;} - } - } - return filling::lower; -} - -template -auto detect_triangular(A2D const& /*array*/) -> filling; - -template{}, int> =0> -auto detect_triangular_aux(A2D const& array) -> filling { - return flip(detect_triangular(hermitized(array))); -} - -template -auto detect_triangular(A2D const& array) -> filling { -#if defined(__cpp_if_constexpr) - if constexpr(not is_conjugated{}) { - using blas::asum; - for(auto i = size(array); i != 0; --i) { - auto const asum_up = asum(A[i-1]({i, array[i-1].size()})); - if(std::isnan(asum_up)) {return filling::lower;} - if(asum_up!=0. ) {return filling::upper;} - - auto const asum_lo = asum(rotated(array)[i-1]({i, rotated(array)[i-1].size()})); - if(std::isnan(asum_lo)) {return filling::upper;} - if(asum_lo != 0. ) {return filling::lower;} - } - return filling::lower; - } else { - return flip(detect_triangular(hermitized(array))); - } -#else - return detect_triangular_aux(A);//, is_conjugated{});//std::integral_constant()>{}); -#endif -} - -} // end namespace boost::multi::blas - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/gemm.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/gemm.hpp deleted file mode 100644 index 33c8761ba9..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/gemm.hpp +++ /dev/null @@ -1,304 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. 
Correa - -#ifndef MULTI_ADAPTORS_BLAS_GEMM_HPP -#define MULTI_ADAPTORS_BLAS_GEMM_HPP - -#include "../blas/core.hpp" -#include "../blas/gemv.hpp" -#include "../blas/numeric.hpp" -#include "../blas/operations.hpp" - -namespace boost::multi::blas { - -using core::gemm; - -template -auto xbase_aux(It const& it, std::true_type const& /*true */) -->decltype(underlying(base(it))) { - return underlying(base(it)); } - -template -auto xbase_aux(It const& it, std::false_type const& /*false*/) -->decltype(base(it)) { - return base(it); } - -template -auto xbase(It const& it) -->decltype(xbase_aux(it, std::integral_constant{}>{})) { - return xbase_aux(it, std::integral_constant{}>{}); } - -template -auto gemm_n(Context&& ctxt, typename It2DA::element alpha, It2DA a_first, Size a_count, It2DB b_first, typename It2DA::element beta, It2DC c_first) // NOLINT(readability-function-cognitive-complexity) : 125 -//->decltype(std::forward(ctxt).gemm('N', 'N', b_first->size(), a_count, a_first->size(), &alpha, xbase(b_first), b_first->size() , xbase(a_first), a_first->size(), &beta, c_first.base(), c_first->size() ), It2DC{}) -try { - assert( b_first->size() == c_first->size() ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - assert( a_first.stride()==1 or a_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - assert( b_first.stride()==1 or b_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - assert( c_first.stride()==1 or c_first->stride()==1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - - if(a_count != 0){ - #define CTXT std::forward(ctxt) - if constexpr (!is_conjugated{} and !is_conjugated{}){ - if (a_first->stride()==1 and b_first->stride()==1 and c_first->stride()==1) { - if ( a_count==1 and b_first->size()==1 ) {CTXT.gemm('N', 'N', b_first->size(), a_count, a_first->size(), &alpha, 
base(b_first), b_first->size() , base(a_first), a_first->size() , &beta, base(c_first), c_first->size() );} - else if( a_count==1 ) {CTXT.gemm('N', 'N', b_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first->size() );} - else {CTXT.gemm('N', 'N', b_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first. stride());} - }else if(a_first->stride()==1 and b_first->stride()==1 and c_first. stride()==1) { - if (a_count==1) {CTXT.gemm('T', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first. stride(), base(b_first), b_first->size() , &beta, base(c_first), a_first->size() );} - else {CTXT.gemm('T', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first. stride(), base(b_first), b_first. stride(), &beta, base(c_first), c_first->stride());} - }else if(a_first. stride()==1 and b_first->stride()==1 and c_first->stride()==1) { - if (a_count==1) {CTXT.gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first->stride(), &beta, base(c_first), a_count );} - else {CTXT.gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first.stride());} - }else if(a_first. stride()==1 and b_first->stride()==1 and c_first. stride()==1) { - if (a_count==1) {CTXT.gemm('N', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first->stride(), base(b_first), a_first->size() , &beta, base(c_first), b_first->size() );} - else {CTXT.gemm('N', 'T', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first->stride(), base(b_first), b_first. stride(), &beta, base(c_first), c_first->stride());} - }else if(a_first->stride()==1 and b_first.stride()==1 and c_first. 
stride()==1) { - if (a_count==1 and b_first->size()==1 ) {CTXT.gemm('N', 'N', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->size() , base(a_first), a_first->size() , &beta, base(c_first), c_first->stride());} - else if(a_count==1) {CTXT.gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first->stride());} - else if(a_first->size() == 1 and b_first->size() == 1) - {CTXT.gemm('N', 'N', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first->stride());} - else {CTXT.gemm('N', 'T', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first->stride());} - }else if(a_first->stride()==1 and b_first. stride()==1 and c_first->stride()==1) { - if (a_count==1) {CTXT.gemm('T', 'N', a_count, c_first->size(), a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first. stride());} - else {CTXT.gemm('T', 'N', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first. stride());} - }else if(a_first. stride()==1 and b_first.stride( )==1 and c_first. stride()==1) { - if (b_first->size()==1) {CTXT.gemm('N', 'N', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first->stride(), base(b_first), b_first->stride(), &beta, base(c_first), a_count );} - else {CTXT.gemm('N', 'N', a_count, b_first->size(), a_first->size(), &alpha, base(a_first), a_first->stride(), base(b_first), b_first->stride(), &beta, base(c_first), c_first->stride());} - }else if(a_first. 
stride()==1 and b_first.stride( )==1 and c_first->stride()==1) { - {CTXT.gemm('T', 'T', b_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first->stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first. stride());} - } else {assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - - }else if constexpr(!is_conjugated{} and is_conjugated{}){ - if (a_first->stride()==1 and b_first->stride()==1 and c_first->stride()==1){ - // TODO(correaa) : check why these two branches are identical - /* if(b_first->size()==1)*/{CTXT.gemm('C', 'N', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first.stride());} - /* else {CTXT.gemm('C', 'N', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first.stride());}*/ - }else if(a_first->stride()==1 and b_first. stride()==1 and c_first->stride()==1){ - if (a_count==1) {CTXT.gemm('C', 'N', a_count, c_first->size(), a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->size() , &beta, base(c_first), c_first.stride());} - else {CTXT.gemm('C', 'N', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first.stride(), &beta, base(c_first), c_first.stride());} - }else if(a_first->stride()==1 and b_first. stride()==1 and c_first. stride()==1){ - {CTXT.gemm('C', 'N', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first. stride(), &beta, base(c_first), c_first->stride());} - }else if(a_first. stride()==1 and b_first. stride()==1 and c_first. 
stride()==1){ - {CTXT.gemm('C', 'T', c_first->size(), a_count, a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first->stride());} - }else if(a_first. stride()==1 and b_first. stride()==1 and c_first->stride()==1){ - {CTXT.gemm('C', 'T', a_count, c_first->size(), a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), base(a_first), a_first->stride(), &beta, base(c_first), c_first. stride());} - }else{assert(0);} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - }else if constexpr( is_conjugated{} and !is_conjugated{}){ - if (a_first. stride()==1 and b_first->stride()==1 and c_first->stride()==1){ - if (a_count==1) {CTXT.gemm('N', 'C', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), underlying(base(a_first)), a_first->stride(), &beta, base(c_first), a_first->size() );} - else {CTXT.gemm('N', 'C', c_first->size(), a_count, a_first->size(), &alpha, base(b_first), b_first. stride(), underlying(base(a_first)), a_first->stride(), &beta, base(c_first), c_first.stride());} - }else {assert(0);} - }else if constexpr( is_conjugated{} and is_conjugated{}){ - if (a_first. stride()==1 and b_first. stride()==1 and c_first->stride()==1){ - {CTXT.gemm('C', 'C', a_count, c_first->size(), a_first->size(), &alpha, underlying(base(b_first)), b_first->stride(), underlying(base(a_first)), a_first->stride(), &beta, base(c_first), c_first. 
stride());} - }else {assert(0);} - } - #undef CTXT - } - return c_first + a_count; -} catch(std::logic_error& e) { - using std::to_string; - throw std::logic_error{ - "couldn't do "+std::string(__PRETTY_FUNCTION__)+" of layout a_count="+std::to_string(a_count) // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - +" a_strides="+ to_string(a_first.stride()) +","+ to_string(a_first->stride()) +" a->size="+ to_string(a_first->size()) - +" b_strides="+ to_string(b_first.stride()) +","+ to_string(b_first->stride()) +" b->size="+ to_string(b_first->size()) - +" c_strides="+ to_string(c_first.stride()) +","+ to_string(c_first->stride()) +" c->size="+ to_string(c_first->size()) - +" because " + e.what() - }; -} - -template // TODO(correaa) automatic deduction of context -auto gemm_n(typename It2DA::element alpha, It2DA a_first, Size a_count, It2DB b_first, typename It2DA::element beta, It2DC c_first) -->decltype(gemm_n(Context{}, alpha, a_first, a_count, b_first, beta, c_first)) { - return gemm_n(Context{}, alpha, a_first, a_count, b_first, beta, c_first); } - -template -auto gemm(Context&& ctx, typename A::element alpha, A const& a, B const& b, typename A::element beta, C&& c) -> C&& { // NOLINT(readability-identifier-length) BLAS naming - assert( size( a) == size( c) ); - if(not a.is_empty()) {assert( size(~a) == size( b) );} - if constexpr(is_conjugated{}) {blas::gemm (std::forward(ctx), conj(alpha), conj(a), conj(b) , conj(beta), conj(c) );} - else {blas::gemm_n(std::forward(ctx), alpha , begin(a), size(a), begin(b), beta , begin(c));} - return std::forward(c); -} - -template -auto gemm(typename A::element alpha, A const& a, B const& b, typename A::element beta, C&& c) -> C&& { // NOLINT(readability-identifier-length) BLAS naming - return gemm(blas::context{}, alpha, a, b, beta, std::forward(c)); -} - -template -class gemm_range; - -template -class gemm_reference { // TODO(correaa) implement this in terms of gemv_range - Ext exts_; - - 
public: - explicit gemm_reference(Ext exts) : exts_{std::move(exts)} {} - auto extensions() const {return exts_;} - friend auto extensions(gemm_reference const& self) {return self.extensions();} -}; - -template -class gemm_iterator { - ContextPtr ctxtp_; - Scalar s_; - ItA a_it_; - ItB b_begin_; - gemm_iterator(ContextPtr ctxtp, Scalar s, ItA a_it, ItB b_begin) : ctxtp_{ctxtp}, s_{s}, a_it_{std::move(a_it)}, b_begin_{std::move(b_begin)} {} // NOLINT(readability-identifier-length) BLAS naming - template - friend class gemm_range; - - public: - gemm_iterator(gemm_iterator const&) = default; - gemm_iterator(gemm_iterator&&) noexcept = default; - ~gemm_iterator() = default; - auto operator=(gemm_iterator&&) -> gemm_iterator& = delete; - auto operator=(gemm_iterator const&) -> gemm_iterator& = delete; - - using difference_type = typename std::iterator_traits::difference_type; - using value_type = typename std::iterator_traits::value_type; - using pointer = void*; - using reference = gemm_referenceextensions())>; - using iterator_category = std::random_access_iterator_tag; // using iterator_category = std::input_iterator_tag; - - static_assert( std::is_base_of::iterator_category>{} ); - - auto operator+=(difference_type n) -> gemm_iterator& {a_it_ += n; return *this;} - auto operator-=(difference_type n) -> gemm_iterator& {a_it_ -= n; return *this;} - - auto operator++() -> gemm_iterator& {return operator+=(1);} // required by random access concept requires even if not used explicitly - auto operator--() -> gemm_iterator& {return operator-=(1);} - - auto operator+(difference_type n) const {gemm_iterator ret{*this}; ret+=n; return ret;} - - friend auto operator-(gemm_iterator const& a, gemm_iterator const& b) -> difference_type { // NOLINT(readability-identifier-length) BLAS naming - assert(a.b_begin_ == b.b_begin_); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - return a.a_it_ - b.a_it_; - } - friend auto operator==(gemm_iterator 
const& self, gemm_iterator const& other) -> bool {return self.a_it_ == other.a_it_;} - friend auto operator!=(gemm_iterator const& self, gemm_iterator const& other) -> bool {return self.a_it_ != other.a_it_;} - - template - friend auto copy_n(gemm_iterator const& first, difference_type count, ItOut d_first) - ->decltype(blas::gemm_n(*std::declval(), std::declval(), std::declval(), count, std::declval(), 0., d_first)) try { - return blas::gemm_n(*first.ctxtp_ , first.s_ , first.a_it_ , count, first.b_begin_ , 0., d_first); - } catch(std::exception const& e) { - throw std::logic_error( - std::string{"in "} + __PRETTY_FUNCTION__ + "\nCouldn't decay product of arrays of size " + std::to_string(count) +"x"+ std::to_string(first.a_it_->size()) + " and " + // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - std::to_string(first.a_it_->size())+ "x" +std::to_string(first.b_begin_->size()) + " into " + std::to_string(count) +"x" + std::to_string(first.b_begin_->size()) + - "\nbecause\n"+e.what() - ); - } - - template - friend auto copy(gemm_iterator const& first, gemm_iterator const& last, ItOut d_first) {assert(first.s_ == last.s_); - return copy_n(first, last - first, d_first); - } - - template - friend auto uninitialized_copy_n(gemm_iterator const& first, difference_type count, ItOut d_first) { - return copy_n(first, count, d_first); - } - - template - friend auto uninitialized_copy(gemm_iterator const& first, gemm_iterator const& last, ItOut d_first) { - assert( first.s_ == last.s_ ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - return uninitialized_copy_n(first, last - first, d_first); - } - - auto operator*() const {return reference{b_begin_->extensions()};} -}; - -template -class gemm_range { - ContextPtr ctxtp_; - Scalar s_; - ItA a_begin_; - ItA a_end_; - ItB b_begin_; - - public: - gemm_range(gemm_range const&) = delete; - gemm_range(gemm_range&&) = delete; - auto operator=(gemm_range 
const&) -> gemm_range& = delete; - auto operator=(gemm_range&&) -> gemm_range& = delete; - ~gemm_range() = default; - - gemm_range(ContextPtr ctxtp, Scalar s, ItA a_first, ItA a_last, ItB b_first) // NOLINT(bugprone-easily-swappable-parameters,readability-identifier-length) BLAS naming - : ctxtp_{ctxtp} - , s_{s}, a_begin_{std::move(a_first)}, a_end_{std::move(a_last)} - , b_begin_{std::move(b_first)} - {} - - using iterator = gemm_iterator; - using decay_type = DecayType; - using size_type = typename decay_type::size_type; - - auto begin() const& -> iterator {return {ctxtp_, s_, a_begin_, b_begin_};} - auto end() const& -> iterator {return {ctxtp_, s_, a_end_ , b_begin_};} - friend auto begin(gemm_range const& self) {return self.begin();} - friend auto end (gemm_range const& self) {return self.end ();} - - auto size() const -> size_type {return a_end_ - a_begin_;} - auto extensions() const -> typename decay_type::extensions_type {return size()*b_begin_->extensions();} - friend auto extensions(gemm_range const& self) {return self.extensions();} -// operator decay_type() const{return decay_type(*this);} // do not use curly { } - auto operator+() const -> decay_type {return *this;} // TODO(correaa) : investigate why return decay_type{*this} doesn't work - template - friend auto operator+=(Arr&& a, gemm_range const& self) -> Arr&& { // NOLINT(readability-identifier-length) BLAS naming - blas::gemm_n(*self.ctxtp_, self.s_, self.a_begin_, self.a_end_ - self.a_begin_, self.b_begin_, 1., a.begin()); - return std::forward(a); - } -}; - -template{}> > -auto gemm(ContextPtr ctxtp, Scalar s, A2D const& a, B2D const& b) // NOLINT(readability-identifier-length) BLAS naming -->gemm_range -{ - return {ctxtp, s, begin(a), end(a), begin(b)}; -} - -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #pragma diagnostic push - #pragma diag_suppress = 
implicit_return_from_non_void_function - #endif -#elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function -#endif -template -auto gemm(Scalar s, A2D const& a, B2D const& b) { // NOLINT(readability-identifier-length) conventional BLAS naming - if constexpr(is_conjugated{}) { - auto ctxtp = blas::default_context_of(underlying(a.base())); - return blas::gemm(ctxtp, s, a, b); - } else { - auto ctxtp = blas::default_context_of(a.base()); - return blas::gemm(ctxtp, s, a, b); - } -} -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #pragma diagnostic pop - #endif -#elif defined __NVCOMPILER - #pragma diagnostic pop -#endif - -namespace operators { - template - auto operator*(A2D const& A, B2D const& B) // NOLINT(readability-identifier-length) conventional BLAS names - ->decltype(+blas::gemm(1., A, B)) { - return +blas::gemm(1., A, B); } -} // end namespace operators - -} // end namespace boost::multi::blas - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/gemv.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/gemv.hpp deleted file mode 100644 index 4fb7728dca..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/gemv.hpp +++ /dev/null @@ -1,154 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. 
Correa - -#ifndef MULTI_ADAPTORS_BLAS_GEMV_HPP -#define MULTI_ADAPTORS_BLAS_GEMV_HPP - -#include "../blas/core.hpp" -#include "../blas/dot.hpp" - -#include "./../../detail/../utility.hpp" - -namespace boost::multi::blas { - -using core::gemv; - -template -auto gemv_n(Context&& ctxt, A a, MIt m_first, Size count, XIt x_first, B b, YIt y_first) { // NOLINT(readability-identifier-length) BLAS naming - assert(m_first->stride()==1 or m_first.stride()==1); // blas doesn't implement this case - assert( x_first.base() != y_first.base() ); - - if constexpr(not is_conjugated{}) { - if (m_first .stride()==1) {std::forward(ctxt).gemv('N', count, m_first->size(), a, m_first.base() , m_first->stride(), x_first.base(), x_first.stride(), b, y_first.base(), y_first.stride());} - else if(m_first->stride()==1) {std::forward(ctxt).gemv('T', m_first->size(), count, a, m_first.base() , m_first. stride(), x_first.base(), x_first.stride(), b, y_first.base(), y_first.stride());} - else {assert(0);} - } else { - if (m_first->stride()==1) {std::forward(ctxt).gemv('C', m_first->size(), count, a, underlying(m_first.base()), m_first. stride(), x_first.base(), x_first.stride(), b, y_first.base(), y_first.stride());} - // else if(m_first. stride()==1) {assert(0);} // not implemented in blas (use cblas?) 
- else {assert(0);} // not implemented in blas - } - - struct { - MIt m_last; - YIt y_last; - } ret{m_first + count, y_first + count}; - - return ret; -} - -template -auto gemv_n(A a, MIt m_first, Size count, XIt x_first, B b, YIt y_first) { // NOLINT(readability-identifier-length) BLAS naming - return gemv_n(blas::context{}, a, m_first, count, x_first, b, y_first); -} - -template -auto gemv(A const& a, M const& m, V const& v, B const& b, W&& w) -> W&& { // NOLINT(readability-identifier-length) BLAS naming - assert(size( m) == size(w) ); - assert(size(~m) == size(v) ); - gemv_n(a, begin(m), size(m), begin(v), b, begin(w)); - return std::forward(w); -} - -template -class gemv_iterator { - Scalar alpha_ = 1.; - It2D m_it_; - It1D v_first_; - Context ctxt_; - - public: - using difference_type = typename std::iterator_traits::difference_type; - using value_type = typename std::iterator_traits::value_type; - using pointer = void; - using reference = void; - using iterator_category = std::random_access_iterator_tag; - - friend auto operator-(gemv_iterator const& self, gemv_iterator const& other) -> difference_type { - assert(self.v_first_ == other.v_first_); - return self.m_it_ - other.m_it_; - } - template - friend auto copy_n(gemv_iterator first, difference_type count, It1DOut result){ - if constexpr(std::is_same_v) {blas::gemv_n( first.alpha_, first.m_it_, count, first.v_first_, 0., result);} - else {blas::gemv_n(first.ctxt_, first.alpha_, first.m_it_, count, first.v_first_, 0., result);} - return result + count; - } - template - friend auto copy(gemv_iterator first, gemv_iterator last, It1DOut result){return copy_n(first, last - first, result);} - template - friend auto uninitialized_copy(gemv_iterator first, gemv_iterator last, It1DOut result) { - static_assert(boost::multi::is_trivially_default_constructible_v); - return copy(first, last, result); - } - gemv_iterator(Scalar alpha, It2D m_it, It1D v_first, Context ctxt) - : alpha_{alpha}, m_it_{std::move(m_it)}, 
v_first_{std::move(v_first)}, ctxt_{ctxt} {} - auto operator*() const -> value_type{return 0.;} -}; - -template -class gemv_range { - Scalar alpha_ = 1.; - It2D m_begin_; - It2D m_end_; - It1D v_first_; - Context ctxt_ = {}; - - public: - gemv_range(gemv_range&&) noexcept = default; - gemv_range(gemv_range const&) = delete; - ~gemv_range() = default; - auto operator=(gemv_range const&) = delete; - auto operator=(gemv_range&&) = delete; - - gemv_range(Scalar alpha, It2D m_first, It2D m_last, It1D v_first) // NOLINT(bugprone-easily-swappable-parameters) - : alpha_{alpha}, m_begin_{std::move(m_first)}, m_end_{std::move(m_last)}, v_first_{std::move(v_first)} { - assert(m_begin_.stride() == m_end_.stride()); - } - gemv_range(Context&& ctxt, Scalar alpha, It2D m_first, It2D m_last, It1D v_first) // NOLINT(bugprone-easily-swappable-parameters) - : alpha_{alpha} - , m_begin_{std::move(m_first)}, m_end_{std::move(m_last)} - , v_first_{std::move(v_first)} - , ctxt_{std::forward(ctxt)} { - assert(m_begin_.stride() == m_end_.stride()); - } - using iterator = gemv_iterator; - using decay_type = DecayType; - - auto begin() const -> iterator{return {alpha_, m_begin_, v_first_, ctxt_};} - auto end() const -> iterator{return {alpha_, m_end_ , v_first_, ctxt_};} - - auto size() const -> size_type{return end() - begin();} - auto extensions() const -> typename decay_type::extensions_type{return typename decay_type::extensions_type{{0, size()}};} - auto decay() const{return decay_type{*this};} - - friend auto operator+(gemv_range const& self) {return self.decay();} - template - friend auto operator+=(V&& v, gemv_range const& s) -> V&& { // NOLINT(readability-identifier-length) BLAS naming - if constexpr(std::is_same{}) {blas::gemv_n( s.alpha_, s.m_begin_, s.m_end_ - s.m_begin_, s.v_first_, 1., v.begin());} - else {blas::gemv_n(s.ctxt_, s.alpha_, s.m_begin_, s.m_end_ - s.m_begin_, s.v_first_, 1., v.begin());} - return std::forward(v); - } -}; - -template -auto gemv(Scalar s, M const& m, 
V const& v) // NOLINT(readability-identifier-length) BLAS naming -{//->decltype(gemv_range{s, m, v}){ - assert(size(~m) == size(v)); - return gemv_range(s, m.begin(), m.end(), v.begin());} - -template -auto gemv(Context&& ctxt, Scalar s, M const& m, V const& v) { // NOLINT(readability-identifier-length) BLAS naming - assert(size(~m) == size(v)); - return gemv_range(std::forward(ctxt), s, m.begin(), m.end(), v.begin()); -} - -namespace operators{ - template - auto operator%(M const& m, V const& v) // NOLINT(readability-identifier-length) BLAS naming - ->decltype(+blas::gemv(1., m, v)) { - return +blas::gemv(1., m, v); } -} // end namespace operators - -} // end namespace boost::multi::blas - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/iamax.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/iamax.hpp deleted file mode 100644 index be79a4b2ab..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/iamax.hpp +++ /dev/null @@ -1,68 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -// Copyright 2019-2021 Alfredo A. 
Correa -#ifndef MULTI_ADAPTORS_BLAS_IAMAX_HPP -#define MULTI_ADAPTORS_BLAS_IAMAX_HPP - -#include "../blas/core.hpp" - -namespace boost::multi::blas { - -template -auto iamax_n(It first, Size n) { - using core::iamax; - return iamax(n, base(first), stride(first)); - // if you get an error here make sure that you are including (and linking) the appropriate BLAS backend for your memory type -} - -template -auto iamax(It first, It last) -->decltype(iamax_n(first, std::distance(first, last))) { - return iamax_n(first, std::distance(first, last)); } - -template -auto iamax(X1D const& x) -->decltype(iamax(begin(x), end(x))) {assert( not offset(x) ); - return iamax(begin(x), end(x)); } - -template auto amax(X1D const& x) {return begin(x) + iamax(x);} - -} // end namespace boost::multi::blas - -//#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -//#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS iamax" -//#define BOOST_TEST_DYN_LINK -//#include - -//#include "../../array.hpp" -//#include "../../utility.hpp" - -//#include -//#include - -//using std::cout; -//namespace multi = boost::multi; -//namespace blas = multi::blas; - -//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax_real){ -// multi::array const A = {1., 2., 3., 4.}; - -// auto i = blas::iamax(A); -// BOOST_REQUIRE( i == 3 ); -// BOOST_REQUIRE( A[blas::iamax(A)] == 4. ); - -// BOOST_REQUIRE( *blas::amax(A) == 4. ); -//} - -//using complex = std::complex; - -//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax_complex){ -// multi::array const A = {1., 2., 3., 4.}; -// auto i = blas::iamax(A); -// BOOST_REQUIRE( i == 3 ); -// BOOST_REQUIRE( A[blas::iamax(A)] == 4. ); -// BOOST_REQUIRE( *blas::amax(A) == 4. 
); -//} - -//#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/nrm2.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/nrm2.hpp deleted file mode 100644 index 5fa63b5f79..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/nrm2.hpp +++ /dev/null @@ -1,184 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#ifndef MULTI_ADAPTORS_BLAS_NRM2_HPP -#define MULTI_ADAPTORS_BLAS_NRM2_HPP - -#include "../blas/core.hpp" - -#include "../../array.hpp" - -#include // std::norm - -namespace boost::multi::blas { - -using core::nrm2; - -using multi::base; -using std::norm; // nvcc11 needs using std::FUNCTION and the FUNCTION (and it works in clang, gcc, culang, icc) - -template -auto nrm2(A1D const& x, A0D&& res) // NOLINT(readability-identifier-length) conventional BLAS naming -->decltype(nrm2(size(x), x.base(), x.stride(), base(res)), std::forward(res)) { - return nrm2(size(x), x.base(), x.stride(), base(res)), std::forward(res); } - -#if 0 -template -auto nrm2(A1D const& x, double& r) -->decltype(nrm2(x.size(), x.base(), x.stride(), &r), r){ - return nrm2(x.size(), x.base(), x.stride(), &r), r;} - -template -auto nrm2(A1D const& x, float& r) -->decltype(nrm2(x.size(), x.base(), x.stride(), &r), r){ - return nrm2(x.size(), x.base(), x.stride(), &r), r;} -#endif - -template< - class A1D, typename T = double, // decltype(norm(std::declval())), - class Alloc = typename std::allocator_traits::template rebind_alloc -> -NODISCARD("") -auto nrm2(A1D const& array) -//->std::decay_t({}, x.get_allocator()) ))>{ -->std::decay_t({})))> { // array.get_allocator() in decltype doesn't work for icc - return nrm2(array, multi::static_array({}, array.get_allocator()));} - -template())), - class AllocR = typename std::allocator_traits::template rebind_alloc -> -NODISCARD("") -auto nrm2(A1D const& array, AllocR const& alloc) -->std::decay_t({}, 
alloc)))> { - return blas::nrm2(array, multi::static_array({}, alloc)) ; } - -namespace operators { - using std::norm; - template()))>//decltype(norm(std::declval()))> - NODISCARD("") auto operator^(A1D const& array, int n) - ->decltype(std::pow(Real{blas::nrm2(array)}, n)) { - return std::pow(Real{blas::nrm2(array)}, n); } -} // end namespace operators - -} // end namespace boost::multi::blas - -//#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -//#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS nrm2" -//#define BOOST_TEST_DYN_LINK -//#include - - -//#include "../../array.hpp" -//#include "../../complex.hpp" - -////#include - -//#include - -//namespace multi = boost::multi; - -//BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_real){ -// namespace blas = multi::blas; -// multi::array const cA = { -// {1., 2., 3., 4.}, -// {5., 6., 7., 8.}, -// {9., 10., 11., 12.} -// }; - -// double n; -// BOOST_REQUIRE( blas::nrm2(rotated(cA)[1], n) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); -// BOOST_REQUIRE( n == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); -// BOOST_REQUIRE( blas::nrm2(rotated(cA)[1]) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); - -// double n2 = blas::nrm2(rotated(cA)[1]); -// BOOST_REQUIRE( n == n2 ); - -// multi::array R(4); -// blas::nrm2( rotated(cA)[1], R[2]); -// BOOST_REQUIRE( R[2] == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); - -// multi::array R0; -// blas::nrm2( rotated(cA)[1], R0); -// BOOST_REQUIRE( R0 == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); - -// BOOST_REQUIRE( blas::nrm2(rotated(cA)[1]) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) 
); - -//} - -//BOOST_AUTO_TEST_CASE(multi_adaptor_blas_nrm2_operators){ -// multi::array X = {1.1,2.1,3.1, 4.1}; -// double n; multi::blas::nrm2(X, n); -// BOOST_REQUIRE( n == multi::blas::nrm2(X) ); - -//} - -//BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex_real_case){ -// using complex = std::complex; -// multi::array const cA = { -// {1., 2., 3., 4.}, -// {5., 6., 7., 8.}, -// {9., 10., 11., 12.} -// }; - -// using multi::blas::nrm2; -// double n; -// BOOST_REQUIRE( nrm2(rotated(cA)[1], n) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); -// BOOST_REQUIRE( nrm2(rotated(cA)[1]) == n ); -//} - -//#if 0 -//BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex_real_case_thrust){ -// using complex = thrust::complex; -// multi::array const cA = { -// {1., 2., 3., 4.}, -// {5., 6., 7., 8.}, -// {9., 10., 11., 12.} -// }; - -// using multi::blas::nrm2; -// double n; -// BOOST_REQUIRE( nrm2(rotated(cA)[1], n) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); -// BOOST_REQUIRE( nrm2(rotated(cA)[1]) == n ); -//} - -//BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex_real_case_types){ -// boost::mpl::for_each, -// thrust::complex//, -// // boost::multi::complex // TODO make this work -// >>([](auto cplx){ -// multi::array const cA = { -// {1., 2., 3., 4.}, -// {5., 6., 7., 8.}, -// {9., 10., 11., 12.} -// }; - -// using multi::blas::nrm2; -// double n; -// BOOST_REQUIRE( nrm2(rotated(cA)[1], n) == std::sqrt( 2.*2. + 6.*6 + 10.*10.) ); -// BOOST_REQUIRE( nrm2(rotated(cA)[1]) == n ); -// }); -//} -//#endif - -//BOOST_AUTO_TEST_CASE(multi_adaptor_multi_nrm2_complex){ -// using complex = std::complex; complex const I{0,1}; -// multi::array const cA = { -// {1., 2. + 1.*I, 3., 4.}, -// {5., 6. + 4.*I, 7., 8.}, -// {9., 10. 
- 3.*I, 11., 12.} -// }; - -// using multi::blas::nrm2; -// double n; -// BOOST_REQUIRE( nrm2(rotated(cA)[1], n) == std::sqrt( norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1]) ) ); -// BOOST_REQUIRE( nrm2(rotated(cA)[1]) == std::sqrt( norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1]) ) ); - -// using namespace multi::blas::operators; -// BOOST_TEST_REQUIRE( (rotated(cA)[1]^-1) == 1/std::sqrt(norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1])) , boost::test_tools::tolerance(1e-15) ); -// BOOST_TEST_REQUIRE( (rotated(cA)[1]^2) == norm(cA[0][1]) + norm(cA[1][1]) + norm(cA[2][1]) , boost::test_tools::tolerance(1e-15) ); -//} - -//#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/numeric.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/numeric.hpp deleted file mode 100644 index b7992d5430..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/numeric.hpp +++ /dev/null @@ -1,274 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. 
Correa - -#ifndef MULTI_ADAPTORS_BLAS_NUMERIC_HPP -#define MULTI_ADAPTORS_BLAS_NUMERIC_HPP - -#include "../../array_ref.hpp" -#include "../../complex.hpp" - -#include "../../memory/pointer_traits.hpp" - -#include "numeric/is_complex.hpp" - -namespace boost { -namespace multi::blas { - -template struct complex_dummy {T real; T imag;}; - -template< - class A, typename Complex = typename std::decay_t::element, typename T=typename Complex::value_type, - class=std::enable_if_t::value> -> -auto real(A&& array) -->decltype(std::forward(array).template reinterpret_array_cast>().template member_cast(&complex_dummy::real)){ - return std::forward(array).template reinterpret_array_cast>().template member_cast(&complex_dummy::real);} - -template< - class A, class Complex = typename std::decay_t::element_type, typename T=typename Complex::value_type, - class=std::enable_if_t::value> -> -auto imag(A&& array) -->decltype(std::forward(array).template reinterpret_array_cast>().template member_cast(&complex_dummy::imag)){ - return std::forward(array).template reinterpret_array_cast>().template member_cast(&complex_dummy::imag);} - -template::element, typename RealElem = typename ComplexElem::value_type, - class=std::enable_if_t::value> -> -auto real_doubled(ComplexArr&& array) { // produces a real view of complex array with the last dimension duplicated and with interleaved real imaginary parts - return std::forward(array).template reinterpret_array_cast(2).rotated().flatted().unrotated(); -} - -template class involuted; - -template::reference, F> > class involuter; - -template -class involuted { - Ref r_; // [[no_unique_address]] - Involution f_; - -public: - using decay_type =std::decay_t()(std::declval()))>; - - constexpr explicit involuted(Ref ref, Involution fun) : r_{std::forward(ref)}, f_{fun}{} - constexpr explicit involuted(Ref ref) : r_{std::forward(ref)}, f_{}{} - - auto operator=(involuted const& other) -> involuted& = delete; - - ~involuted() = default; - 
involuted(involuted const&) = delete; - involuted(involuted&&) noexcept = default; // for C++14 - auto operator=(involuted&& other) noexcept -> involuted&{ - r_ = std::move(other.r_); - return *this; - } - - constexpr auto decay() const& -> decay_type{return f_(r_);} - - constexpr explicit operator decay_type() &{return f_(r_);} - constexpr explicit operator decay_type() const&{return f_(r_);} - constexpr explicit operator decay_type() &&{return f_(r_);} - - constexpr auto operator*(decay_type const& other) const{return f_(r_)*other;} - constexpr auto operator&()&& -> decltype(auto){ // NOLINT(google-runtime-operator) : reference-like object - return involuter()), Involution>{&r_, f_}; - } - - template() = (std::declval())(std::declval()))> - constexpr auto operator=(DecayType&& other)& -> involuted&{ - r_=f_(std::forward(other)); - return *this; - } - - template() = (std::declval())(std::declval()))> - constexpr auto operator=(DecayType&& other)&& -> involuted&{ - r_=f_(std::forward(other)); - return *this; - } - - template - constexpr auto operator==(DecayType&& other) const - ->decltype(this->operator decay_type()==other){ - return this->operator decay_type()==other;} - template - constexpr auto operator!=(DecayType&& other) const - ->decltype(this->operator decay_type()!=other){ - return this->operator decay_type()!=other;} - - friend constexpr auto operator==(decay_type const& other, involuted const& self){ - return other == self.operator decay_type();} - - template{}, int> =0> - friend constexpr auto operator==(DecayType&& other, involuted const& self) { - return other == self.operator decay_type(); - } - template{}, int> =0> - friend constexpr auto operator!=(DecayType&& other, involuted const& self) { - return other != self.operator decay_type();\ - } -// auto imag() const{return static_cast(*this).imag();} - template friend constexpr auto operator<<(Sink&& sink, involuted const& self) -> Sink& { - return sink<< self.operator decay_type(); - } - constexpr 
auto conj() const& {return adl_conj(operator decay_type());} - - template - friend constexpr auto imag(involuted const& self) - ->decltype(adl_imag(std::declval())) { - return adl_imag(self.operator decay_type()); } -}; - -#if defined(__cpp_deduction_guides) -template involuted(T&&, F) -> involuted; -//template involuted(T&, F)->involuted; -//template involuted(T const&, F)->involuted; -#endif - -template -class involuter; - -template -auto default_allocator_of(involuter const& iv) { - return default_allocator_of(iv.it_); -} - -template -class involuter { - It it_; // [[no_unique_address]] - F f_; - template friend class involuter; - - public: - using difference_type = typename std::iterator_traits::difference_type; - using value_type = typename std::iterator_traits::value_type; - using pointer = involuter;//svoid; // typename std::iterator_traits::pointer - using reference = Reference; - using iterator_category = typename std::iterator_traits::iterator_category; - using element_type = typename std::pointer_traits::element_type; - template using rebind = involuter::template rebind, F>; - - involuter() = default; -// ~involuter() = default; - - HD constexpr explicit involuter(It it) : it_{std::move(it)}, f_{} {} - HD constexpr explicit involuter(It it, F fun) : it_{std::move(it)}, f_{std::move(fun)} {} - -// involuter(involuter const& other) = default; - - template(typename Other::underlying_type{}))* = nullptr> - // cppcheck-suppress noExplicitConstructor - HD constexpr/*implct*/involuter(Other const& other) : it_{other.it_}, f_{other.f_}{} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : inherit implicit conversion of underlying type - template(typename Other::underlying_type{}))* = nullptr> - HD constexpr explicit involuter(Other const& other) : it_{other.it_}, f_{other.f_}{} - - constexpr auto operator*() const {return reference{*it_, f_};} - constexpr auto operator[](difference_type n) const {return reference{*(it_ + n), f_};} - - auto 
operator==(involuter const& other) const -> bool {return it_ == other.it_;} - auto operator!=(involuter const& other) const -> bool {return it_ != other.it_;} - - constexpr auto operator+=(difference_type n) -> involuter& {it_ += n; return *this;} - constexpr auto operator-=(difference_type n) -> involuter& {it_ -= n; return *this;} - - constexpr auto operator+(difference_type n) const {return involuter{it_ + n, f_};} - constexpr auto operator-(difference_type n) const {return involuter{it_ - n, f_};} - - auto operator-(involuter const& other) const{return it_ - other.it_;} - - explicit operator bool() const{return it_;} - using underlying_type = It; - friend /*constexpr*/ auto underlying(involuter const& self) -> underlying_type{return self.it_;} - constexpr explicit operator It() const {return underlying(*this);} -// friend auto get_allocator(involuter const& self){return get_allocator(self.it_);} - friend auto default_allocator_of(involuter const& inv){ - using multi::default_allocator_of; - return default_allocator_of(inv.it_); - } - using default_allocator_type = typename multi::pointer_traits::default_allocator_type; - friend auto get_allocator(involuter const& inv){ - using boost::multi::get_allocator; - return get_allocator(inv.it_); - } -}; - -template using negated = involuted>; -template using negater = involuter>; - -struct conjugate { - template - auto operator()(Complex&& zee) const -> decltype(auto) { - // using std::conj; /*for doubles?*/ - return multi::adl_conj(std::forward(zee)); // this is needed by icc - } -}; - -template using conjugated = involuted; - -template using conjugater = involuter;//, conjugated::reference> >; - -template auto make_conjugater(It it){return conjugater{it};} -template auto make_conjugater(conjugater it) -> It {return underlying(it);} - -template auto imag(involuted const& inv) {return inv.decay().imag();} -template auto real(involuted const& inv) {return inv.decay().real();} - -template auto has_imag_fun_aux(T const& 
value) -> decltype(imag(value), std::true_type {}); - auto has_imag_fun_aux(... ) -> decltype( std::false_type{}); -template struct has_imag_fun : decltype(has_imag_fun_aux(std::declval())){}; - - -template auto has_imag_mem_aux(T const& value) -> decltype(value.imag(), std::true_type {}); - auto has_imag_mem_aux(... ) -> decltype( std::false_type{}); -template struct has_imag_mem : decltype(has_imag_mem_aux(std::declval())){}; - -template struct has_imag : std::integral_constant{} or has_imag_mem{})>{}; - -template -struct is_complex_array : has_imag()))>> {}; -// template static auto _(T const& t) -> has_imag; -// constexpr explicit operator bool() &{return decltype(_(*base(std::declval()))){};} -// constexpr explicit operator bool() &&{return decltype(_(*base(std::declval()))){};} -// constexpr operator bool() const&{return decltype(_(*base(std::declval()))){};} -// static constexpr bool value = decltype(_(*base(std::declval()))){}; -// template constexpr auto operator()(AA&& /*unused*/){return _(*base(std::declval()));} -//}; - -template struct is_complex : has_imag {}; - -template -auto is_conjugated_aux(conjugater /*self*/) -> std::true_type ; -auto is_conjugated_aux(... 
) -> std::false_type; - -template struct is_conjugated : decltype(is_conjugated_aux(base(std::declval()))) { - template constexpr auto operator()(AA&& /*unused*/) {return is_conjugated_aux(base(std::declval()));} -}; - -template, typename Elem=typename D::element_type, typename Ptr=typename D::element_ptr, - std::enable_if_t{}, int> =0> -auto conj(A&& array) -> A&& { - return std::forward(array); -} - -template, typename Elem=typename D::element_type, typename Ptr=typename D::element_ptr, - std::enable_if_t{} and is_complex_array{}, int> =0> -auto conj(A&& array) -> decltype(auto) { - return std::forward(array).template static_array_cast>(); -} - -template, typename Elem=typename D::element_type, typename Ptr=typename D::element_ptr::underlying_type, - std::enable_if_t< is_conjugated{}, int> =0> -auto conj(A&& array) -->decltype(std::forward(array).template static_array_cast()) { - return std::forward(array).template static_array_cast(); } - -} // end namespace multi::blas - -template -auto default_allocator_of(multi::blas::involuter it) { - return multi::default_allocator_of(it.underlying()); -} - -} // end namespace boost - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/scal.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/scal.hpp deleted file mode 100644 index 3ab9ef63d9..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/scal.hpp +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef MULTI_ADAPTORS_BLAS_SCAL_HPP // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -#define MULTI_ADAPTORS_BLAS_SCAL_HPP -// Copyright 2019-2021 Alfredo A. 
Correa - -#include "../blas/core.hpp" - -namespace boost::multi::blas{ - -using core::scal; - -template -auto scal_n(Scalar const& a, It first, Size count) // NOLINT(readability-identifier-length) conventional BLAS naming -->decltype(scal(count, &a, first.base(), first.stride()), void()) { - scal(count, &a, first.base(), first.stride()); } - -template -auto scal(Scalar const& a, It1D first, It1D last) // NOLINT(readability-identifier-length) conventional BLAS naming -->decltype(blas::scal_n(a, first, last - first)){ - return blas::scal_n(a, first, last - first);} - -template // don't do this: ", typename Elem = typename X1D::element_type>" -auto scal(Scalar const& a, X1D&& x) // NOLINT(readability-identifier-length) conventional BLAS naming -->decltype(blas::scal(a, x.begin(), x.end()), std::forward(x)) { - return blas::scal(a, x.begin(), x.end()), std::forward(x); } - -template -class scal_range { - A alpha_; - - public: - using scalar_type = A; - explicit scal_range(A const& alpha) : alpha_{alpha} {} - template - friend auto operator*=(X1D&& x, scal_range const& self) // NOLINT(readability-identifier-length) conventional BLAS naming - ->decltype(std::forward(scal(std::declval(), x))) { - return std::forward(scal(self.alpha_, x));} -}; - -template auto scal(A const& array) {return scal_range{array};} - -} // end namespace boost::multi::blas - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/side.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/side.hpp deleted file mode 100644 index ca11bd213d..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/side.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef MULTI_ADAPTORS_BLAS_SIDE_HPP // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -#define MULTI_ADAPTORS_BLAS_SIDE_HPP -// Copyright 2019-2021 Alfredo A. 
Correa - -namespace boost::multi::blas { - -enum side : char { - left = 'L', - right = 'R'//, -// pre_multiply = 'R', -// post_multiply = 'L' -}; - -inline auto swap(side sid) -> side { - switch(sid) { - case side::left : return side::right; - case side::right: return side::left ; - } __builtin_unreachable(); -} - -} // end namespace boost::multi::blas - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/swap.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/swap.hpp deleted file mode 100644 index c4eb6784cb..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/swap.hpp +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef MULTI_ADAPTORS_BLAS_SWAP_HPP // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -#define MULTI_ADAPTORS_BLAS_SWAP_HPP -// Copyright 2019-2021 Alfredo A. Correa - -#include "../blas/core.hpp" - -namespace boost::multi::blas { - -template -auto swap(It1 first, It2 last, It2 first2) -> It2 { - assert(stride(first) == stride(last)); - using std::distance; - auto d = distance(first, last); - swap(d, base(first), stride(first), base(first2), stride(first2)); - return first2 + d; -} - -template -auto swap(X1D&& x, Y1D&& y) -> Y1D&& { - assert( size(x) == size(y) ); - assert( offset(x) == 0 and offset(y) == 0 ); - swap( begin(x), end(x), begin(y) ); - return std::forward(y); -} - -} // end namespace boost::multi::blas - -//#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -//#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS swap" -//#define BOOST_TEST_DYN_LINK -//#include - -//#include "../../array.hpp" -//#include "../../utility.hpp" - -//#include "../blas/dot.hpp" - -//namespace multi = boost::multi; - -//BOOST_AUTO_TEST_CASE(multi_blas_swap, *boost::unit_test::tolerance(0.00001) ){ -// multi::array A = { -// {1., 2., 3., 4.}, -// {5., 6., 7., 8.}, -// {9., 10., 11., 12.} -// }; -//// using multi::blas::swap; -// multi::blas::swap(rotated(A)[1], rotated(A)[3]); // can 
ambiguate with (friend) multi::swap -// BOOST_REQUIRE( A[0][1] == 4. ); -// BOOST_REQUIRE( A[0][3] == 2. ); -//} - -//#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/syrk.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/syrk.hpp deleted file mode 100644 index e5b325225b..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/syrk.hpp +++ /dev/null @@ -1,432 +0,0 @@ -#ifndef MULTI_ADAPTORS_BLAS_SYRK_HPP // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -#define MULTI_ADAPTORS_BLAS_SYRK_HPP -// Copyright 2019-2021 Alfredo A. Correa - -#include "../blas/core.hpp" -#include "../blas/filling.hpp" -#include "../blas/numeric.hpp" - -namespace boost::multi::blas { - -using core::syrk; - -template -auto syrk(filling c_side, AA alpha, A2D const& a, BB beta, C2D&& c) { // NOLINT(readability-identifier-length) BLAS naming -//->decltype(syrk('\0', '\0', size(c), size(a), alpha, base(a), stride(rotated(a)), beta, base(c), stride(c)), std::forward(c)){ - assert( size(c) == size(rotated(c)) ); - if(stride(a)==1) { - if(stride(c)==1) {syrk(flip(c_side)==filling::upper?'L':'U', 'N', size(c), size(a ), &alpha, base(a), stride(rotated(a)), &beta, base(c), stride(rotated(c)));} - else {syrk(c_side ==filling::upper?'L':'U', 'N', size(c), size(rotated(a)), &alpha, base(a), stride(rotated(a)), &beta, base(c), stride( c ));} - } else { - if(stride(c)==1) {syrk(flip(c_side)==filling::upper?'L':'U', 'T', size(c), size(rotated(a)), &alpha, base(a), stride(a), &beta, base(c), stride(rotated(c)));} - else {syrk(c_side ==filling::upper?'L':'U', 'T', size(c), size(rotated(a)), &alpha, base(a), stride(a), &beta, base(c), stride( c ));} - } - return std::forward(c); -} - -template -auto syrk(filling c_side, AA alpha, A2D const& a, C2D&& c) // NOLINT(readability-identifier-length) BLAS naming -->decltype(syrk(c_side, alpha, a, 0., std::forward(c))) { - return syrk(c_side, alpha, a, 0., std::forward(c)); } - 
-template -auto syrk(AA alpha, A2D const& a, C2D&& c) // NOLINT(readability-identifier-length) BLAS naming -->decltype(syrk(filling::upper, alpha, a, syrk(filling::lower, alpha, a, std::forward(c)))) { - return syrk(filling::upper, alpha, a, syrk(filling::lower, alpha, a, std::forward(c))); } - -template -[[nodiscard]] // ("because input argument is const") -// this decay in the return type is important -// NOLINTNEXTLINE(readability-identifier-length) BLAS naming -auto syrk(AA alpha, A2D const& a) -> std::decay_\ -t { - return syrk(alpha, a, Ret({size(a), size(a)}, get_allocator(a))); } - -template -[[nodiscard]] -auto syrk(A2D const& A) // NOLINT(readability-identifier-length) BLAS naming -->decltype(syrk(1., A)) { - return syrk(1., A); } - -} // end namespace boost::multi::blas - -//#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -//#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS syrk" -//#include - -//#include "../blas/gemm.hpp" - -//#include "../../array.hpp" -//#include "../../utility.hpp" - -//#include - -//#include -//#include -//#include -//#include -//#include - -////#include - -//using std::cout; -//using std::cerr; - -//namespace multi = boost::multi; - -//template decltype(auto) print(M const& C){ -// using boost::multi::size; -// for(int i = 0; i != size(C); ++i){ -// for(int j = 0; j != size(C[i]); ++j) -// std::cout << C[i][j] << ' '; -// std::cout << std::endl; -// } -// return std::cout << std::endl; -//} - -//BOOST_AUTO_TEST_CASE(multi_blas_syrk_real){ -// multi::array const a = { -// { 1., 3., 4.}, -// { 9., 7., 1.} -// }; -// { -// multi::array c({3, 3}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// using blas::transposed; -// syrk(filling::lower, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[2][1] == 19. ); -// BOOST_REQUIRE( c[1][2] == 9999. 
); -// } -// { -// multi::array c({3, 3}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// using blas::transposed; -// syrk(filling::upper, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[1][2] == 19. ); -// BOOST_REQUIRE( c[2][1] == 9999. ); -// } -// { -// multi::array c({2, 2}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// using blas::syrk; -// syrk(filling::lower, 1., a, 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[1][0] == 34. ); -// BOOST_REQUIRE( c[0][1] == 9999. ); -// } -// { -// multi::array c({2, 2}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// syrk(filling::upper, 1., a, 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, a⸆a, `c` in lower triangular -// BOOST_REQUIRE( c[0][1] == 34. ); -// BOOST_REQUIRE( c[1][0] == 9999. ); -// } -// { -// multi::array c({2, 2}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// syrk(filling::upper, 1., a, 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, a⸆a, `c` in lower triangular -// BOOST_REQUIRE( c[0][1] == 34. ); -// BOOST_REQUIRE( c[1][0] == 9999. ); -// } -//} - -//BOOST_AUTO_TEST_CASE(multi_blas_syrk_real_special_case){ -// multi::array const a = { -// { 1., 3., 4.}, -// }; -// { -// multi::array c({1, 1}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// syrk(filling::lower, 1., a, 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular -// //BOOST_REQUIRE( c[1][0] == 34. ); -// //BOOST_REQUIRE( c[0][1] == 9999. ); -// } -//} - -//BOOST_AUTO_TEST_CASE(multi_blas_syrk_complex_real_case){ -// using complex = std::complex; -// multi::array const a = { -// { 1., 3., 4.}, -// { 9., 7., 1.} -// }; -// { -// multi::array c({3, 3}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// using blas::transposed; -// syrk(filling::lower, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[2][1] == 19. 
); -// BOOST_REQUIRE( c[1][2] == 9999. ); -// } -//} - -//BOOST_AUTO_TEST_CASE(multi_blas_syrk_complex){ -// using complex = std::complex; -// constexpr auto const I = complex{0., 1.}; -// multi::array const a = { -// { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I}, -// { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I} -// }; -// { -// multi::array c({3, 3}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// using blas::transposed; -// syrk(filling::lower, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[2][1] == complex(-3., -34.) ); -// BOOST_REQUIRE( c[1][2] == 9999. ); -// } -// { -// multi::array c({2, 2}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// syrk(filling::lower, 1., a, 0., c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[1][0] == complex(18., -21.) ); -// BOOST_REQUIRE( c[0][1] == 9999. ); -// } -// { -// multi::array c({2, 2}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// syrk(filling::upper, 1., a, 0., c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in upper triangular -// BOOST_REQUIRE( c[0][1] == complex(18., -21.) ); -// BOOST_REQUIRE( c[1][0] == 9999. ); -// } -//} - - -//BOOST_AUTO_TEST_CASE(multi_blas_syrk_automatic_operation_complex){ -// using complex = std::complex; -// constexpr auto const I = complex{0., 1.}; -// multi::array const a = { -// { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I}, -// { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I} -// }; -// { -// multi::array c({2, 2}, 9999.); -// using multi::blas::filling; -// syrk(filling::lower, 1., a, 0., c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[1][0]==complex(18., -21.) ); -// BOOST_REQUIRE( c[0][1]==9999. ); -// } -// { -// multi::array c({3, 3}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// using blas::transposed; -// syrk(filling::lower, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(aa⸆)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[2][1]==complex(-3.,-34.) 
); -// BOOST_REQUIRE( c[1][2]==9999. ); -// } -// { -// multi::array c({3, 3}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// using blas::transposed; -// syrk(filling::lower, 1., rotated(a), 0., c); // c⸆=c=a⸆a=(aa⸆)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[2][1]==complex(-3.,-34.) ); -// BOOST_REQUIRE( c[1][2]==9999. ); -// } -//} - -//BOOST_AUTO_TEST_CASE(multi_blas_syrk_automatic_operation_real){ -// multi::array const a = { -// { 1., 3., 4.}, -// { 9., 7., 1.} -// }; -// { -// multi::array c({2, 2}, 9999.); -// using multi::blas::filling; -// syrk(filling::lower, 1., a, 0., c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[1][0] == 34. ); -// BOOST_REQUIRE( c[0][1] == 9999. ); -// } -// { -// multi::array c({2, 2}, 9999.); -// using multi::blas::filling; -// syrk(filling::upper, 1., a, 0., c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in upper triangular -// BOOST_REQUIRE( c[0][1] == 34. ); -// BOOST_REQUIRE( c[1][0] == 9999. ); -// } -// { -// multi::array c({3, 3}, 9999.); -// using multi::blas::filling; -// syrk(filling::lower, 1., rotated(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[2][1] == 19. ); -// BOOST_REQUIRE( c[1][2] == 9999. ); -// } -// { -// multi::array c({3, 3}, 9999.); -// namespace blas = multi::blas; -// using blas::transposed; -// using blas::filling; -// syrk(filling::lower, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[2][1] == 19. ); -// BOOST_REQUIRE( c[1][2] == 9999. ); -// } -// { -// multi::array c({3, 3}, 9999.); -// namespace blas = multi::blas; -// using blas::transposed; -// using blas::filling; -// syrk(filling::upper, 1., transposed(a), 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in upper triangular -// BOOST_REQUIRE( c[1][2] == 19. ); -// BOOST_REQUIRE( c[2][1] == 9999. 
); -// } -// { -// multi::array c({2, 2}, 9999.); -// using multi::blas::filling; -// using multi::blas::transposed; -// syrk(filling::upper, 1., a, 0., transposed(c)); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in upper triangular -// BOOST_REQUIRE( c[0][1] == 9999. ); -// BOOST_REQUIRE( c[1][0] == 34. ); -// } -//} - -//BOOST_AUTO_TEST_CASE(multi_blas_syrk_automatic_implicit_zero){ -// multi::array const a = { -// { 1., 3., 4.}, -// { 9., 7., 1.} -// }; -// { -// multi::array c({2, 2}, 9999.); -// using multi::blas::filling; -// syrk(filling::lower, 1., a, c); // c⸆=c=aa⸆=(aa⸆)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[1][0] == 34. ); -// BOOST_REQUIRE( c[0][1] == 9999. ); -// } -//} - -//BOOST_AUTO_TEST_CASE(multi_blas_syrk_automatic_symmetrization){ -// multi::array const a = { -// { 1., 3., 4.}, -// { 9., 7., 1.} -// }; -// { -// multi::array c({2, 2}, 9999.); -// using multi::blas::syrk; -// using multi::blas::gemm; -// using multi::blas::T; -// syrk(1., a, c); // c⸆=c=aa⸆=(aa⸆)⸆ -// BOOST_REQUIRE( c[1][0] == 34. ); -// BOOST_REQUIRE( c[0][1] == 34. ); -// BOOST_REQUIRE( syrk(a) == gemm(a, T(a)) ); -// } -// { -// using multi::blas::syrk; -// multi::array c = syrk(1., a); // c⸆=c=aa⸆=(aa⸆)⸆ -// BOOST_REQUIRE( c[1][0] == 34. ); -// BOOST_REQUIRE( c[0][1] == 34. ); -// } -// { -// using multi::blas::syrk; -// multi::array c = syrk(a); // c⸆=c=aa⸆=(aa⸆)⸆ -// BOOST_REQUIRE( c[1][0] == 34. ); -// BOOST_REQUIRE( c[0][1] == 34. ); -// } -// { -// using multi::blas::transposed; -// using multi::blas::syrk; -// multi::array c = syrk(transposed(a)); // c⸆=c=a⸆a=(a⸆a)⸆ -// BOOST_REQUIRE( c[2][1] == 19. ); -// BOOST_REQUIRE( c[1][2] == 19. ); -// } -//} - -//#if 0 - - - -//} - - - - - - - -//} - - - - - - - -//#if 0 -// { - -// { -// multi::array C({2, 2}, 9999.); -// syrk(1., rotated(A), rotated(C)); // C^T=C=A*A^T=(A*A^T)^T -// assert( C[1][0] == complex(18., -21.) 
); -// } -// { -// multi::array C({2, 2}, 9999.); -// syrk(rotated(A), rotated(C)); // C^T=C=A*A^T=(A*A^T)^T -// assert( C[1][0] == complex(18., -21.) ); -// } -// { -// complex C[2][2]; -// using multi::rotated; -// syrk(rotated(A), rotated(C)); // C^T=C=A*A^T=(A*A^T)^T -// assert( C[1][0] == complex(18., -21.) ); -// } -// { -// auto C = syrk(1., A); // C = C^T = A^T*A, C is a value type matrix (with C-ordering, information is everywhere) -// assert( C[1][2]==complex(-3.,-34.) ); -// } -// { -//// what(rotated(syrk(A))); -// multi::array C = rotated(syrk(A)); // C = C^T = A^T*A, C is a value type matrix (with C-ordering, information is in upper triangular part) -// print(C) <<"---\n"; -// } -// -// } -//#if 0 -// { -// multi::array const A = { -// { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I}, -// { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I} -// }; -// auto C = rotated(syrk(A)).decay(); // C = C^T = A^T*A, C is a value type matrix (with C-ordering, information is in upper triangular part) -// print(C) <<"---\n"; -//// print(C) <<"---\n"; -// } -// return 0; -// { -// multi::array const A = { -// { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I}, -// { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I} -// }; -// auto C = syrk(rotated(A)); // C = C^T = A^T*A, C is a value type matrix (with C-ordering) -// print(C) <<"---\n"; -// } -//#endif -//#endif -//} - -//BOOST_AUTO_TEST_CASE(multi_blas_syrk_herk_fallback){ -// multi::array const a = { -// { 1., 3., 4.}, -// { 9., 7., 1.} -// }; -// { -// multi::array c({2, 2}, 9999.); -// namespace blas = multi::blas; -// using blas::filling; -// syrk(filling::lower, 1., a, 0., c); // c⸆=c=a⸆a=(a⸆a)⸆, `c` in lower triangular -// BOOST_REQUIRE( c[1][0] == 34. ); -// BOOST_REQUIRE( c[0][1] == 9999. 
); -// } -//} -//#endif - -//#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/asum.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/asum.cpp deleted file mode 100644 index 901402491b..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/asum.cpp +++ /dev/null @@ -1,78 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXX $0 -o $0x `pkg-config --libs blas` -lcudart -lcublas -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2019-2020 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS asum" -#define BOOST_TEST_DYN_LINK -#include - -#include "../../blas/asum.hpp" -#include "../../blas/cuda.hpp" -#include "../../../array.hpp" -#include "../../../adaptors/cuda.hpp" - -#include -#include - -using std::cout; -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(multi_blas_asum_double){ - multi::array const A = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - using multi::blas::asum; - BOOST_REQUIRE(asum(A[1]) == std::accumulate(begin(A[1]), end(A[1]), 0., [](auto&& a, auto&& b){return a + std::abs(b);})); -} - -BOOST_AUTO_TEST_CASE(multi_blas_asum_complex){ - using Z = std::complex; Z const I{0, 1}; - multi::array const A = { - {1. + 2.*I, 2., 3., 4.}, - {5., 6. 
+ 3.*I, 7., 8.}, - {9., 10., 11.+ 4.*I, 12.} - }; - using multi::blas::asum; - BOOST_REQUIRE(asum(A[1]) == std::accumulate(begin(A[1]), end(A[1]), 0., [](auto&& a, auto&& b){return a + std::abs(real(b)) + std::abs(imag(b));})); -} - -BOOST_AUTO_TEST_CASE(multi_blas_asum_double_cuda){ - multi::cuda::array const A = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - using multi::blas::asum; - BOOST_REQUIRE(asum(A[1]) == 26 ); -} - -using complex = std::complex; constexpr complex I{0, 1}; - -BOOST_AUTO_TEST_CASE(multi_blas_asum_complex_cuda){ - namespace blas = multi::blas; - multi::cuda::array const A = { - {1. + 2.*I, 2., 3., 4.}, - {5., 6. + 3.*I, 7., 8.}, - {9., 10., 11.+ 4.*I, 12.} - }; - - BOOST_REQUIRE( blas::asum(A[1]) == 29. ); - BOOST_REQUIRE( blas::asum(A[1]({0, 4})) == 29. ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_asum_complex_cuda_mutable){ - using Z = std::complex; Z const I{0, 1}; - multi::cuda::array A = { - {1. + 2.*I, 2., 3., 4.}, - {5., 6. + 3.*I, 7., 8.}, - {9., 10., 11.+ 4.*I, 12.} - }; - using multi::blas::asum; - BOOST_REQUIRE( asum(A[1]) == Z{29.} ); - BOOST_REQUIRE( asum(A[1]({0, 4})) == Z{29.} ); -} - - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/axpy.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/axpy.cpp deleted file mode 100644 index 1ae2a1f1fa..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/axpy.cpp +++ /dev/null @@ -1,149 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// © Alfredo A. 
Correa 2019-2022 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS axpy" -#define BOOST_TEST_DYN_LINK -#include - -#include "config.hpp" - -#include "multi/adaptors/blas/axpy.hpp" -#include "multi/adaptors/blas/operations.hpp" -#include "multi/array.hpp" - -#include - -namespace multi = boost::multi; -namespace blas = multi::blas; - -BOOST_AUTO_TEST_CASE(multi_blas_axpy_real) { - multi::array a = { // NOLINT(readability-identifier-length) BLAS naming - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - auto const AC = a; - multi::array const b = a[2]; // NOLINT(readability-identifier-length) BLAS naming - - blas::axpy(2., b, a[1]); // daxpy - BOOST_REQUIRE( a[1][2] == 2.*b[2] + AC[1][2] ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_axpy_double) { - multi::array const const_arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - multi::array arr = const_arr; - multi::array const b = const_arr[2]; // NOLINT(readability-identifier-length) conventional name in BLAS - - blas::axpy(2., b, arr[1]); // A[1] = 2*b + A[1], A[1]+= a*A[1] - BOOST_REQUIRE( arr[1][2] == 2.*b[2] + const_arr[1][2] ); - - using complex = std::complex; complex const I = {0, 1}; // NOLINT(readability-identifier-length) imaginary unit - multi::array AC = {1. + 2.*I, 3. + 4.*I, 4. - 8.*I}; - multi::array BC(extensions(AC), complex{0.}); - - blas::axpy(+1., blas::real(AC), blas::real(BC)); - blas::axpy(-1., blas::imag(AC), blas::imag(BC)); - - BOOST_REQUIRE( BC[2] == std::conj(AC[2]) ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex) { - using complex = std::complex; - multi::array arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - auto const const_arr = arr; - multi::array const x = arr[2]; // NOLINT(readability-identifier-length) BLAS naming - blas::axpy(2., x, arr[1]); // zaxpy (2. 
is promoted to 2+I*0 internally and automatically) - BOOST_REQUIRE( arr[1][2] == 2.*x[2] + const_arr[1][2] ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex_as_operator_plus_equal) { - using complex = std::complex; - multi::array arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - auto const carr = arr; - multi::array const y = arr[2]; // NOLINT(readability-identifier-length) BLAS naming - arr[1] += blas::axpy(2., y); // zaxpy (2. is promoted to 2+I*0 internally and automatically) - BOOST_REQUIRE( arr[1][2] == 2.*y[2] + carr[1][2] ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex_as_operator_minus_equal) { - using complex = std::complex; - multi::array arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - auto const AC = arr; - multi::array const x = arr[2]; // NOLINT(readability-identifier-length) BLAS naming - arr[1] -= blas::axpy(2., x); // zaxpy (2. is promoted to 2+I*0 internally and automatically) - BOOST_REQUIRE( arr[1][2] == -2.*x[2] + AC[1][2] ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex_context) { - using complex = std::complex; - multi::array arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - auto const arr_copy = arr; - multi::array const arr2 = arr[2]; - blas::axpy(blas::context{}, 2., arr2, arr[1]); // zaxpy (2. is promoted to 2+I*0 internally and automatically) - BOOST_REQUIRE( arr[1][2] == 2.*arr2[2] + arr_copy[1][2] ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_axpy_operator_minus) { - using complex = std::complex; - multi::array x = {10., 11., 12., 13.}; // NOLINT(readability-identifier-length) BLAS naming - multi::array y = x; // NOLINT(readability-identifier-length) BLAS naming - - using blas::operators::operator-; - - BOOST_REQUIRE( (x - y)[0] == 0. ); - BOOST_REQUIRE( (y - x)[0] == 0. 
); - - using blas::operators::operator+; - - BOOST_REQUIRE( (x - (y+y))[0] == -x[0] ); - BOOST_REQUIRE( ((x+x) - y)[0] == +x[0] ); - - multi::array arr = {{1., 2.}, {3., 4.}}; - multi::array arr2 = {1., 2.}; - BOOST_REQUIRE( (arr[0] - arr2)[0] == 0. ); - BOOST_REQUIRE( (arr[0] - arr2)[1] == 0. ); - - multi::array X = {10., 11., 12., 13.}; // NOLINT(readability-identifier-length) BLAS naming - multi::array Y = {10., 11., 12., 13.}; // NOLINT(readability-identifier-length) BLAS naming - - using blas::operators::operator-=; - X -= Y; - BOOST_REQUIRE( X[0] == 0. ); -} - -#if CUDA_FOUND -#include -BOOST_AUTO_TEST_CASE(multi_blas_axpy_complex_thrust) { - using complex = thrust::complex; - multi::array A = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - auto const AC = A; - multi::array const B = A[2]; - blas::axpy(2., B, A[1]); // zaxpy (2. is promoted to 2+I*0 internally and automatically) - BOOST_REQUIRE( A[1][2] == 2.*B[2] + AC[1][2] ); -} -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/copy.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/copy.cpp deleted file mode 100644 index 60544fbaae..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/copy.cpp +++ /dev/null @@ -1,128 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. 
Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS copy" -#include - -#include "../../../array.hpp" - -#include "../../blas/copy.hpp" - -#include - -#include "config.hpp" - -namespace multi = boost::multi; -namespace blas = multi::blas; - -BOOST_AUTO_TEST_CASE(multi_blas_copy_n) { - multi::array const x = {1., 2., 3., 4.}; // NOLINT(readability-identifier-length) BLAS naming - multi::array y = {5., 6., 7., 8.}; // NOLINT(readability-identifier-length) BLAS naming - blas::copy_n(x.begin(), x.size(), y.begin()); - BOOST_REQUIRE( y == x ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_copy_it) { - multi::array const x = {1., 2., 3., 4.}; // NOLINT(readability-identifier-length) BLAS naming - multi::array y = {5., 6., 7., 8.}; // NOLINT(readability-identifier-length) BLAS naming - blas::copy(x.begin(), x.end(), y.begin()); - BOOST_REQUIRE( y == x ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_copy) { - multi::array const x = {1., 2., 3., 4.}; // NOLINT(readability-identifier-length) BLAS naming - { - multi::array y = {5., 6., 7., 8.}; // NOLINT(readability-identifier-length) BLAS naming - blas::copy(x, y); // segmentation fault in clang-11 - BOOST_REQUIRE( y == x ); - } - { - multi::array y = {5., 6., 7., 8.}; // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( size(y) == size(x) ); - y = blas::copy(x); - BOOST_REQUIRE( y == x ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_copy_real) { - namespace blas = multi::blas; - multi::array arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - BOOST_REQUIRE( arr[0][2] == 3. ); - BOOST_REQUIRE( arr[2][2] == 11. ); - - blas::copy(arr[0], arr[2]); - BOOST_REQUIRE( arr[0][2] == 3. ); - BOOST_REQUIRE( arr[2][2] == 3. ); - -// multi::blas::copy(begin(A[1]), end(A[1]), begin(A[2])); // dcopy - blas::copy( arr[1]({0, size(arr[1])}), arr[2]({0, size(arr[1])}) ); - BOOST_REQUIRE( arr[1][3] == 8. ); - BOOST_REQUIRE( arr[2][3] == 8. 
); - - multi::array AR3 = blas::copy(rotated(arr)[3]); // dcopy - BOOST_REQUIRE( AR3[1] == arr[1][3] ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_copy_row) { - multi::array const arr = { - {1., 2., 3.}, - {4., 5., 6.}, - {7., 8., 9.} - }; - multi::array y(multi::extensions_t<1>{multi::iextension{3}}); // NOLINT(readability-identifier-length) BLAS naming - blas::copy(rotated(arr)[0], y); - BOOST_REQUIRE( y == rotated(arr)[0] ); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_copy_complex) { - using complex = std::complex; constexpr complex I{0, 1}; // NOLINT(readability-identifier-length) imag unit - multi::array arr = { - {1. + 3.*I, 2. + 4.*I, 3. + 5.*I, 4. + 6.*I}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - blas::copy(arr[0], arr[2]); - BOOST_REQUIRE( arr[0][2] == 3. + 5.*I ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_copy_context) { - multi::array const x = {1., 2., 3., 4.}; // NOLINT(readability-identifier-length) BLAS naming - blas::context ctx; - { - multi::array y = {5., 6., 7., 8.}; // NOLINT(readability-identifier-length) BLAS naming - blas::copy(ctx, x, y); - BOOST_REQUIRE( x == y ); - } - { - multi::array y = {5., 6., 7., 8.}; // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( size(y) == size(x) ); - y = blas::copy(ctx, x); - BOOST_REQUIRE( x == y ); - } -} - -#if CUDA_FOUND -#include - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_copy_thrust) { - multi::array, 1> const a(multi::extensions_t<1>{multi::iextension{10}}, thrust::complex{}); - multi::array, 1> b(multi::extensions_t<1>{multi::iextension{10}}); - blas::copy(a, b); - - BOOST_REQUIRE( a == b ); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_text_copy_interop) { - static_assert( std::is_convertible, thrust::complex>{} ); - static_assert( std::is_convertible, std::complex>{} ); - multi::array, 1> a(multi::extensions_t<1>{multi::iextension{10}}, std::complex{}); - multi::array, 1> b(multi::extensions_t<1>{multi::iextension{10}}); - blas::copy(a, b); - - BOOST_REQUIRE( a == b ); -} 
-#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/dot.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/dot.cpp deleted file mode 100644 index 46767a570b..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/dot.cpp +++ /dev/null @@ -1,267 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS dot" -#include - -#include "../../blas/dot.hpp" - -#include "../../../array.hpp" - -#include -#include -#include - -namespace multi = boost::multi; -namespace blas = multi::blas; - -BOOST_AUTO_TEST_CASE(blas_dot_context) { - multi::array const x = {1., 2., 3.}; // NOLINT(readability-identifier-length) BLAS naming - multi::array const y = {1., 2., 3.}; // NOLINT(readability-identifier-length) BLAS naming - blas::context ctxt; - { - auto res = +blas::dot(&ctxt, x, y); // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), 0.F) ); - } - { - float res = +blas::dot(&ctxt, x, y); // NOLINT(readability-identifier-length) BLAS naming - BOOST_TEST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), 0.F) ); - } -} - -BOOST_AUTO_TEST_CASE(blas_dot_no_context) { - multi::array const x = {1., 2., 3.}; // NOLINT(readability-identifier-length) BLAS naming - multi::array const y = {1., 2., 3.}; // NOLINT(readability-identifier-length) BLAS naming - auto res = +blas::dot(x, y); - BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), 0.F) ); -} - - -BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param) { - multi::array const x = {1., 2., 3.}; // NOLINT(readability-identifier-length) BLAS naming - multi::array const y = {1., 2., 3.}; // NOLINT(readability-identifier-length) BLAS naming - float res = NAN; - blas::dot(x, y, res); - BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), 
begin(y), 0.F) ); -} - -BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_complex) { // if you get a segfaut here, your system may require -DRETURN_BY_STACK - using complex = std::complex; - multi::array const x = {1., 2., 3.}; // NOLINT(readability-identifier-length) BLAS naming - multi::array const y = {1., 2., 3.}; // NOLINT(readability-identifier-length) BLAS naming - complex res; - blas::dot(x, y, res); - BOOST_REQUIRE_EQUAL( real(res) , real(std::inner_product(begin(x), end(x), begin(y), complex{0.}, std::plus<>{}, [](auto const& alpha, auto const& omega) {return alpha*std::conj(omega);})) ); - BOOST_REQUIRE_EQUAL( imag(res) , imag(std::inner_product(begin(x), end(x), begin(y), complex{0.}, std::plus<>{}, [](auto const& alpha, auto const& omega) {return alpha*std::conj(omega);})) ); -} - -BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_complex_C) { - using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit - multi::array const x = {1., 2. , 3.}; // NOLINT(readability-identifier-length) BLAS naming - multi::array const y = {1., 2. + 2.*I, 3.}; // NOLINT(readability-identifier-length) BLAS naming - complex res; - blas::dot(blas::C(x), y, res); - BOOST_REQUIRE( res == std::inner_product(begin(x), end(x), begin(y), complex{0.}, std::plus<>{}, [](auto const& alpha, auto const& omega) {return conj(alpha)*omega;}) ); -} - -#if defined(CUDA_FOUND) and CUDA_FOUND -#include -BOOST_AUTO_TEST_CASE(blas_dot_no_context_out_param_complex_C_thrust) { - using complex = thrust::complex; complex const I{0., 1.}; - multi::array const A = {1., 2. , 3.}; - multi::array const B = {1., 2. 
+ 2.*I, 3.}; - complex C; - blas::dot(blas::C(A), B, C); - BOOST_REQUIRE( C == std::inner_product(begin(A), end(A), begin(B), complex{0.}, std::plus<>{}, [](auto& a, auto& b){return conj(a)*b;}) ); -} -#endif - -BOOST_AUTO_TEST_CASE(multi_blas_dot_strided) { - multi::array const CA = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - double res = std::numeric_limits::quiet_NaN(); - blas::dot_n(begin(CA[1]), size(CA[1]), begin(CA[2]), &res); - BOOST_REQUIRE( res == std::inner_product(begin(CA[1]), begin(CA[2]), end(CA[1]), 0.) ); - - double res2 = blas::dot(CA[1], CA[2]); - BOOST_REQUIRE( res == res2 ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_dot_strided_context) { - multi::array const CA = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - double res = std::numeric_limits::quiet_NaN(); - blas::context ctxt; - blas::dot_n(&ctxt, begin(CA[1]), size(CA[1]), begin(CA[2]), &res); - BOOST_REQUIRE( res == std::inner_product(begin(CA[1]), begin(CA[2]), end(CA[1]), 0.) ); - - double res2 = blas::dot(CA[1], CA[2]); - BOOST_REQUIRE( res == res2 ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_dot_1d_real) { - multi::array x = {1., 2., 3.}; // NOLINT(readability-identifier-length) BLAS naming - multi::array y = {1., 2., 3.}; // NOLINT(readability-identifier-length) BLAS naming - - using blas::dot; - BOOST_REQUIRE( 14. == dot(x, y) ); - BOOST_REQUIRE( dot(x, y) == 14.F ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_dot_impl_real) { - multi::array const cA = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - { - double res = blas::dot(cA[1], cA[2]); - BOOST_REQUIRE( res == std::inner_product(begin(cA[1]), begin(cA[2]), end(cA[1]), 0.) ); - } - { - double res = NAN; - blas::dot(cA[1], cA[2], res); - BOOST_REQUIRE( res == std::inner_product(begin(cA[1]), begin(cA[2]), end(cA[1]), 0.) 
); - } - { - double res = NAN; - auto res2 = blas::dot(cA[1], cA[2], res); - BOOST_REQUIRE( res == res2 ); - } - { - double res = blas::dot(cA[1], cA[2]); - BOOST_REQUIRE( res == std::inner_product(begin(cA[1]), begin(cA[2]), end(cA[1]), 0.) ); - BOOST_REQUIRE( blas::dot(cA[1], cA[2]) == blas::dot(cA[2], cA[1]) ); - } -} - -BOOST_AUTO_TEST_CASE(inq_case) { - multi::array x(multi::extensions_t<1>{multi::iextension{10}}, +1.0); // NOLINT(readability-identifier-length) BLAS naming - multi::array y(multi::extensions_t<1>{multi::iextension{10}}, -1.0); // NOLINT(readability-identifier-length) BLAS naming - - using blas::dot; - using blas::hermitized; - using blas::conj; - - auto res = dot(x, y); - auto res2 = dot(hermitized(x), y); - - BOOST_REQUIRE(res == res2); - - auto res3 = dot(blas::conj(x), y); // conjugation doesn't do anything for real array - BOOST_REQUIRE(res3 == res); - - auto d_arr = dot(blas::C(x), y); - BOOST_REQUIRE(d_arr == res); - - static_assert( not std::is_same{}, "!" ); - - using blas::C; - double d_doub = dot(C(x), y); - - BOOST_REQUIRE( d_doub == d_arr ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_dot_impl_complex) { - namespace blas = multi::blas; - - using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit - multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming - {1. + I, 2. + 3.*I, 3.+2.*I, 4.-9.*I}, - {5. + 2.*I, 6. + 6.*I, 7.+2.*I, 8.-3.*I}, - {9. + 1.*I, 10. 
+ 9.*I, 11.+1.*I, 12.+2.*I} - }; - { - complex c; blas::dot(A[1], A[2], c); // NOLINT(readability-identifier-length) BLAS naming - BOOST_TEST_REQUIRE( c == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}) ); - } - { - complex c = blas::dot(A[1], A[2]); // NOLINT(readability-identifier-length) BLAS naming - BOOST_TEST_REQUIRE( c == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}) ); - } - { - complex c = blas::dot(A[1], blas::C(A[2])); // NOLINT(readability-identifier-length) BLAS naming - BOOST_TEST_REQUIRE( c == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}, std::plus<>{}, [](auto alpha, auto omega) {return alpha*conj(omega);}) ); - } - { - complex c = blas::dot(blas::C(A[1]), A[2]); // NOLINT(readability-identifier-length) BLAS naming - BOOST_TEST_REQUIRE( c == inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{}, std::plus<>{}, [](auto alpha, auto omega) {return conj(alpha)*omega;}) ); - } - { - complex c = blas::dot(blas::conj(A[1]), A[2]); // NOLINT(readability-identifier-length) BLAS naming - BOOST_TEST_REQUIRE( c == inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{}, std::plus<>{}, [](auto alpha, auto omega) {return conj(alpha)*omega;}) ); - } - { - complex c = blas::dot(blas::C(A[1]), A[2]); // NOLINT(readability-identifier-length) BLAS naming - BOOST_TEST_REQUIRE( c == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}, std::plus<>{}, [](auto alpha, auto omega) {return conj(alpha)*omega;}) ); - } -} - -#include "config.hpp" // cuda found -#if defined(CUDA_FOUND) and CUDA_FOUND -//#include - -BOOST_AUTO_TEST_CASE(multi_blas_dot_impl_complex_thrust) { - namespace blas = multi::blas; - - using complex = std::complex; complex const I{0, 1}; - multi::array const A = { - {1. + I, 2. + 3.*I, 3.+2.*I, 4.-9.*I}, - {5. + 2.*I, 6. + 6.*I, 7.+2.*I, 8.-3.*I}, - {9. + 1.*I, 10. 
+ 9.*I, 11.+1.*I, 12.+2.*I} - }; - { - complex c; - blas::core::dotu(size(A[1]), A[1].base(), A[1].stride(), A[2].base(), A[2].stride(), &c); - auto inner = std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.}); - BOOST_REQUIRE( c.real() == inner.real() ); - BOOST_REQUIRE( c.imag() == inner.imag() ); - } - { - complex c; - blas::context::dotu(size(A[1]), A[1].base(), A[1].stride(), A[2].base(), A[2].stride(), &c); - auto inner = std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.}); - BOOST_REQUIRE( c.real() == inner.real() ); - BOOST_REQUIRE( c.imag() == inner.imag() ); - } - { - complex c; - blas::dot_n(begin(A[1]), size(A[1]), begin(A[2]), &c); - auto inner = std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.}); - BOOST_REQUIRE( c == inner ); - } - { - complex c; - blas::dot(A[1], A[2], c); - auto inner = std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.}); - BOOST_REQUIRE( c == inner ); - } - { - complex c = blas::dot(A[1], A[2]); - auto inner = std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.}); - BOOST_REQUIRE( c == inner ); - } - { - auto inner = std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0.}); - BOOST_REQUIRE( +blas::dot(A[1], A[2]) == inner ); - } - { - complex c; blas::dot(A[1], A[2], c); - BOOST_REQUIRE( c == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}) ); - } - { - complex c = blas::dot(A[1], A[2]); - BOOST_REQUIRE( c == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}) ); - } - { - complex c = blas::dot(A[1], blas::C(A[2])); - BOOST_REQUIRE( c == std::inner_product(begin(A[1]), end(A[1]), begin(A[2]), complex{0}, std::plus<>{}, [](auto a, auto b){return a*conj(b);}) ); - } -} -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/gemm.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/gemm.cpp deleted file mode 100644 index f06999cf1d..0000000000 --- 
a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/gemm.cpp +++ /dev/null @@ -1,1907 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS gemm" -#include - -#include "../../../adaptors/blas/gemm.hpp" -#include "../../../array.hpp" - -#include - -namespace multi = boost::multi; -namespace blas = multi::blas; - -BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_T_sub) { - multi::array A({100, 4}, 1.); // NOLINT(readability-identifier-length) BLAS naming - multi::array B({ 4, 4}, 1.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({100, 1}, 0.); // NOLINT(readability-identifier-length) BLAS naming - - blas::gemm(1., A({0, 100}, {1, 2}), blas::T(B)({0, 1}, {0, 1}), 0., C); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE(C[99][0] == 1.); -} - -BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_H_sub) { - multi::array A({100, 4}, 1.); // NOLINT(readability-identifier-length) BLAS naming - multi::array B({ 4, 4}, 1.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({100, 1}, 0.); // NOLINT(readability-identifier-length) BLAS naming - - blas::gemm(1., A({0, 100}, {1, 2}), blas::H(B)({0, 1}, {0, 1}), 0., C); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE(C[99][0] == 1.); -} - -BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_H_sub_6) { - multi::array A({100, 4}, 2.); // NOLINT(readability-identifier-length) BLAS naming - multi::array B({ 4, 4}, 3.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({100, 1}, 0.); // NOLINT(readability-identifier-length) BLAS naming - - blas::gemm(1., A({0, 100}, {1, 2}), blas::H(B)({0, 1}, {0, 1}), 0., C); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE(C[99][0] == 6.); -} - -BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_H_copy) { - multi::array A({100, 4}, 1.); // NOLINT(readability-identifier-length) BLAS naming - multi::array B({ 4, 4}, 1.); // 
NOLINT(readability-identifier-length) BLAS naming - - auto C = +blas::gemm(1., A({0, 100}, {1, 2}), blas::H(B)({2, 3}, {2, 3})); // c=ab, c⸆=b⸆a⸆ // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE(C[99][0] == 1.); -} - -BOOST_AUTO_TEST_CASE(adaptor_blas_complex_100x1_1x1) { - using complex = std::complex; - multi::array A({100, 1}, 1.); // NOLINT(readability-identifier-length) BLAS naming - multi::array B({ 1, 1}, 1.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({100, 1}, 0.); // NOLINT(readability-identifier-length) BLAS naming - - blas::gemm(1., A, B, 0., C); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE(C[99][0] == 1.); -} - -BOOST_AUTO_TEST_CASE(adaptor_blas_complex_100x1_1x1_T) { - using complex = std::complex; - multi::array A({100, 1}, 1.); // NOLINT(readability-identifier-length) BLAS naming - multi::array B({ 1, 1}, 1.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({100, 1}, 0.); // NOLINT(readability-identifier-length) BLAS naming - - blas::gemm(1., A, blas::T(B), 0., C); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE(C[99][0] == 1.); -} - -BOOST_AUTO_TEST_CASE(adaptor_blas_complex_100x1_1x1_H) { - using complex = std::complex; // complex const I{0, 1}; - multi::array A({100, 1}, 1.); // NOLINT(readability-identifier-length) BLAS naming - multi::array B({ 1, 1}, 1.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({100, 1}, 0.); // NOLINT(readability-identifier-length) BLAS naming - - blas::gemm(1., A, blas::H(B), 0., C); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE(C[99][0] == 1.); -} - -BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1) { - using complex = std::complex; - multi::array A({100, 1}, 1.); // NOLINT(readability-identifier-length) BLAS naming - multi::array B({ 1, 1}, 1.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({100, 1}, 0.); // NOLINT(readability-identifier-length) BLAS naming - - blas::gemm(1., A, B, 0., C); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE(C[99][0] 
== 1.); -} - -BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_T) { - multi::array A({100, 1}, 1.); // NOLINT(readability-identifier-length) BLAS naming - multi::array B({ 1, 1}, 1.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({100, 1}, 0.); // NOLINT(readability-identifier-length) BLAS naming - - blas::gemm(1., A, blas::T(B), 0., C); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE(C[99][0] == 1.); -} - -BOOST_AUTO_TEST_CASE(adaptor_blas_double_100x1_1x1_H) { - multi::array A({100, 1}, 1.0); // NOLINT(readability-identifier-length) BLAS naming - multi::array B({ 1, 1}, 1.0); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({100, 1}, 0.0); // NOLINT(readability-identifier-length) BLAS naming - - blas::gemm(1., A, blas::H(B), 0., C); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE(C[99][0] == 1.0); -} - -BOOST_AUTO_TEST_CASE(multi_blas_gemm_square_real) { - multi::array const a = { // NOLINT(readability-identifier-length) BLAS naming - { 1.0, 3.0, 4.0}, - { 9.0, 7.0, 1.0}, - { 1.0, 2.0, 3.0} - }; - multi::array const b = { // NOLINT(readability-identifier-length) BLAS naming - {11.0, 12.0, 4.0}, - { 7.0, 19.0, 1.0}, - {11.0, 12.0, 4.0} - }; - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., a, b, 0., c); - BOOST_REQUIRE( c[2][1] == 86.0 ); - } - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - BOOST_REQUIRE( size( a) == size( c) ); - BOOST_REQUIRE( size(~b) == size(~c) ); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); - BOOST_REQUIRE( c[2][1] == 86.0 ); - } - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., a, blas::T(b), 0., c); - BOOST_REQUIRE( c[2][1] == 48. 
); - } - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm_n(1., a.begin(), a.size(), blas::T(b).begin(), 0., c.begin()); - BOOST_REQUIRE( c[2][1] == 48. ); - } - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., blas::T(a), b, 0., c); - BOOST_REQUIRE( c[2][1] == 103. ); - } - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(b), 0., begin(c)); - BOOST_REQUIRE( c[2][1] == 103. ); - } - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., blas::T(a), blas::T(b), 0., c); - BOOST_REQUIRE( c[2][1] == 50. ); - } - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(blas::T(b)), 0., begin(c)); - BOOST_REQUIRE( c[2][1] == 50 ); - } - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., a, blas::T(b), 0., c); - BOOST_REQUIRE( c[2][1] == 48 ); - } - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm_n(1., begin(a), size(a), begin(blas::T(b)), 0., begin(c)); - BOOST_REQUIRE( c[2][1] == 48 ); - } - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., blas::T(a), b, 0., c); - BOOST_REQUIRE( c[2][1] == 103 ); - } - { - multi::array c({size(a), size(~b)}, 9999); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(b), 0., begin(c)); - BOOST_REQUIRE( c[2][1] == 103 ); - } - { - multi::array c({size(a), 
size(rotated(b))}, 9999); // NOLINT(readability-identifier-length) BLAS naming - blas::gemm(2., blas::H(a), blas::H(b), 0., c); - BOOST_REQUIRE( c[2][1] == 100 ); - } - { - multi::array c = blas::gemm(2., blas::H(a), blas::H(b)); // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( c[2][1] == 100 ); - } - { - multi::array const c = blas::gemm(2., blas::H(a), blas::H(b)); // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( c[2][1] == 100 ); - } - { - multi::array c({size(a), size(rotated(b))}, 9999); // NOLINT(readability-identifier-length) BLAS naming - c = blas::gemm(2., blas::H(a), blas::H(b)); - BOOST_REQUIRE( c[2][1] == 100 ); - } - { - multi::array c; // NOLINT(readability-identifier-length) BLAS naming - c = blas::gemm(2., blas::H(a), blas::H(b)); - BOOST_REQUIRE( c[2][1] == 100 ); - } - { - multi::array c({size(a), size(rotated(b))}, 9999); // NOLINT(readability-identifier-length) BLAS naming - blas::gemm_n(2., begin(blas::H(a)), size(blas::H(a)), begin(blas::H(b)), 0., begin(c)); - BOOST_REQUIRE( c[2][1] == 100 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_square) { - multi::array const a = { // NOLINT(readability-identifier-length) BLAS naming - { 1, 3}, - { 9, 7}, - }; - multi::array const b = { // NOLINT(readability-identifier-length) BLAS naming - { 11, 12}, - { 7, 19}, - }; - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == 148 ); - } - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::context ctxt; - blas::gemm_n(ctxt, 1., begin(a), size(a), begin(b), 0., begin(c)); - BOOST_REQUIRE( c[1][0] == 148 ); - } - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., ~a, b, 0., c); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE(( c[1][1] == 169 and c[1][0] == 82 )); - } - { - multi::array c({2, 
2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::context ctxt; - blas::gemm_n(ctxt, 1., begin(~a), size(~a), begin(b), 0., begin( c)); - BOOST_REQUIRE(( c[1][1] == 169 and c[1][0] == 82 )); - } - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::context ctxt; - blas::gemm_n(ctxt, 1., begin(~a), size(~a), begin(b), 0., begin(~c)); - BOOST_REQUIRE( (~c)[1][1] == 169 ); - BOOST_REQUIRE( (~c)[1][0] == 82 ); - } - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., a, ~b, 0., c); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE( c[1][0] == 183 ); - } - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::context ctxt; - blas::gemm_n(ctxt, 1., begin(a), size(a), begin(~b), 0., begin(c)); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE( c[1][0] == 183 ); - } - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., a, ~b, 0., ~c); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE( (~c)[1][0] == 183 ); - } - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm_n(1., begin(a), size(a), begin(~b), 0., begin(~c)); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE( (~c)[1][0] == 183 ); - } - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., ~a, ~b, 0., c); // c=a⸆b⸆, c⸆=ba - BOOST_REQUIRE( c[1][0] == 117 ); - } - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm_n(1., begin(~a), size(~a), begin(~b), 0., begin(c)); // c=a⸆b⸆, c⸆=ba - BOOST_REQUIRE( c[1][0] == 117 ); - } - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., ~a, ~b, 0., ~c); // c⸆=a⸆b⸆, c=ba - BOOST_REQUIRE( c[0][1] == 117 ); - } - { - multi::array c({2, 2}); // NOLINT(readability-identifier-length) 
conventional BLAS naming - blas::gemm_n(1., begin(~a), size(~a), begin(~b), 0., begin(~c)); // c⸆=a⸆b⸆, c=ba - BOOST_REQUIRE( c[0][1] == 117 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_nonsquare) { - multi::array const a = { // NOLINT(readability-identifier-length) BLAS naming - { 1, 3, 1}, - { 9, 7, 1}, - }; - multi::array const b = { // NOLINT(readability-identifier-length) BLAS naming - { 11, 12, 1}, - { 7, 19, 1}, - { 1, 1, 1} - }; - { - multi::array c({2, 3}); // NOLINT(readability-identifier-length) BLAS naming - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 17 ); - } - { - multi::array c({2, 3}); // NOLINT(readability-identifier-length) BLAS naming - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 17 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_nonsquare_automatic) { - namespace blas = multi::blas; - multi::array const a = { // NOLINT(readability-identifier-length) conventional BLAS naming - { 1., 3., 1.}, - { 9., 7., 1.}, - }; - multi::array const b = { // NOLINT(readability-identifier-length) conventional BLAS naming - { 11., 12., 4., 8.}, - { 7., 19., 2., 7.}, - { 5., 3., 3., 1.} - }; - { - multi::array c({size(a), size(~b)}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 53 ); - } - { - multi::array c({size(a), size(~b)}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 53 ); - } - { - multi::array c({2, 4}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(0.1, a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE_CLOSE( c[1][2] , 5.3 , 0.00001 ); - } - { - multi::array c({2, 4}); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm_n(0.1, begin(a), size(a), 
begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE_CLOSE( c[1][2] , 5.3 , 0.00001 ); - } - { - auto c =+ blas::gemm(0.1, a, b); // c=ab, c⸆=b⸆a⸆ // NOLINT(readability-identifier-length) conventional BLAS naming - BOOST_REQUIRE_CLOSE( c[1][2] , 5.3 , 0.00001 ); - } -#if((not defined(__CUDACC_VER_MAJOR__)) or ((__CUDACC_VER_MAJOR__ != 11) or (__CUDACC_VER_MINOR__ != 3))) // bug in nvcc 11.3 -// { -// multi::array c = blas::gemm(0.1, a, b); -// BOOST_REQUIRE_CLOSE( c[1][2] , 5.3 , 0.00001 ); -// } -#endif -} - -BOOST_AUTO_TEST_CASE(multi_blas_gemm_nh) { - using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imaginary unit - multi::array const a = { // NOLINT(readability-identifier-length) BLAS naming - {1.-2.*I, 9.-1.*I}, - {2.+3.*I, 1.-2.*I} - }; - { - auto c =+ blas::gemm(1., a, blas::H(a)); // c=aa†, c†=aa† // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } -#if((not defined(__CUDACC_VER_MAJOR__)) or ((__CUDACC_VER_MAJOR__ != 11) or (__CUDACC_VER_MINOR__ != 3))) // bug in nvcc 11.3 - { -// multi::array c = blas::gemm(1., a, blas::H(a)); // c=aa†, c†=aa† -// BOOST_REQUIRE( c[1][0] == 7.-10.*I ); -// BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } -#endif - { - multi::array c = blas::gemm(1., a, blas::H(a)); // c=aa†, c†=aa† // NOLINT(readability-identifier-length) conventional BLAS naming - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } - { - multi::array c({2, 2}, 9999.); // NOLINT(readability-identifier-length) conventional BLAS naming - c = blas::gemm(1., a, blas::H(a)); // c=aa†, c†=aa† - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } - { - multi::array c({2, 2}, 9999.); // NOLINT(readability-identifier-length) conventional BLAS naming - c() = blas::gemm(1., a, blas::H(a)); // c=aa†, c†=aa† - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I 
); - } - { - multi::array c({2, 2}, 9999.); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm(1., a, blas::H(a), 0., c); // c=aa†, c†=aa† - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } - { - multi::array c({2, 2}, 9999.); // NOLINT(readability-identifier-length) conventional BLAS naming - blas::gemm_n(1., begin(a), size(a), begin(blas::H(a)), 0., begin(c)); // c=aa†, c†=aa† - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } -} - -#if defined(CUDA_FOUND) -#include -BOOST_AUTO_TEST_CASE(multi_blas_gemm_nh_thrust) { - using complex = thrust::complex; complex const I{0, 1}; - multi::array const a = { - {1.-2.*I, 9.-1.*I}, - {2.+3.*I, 1.-2.*I} - }; - { - auto c =+ blas::gemm(1., a, blas::hermitized(a)); // c=aa†, c†=aa† - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } -#if((not defined(__CUDACC_VER_MAJOR__)) or ((__CUDACC_VER_MAJOR__ != 11) or (__CUDACC_VER_MINOR__ != 3))) // bug in nvcc 11.3 - { - multi::array c = blas::gemm(1., a, blas::hermitized(a)); // c=aa†, c†=aa† - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } -#endif - { - multi::array c = blas::gemm(1., a, blas::hermitized(a)); // c=aa†, c†=aa† - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } - { - multi::array c({2, 2}); - c = blas::gemm(1., a, blas::hermitized(a)); // c=aa†, c†=aa† - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } - { - multi::array c({2, 2}); - blas::gemm(1., a, blas::hermitized(a), 0., c); // c=aa†, c†=aa† - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(a), size(a), begin(blas::H(a)), 0., begin(c)); // c=aa†, c†=aa† - BOOST_REQUIRE( c[1][0] == 7.-10.*I ); - BOOST_REQUIRE( c[0][1] == 7.+10.*I ); - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_gemm_elongated) { - using 
complex = std::complex; complex const I{0, 1}; - multi::array const a = { - {1.-2.*I, 9.-1.*I} - }; - { - multi::array c({1, 1}); - blas::gemm(1., a, blas::H(a), 0., c); // c=aa†, c†=aa† - BOOST_REQUIRE( c[0][0] == 87. + 0.*I ); - } - { - multi::array c({1, 1}); - blas::gemm_n(1., begin(a), size(a), begin(blas::H(a)), 0., begin(c)); // c=aa†, c†=aa† - BOOST_REQUIRE( c[0][0] == 87. + 0.*I ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x1_3x1_bisbis) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - {1. + 2.*I}, - {9. - 1.*I}, - {1. + 1.*I} - }; - multi::array const b = { - { 11. - 2.*I, 7. - 3.*I, 8. - 1.*I} - }; - { - multi::array c({1, 1}); - - BOOST_REQUIRE( size(blas::H(a)) == 1 ); - BOOST_REQUIRE( size(blas::H(b)[0]) == 1 ); - - blas::gemm(1., blas::H(a), blas::H(b), 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][0] == 84.+7.*I ); - } - { - multi::array c({1, 1}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(blas::H(b)), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][0] == 84.+7.*I ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_empty) { - multi::array const a({0, 5}); - BOOST_REQUIRE( size( a) == 0 ); - BOOST_REQUIRE( size(~a) == 5 ); - BOOST_REQUIRE( a.is_empty() ); - - multi::array const b({5, 0}); - BOOST_REQUIRE( size( b) == 0 ); - BOOST_REQUIRE( size(~b) == 0 ); - BOOST_REQUIRE( b.is_empty() ); - { - multi::array c; - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - } - { - multi::array c; - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_nonsquare2) { - multi::array const a = { - { 1, 3}, - { 9, 7}, - { 1, 1} - }; - multi::array const b = { - { 11, 12}, - { 7, 19} - }; - { - multi::array c({size(a), size(~b)}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[2][1] == 31 ); - } - { - multi::array c({size(a), size(~b)}); - blas::gemm_n(1., begin(a), 
size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[2][1] == 31 ); - } - { - multi::array c({size(~b), size(a)}); - blas::gemm(1., a, b, 0., ~c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 31 ); - } - { - multi::array c({size(~b), size(a)}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(~c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 31 ); - } - { - auto ar = +~a; - multi::array c({3, 2}); - blas::gemm(1., ~ar, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[2][1] == 31 ); - } - { - auto ar = +~a; - multi::array c({3, 2}); - blas::gemm_n(1., begin(~ar), size(~ar), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[2][1] == 31 ); - } - { - auto ar = +~a; - multi::array c({2, 3}); - blas::gemm(1., ~ar, b, 0., ~c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 31 ); - } - { - auto ar = +~a; - multi::array c({2, 3}); - blas::gemm_n(1., begin(~ar), size(~ar), begin(b), 0., begin(~c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 31 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_2x2_2x2) { - multi::array const a = { - { 1, 3}, - { 9, 4}, - }; - multi::array const b = { - { 11, 12}, - { 7, 19}, - }; - { - multi::array c({2, 2}); - blas::gemm(1., ~a, b, 0., c); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE( c[1][0] == 61 ); - - blas::gemm(1., ~a, b, 0., ~c); // c⸆=a⸆b, c=b⸆a - BOOST_REQUIRE( c[0][1] == 61 ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(~a), size(~a), begin(b), 0., begin( c)); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE( c[1][0] == 61 ); - - blas::gemm_n(1., begin(~a), size(~a), begin(b), 0., begin(~c)); // c⸆=a⸆b, c=b⸆a - BOOST_REQUIRE( c[0][1] == 61 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_2x3_3x2) { - multi::array const a = { - { 1, 3}, - { 9, 4}, - { 1, 5} - }; - multi::array const b = { - { 11, 12}, - { 7, 19}, - { 8, 1 } - }; - { - multi::array c({2, 2}); - blas::gemm(1., ~a, b, 0., c); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE( c[1][0] == 101 ); - - blas::gemm(1., ~a, b, 0., ~c); // 
c⸆=a⸆b, c=b⸆a - BOOST_REQUIRE( c[0][1] == 101 ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(~a), size(~a), begin(b), 0., begin( c)); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE( c[1][0] == 101 ); - - blas::gemm_n(1., begin(~a), size(~a), begin(b), 0., begin(~c)); // c⸆=a⸆b, c=b⸆a - BOOST_REQUIRE( c[0][1] == 101 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_1x3_3x2) { - multi::array const a = { - {1, 9, 1} - }; - BOOST_REQUIRE( stride(~a) == 1 ); - BOOST_REQUIRE( stride( a) == 3 ); - multi::array const b = { - { 11, 12}, - { 7, 19}, - { 8, 1 } - }; - { - multi::array c({size(a), size(~b)}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 184 ); - } - { - multi::array c({size(a), size(~b)}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 184 ); - } - { - auto ar = +~a; - multi::array c({size(~b), size(~ar)}); - blas::gemm(1., ~ar, b, 0., ~c); // c⸆=a⸆b, c=b⸆a - BOOST_REQUIRE( c[1][0] == 184 ); - } - { - auto ar = +~a; - BOOST_REQUIRE( size(~ar) == 1 ); - BOOST_REQUIRE( begin(~ar).stride() == 1 ); - BOOST_REQUIRE( begin(~ar)->stride() == 1 ); - BOOST_REQUIRE( begin( ar)->stride() == 1 ); - - multi::array c({size(~b), size(~ar)}); - BOOST_REQUIRE( begin( c).stride() == 1 ); - BOOST_REQUIRE( begin(~c).stride() == 1 ); - BOOST_REQUIRE( begin(c)->stride() == 1 ); - - BOOST_REQUIRE( begin(b) ); - blas::gemm_n(1., begin(~ar), size(~ar), begin(b), 0., begin(~c)); // c⸆=a⸆b, c=b⸆a - BOOST_REQUIRE( c[1][0] == 184 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complexreal_1x3_3x2) { - using complex = std::complex; - multi::array const a = { - {1, 9, 1} - }; - BOOST_REQUIRE( stride(~a) == 1 ); - BOOST_REQUIRE( stride( a) == 3 ); - multi::array const b = { - { 11, 12}, - { 7, 19}, - { 8, 1 } - }; - { - multi::array c({size(a), size(~b)}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 184. 
); - } - { - multi::array c({size(a), size(~b)}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 184. ); - } - { - auto ar = +~a; - multi::array c({size(~b), size(~ar)}); - blas::gemm(1., ~ar, b, 0., ~c); // c⸆=a⸆b, c=b⸆a - BOOST_REQUIRE( c[1][0] == 184. ); - } - { - auto ar = +~a; - multi::array c({size(~b), size(~ar)}); - blas::gemm_n(1., begin(~ar), size(~ar), begin(b), 0., begin(~c)); // c⸆=a⸆b, c=b⸆a - BOOST_REQUIRE( c[1][0] == 184. ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_1x3_part_3x2) { - multi::array const a = { - {1, 9, 1}, - {3, 3, 3} - }; - BOOST_REQUIRE( stride(~a) == 1 ); - BOOST_REQUIRE( stride( a) == 3 ); - multi::array const b = { - { 11, 12}, - { 7, 19}, - { 8, 1 } - }; - { - multi::array c({size(a({0, 1})), size(~b)}); - blas::gemm(1., a({0, 1}), b, 0., c); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE( c[0][1] == 184 ); - } - { - multi::array c({size(a({0, 1})), size(~b)}); - blas::gemm_n(1., begin(a({0, 1})), size(a({0, 1})), begin(b), 0., begin(c)); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE( c[0][1] == 184 ); - } - { - auto ar = +~a; - multi::array c({size(~b), size(~ar(extension(ar), {0, 1}))}); - blas::gemm(1., ~(ar(extension(ar), {0, 1})), b, 0., ~c); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE( c[1][0] == 184 ); - } - { - auto ar = +~a; - multi::array c({size(~b), size(~ar(extension(ar), {0, 1}))}); - blas::gemm_n(1., begin(~(ar(extension(ar), {0, 1}))), size(~(ar(extension(ar), {0, 1}))), begin(b), 0., begin(~c)); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE( c[1][0] == 184 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complexreal_1x3_part_3x2) { - using complex = std::complex; - multi::array const a = { - {1., 9., 1.}, - {3., 3., 3.} - }; - BOOST_REQUIRE( stride(~a) == 1 ); - BOOST_REQUIRE( stride( a) == 3 ); - multi::array const b = { - { 11., 12.}, - { 7., 19.}, - { 8., 1.} - }; - { - multi::array c({size(a({0, 1})), size(~b)}); - blas::gemm(1., a({0, 1}), b, 0., c); - BOOST_REQUIRE( 
c[0][1] == 184. ); - } - { - multi::array c({size(a({0, 1})), size(~b)}); - blas::gemm_n(1., begin(a({0, 1})), size(a({0, 1})), begin(b), 0., begin(c)); - BOOST_REQUIRE( c[0][1] == 184. ); - } - { - auto ar = +~a; - multi::array c({size(~b), size(~ar(extension(ar), {0, 1}))}); - blas::gemm(1., ~(ar(extension(ar), {0, 1})), b, 0., ~c); - BOOST_REQUIRE( c[1][0] == 184. ); - } - { - auto ar = +~a; - multi::array c({size(~b), size(~ar(extension(ar), {0, 1}))}); - blas::gemm_n(1., begin(~(ar(extension(ar), {0, 1}))), size(~(ar(extension(ar), {0, 1}))), begin(b), 0., begin(~c)); - BOOST_REQUIRE( c[1][0] == 184. ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_2x3_3x1) { - multi::array const a = { - {1, 9, 1}, - {3, 3, 3} - }; - BOOST_REQUIRE( stride(~a) == 1 ); - BOOST_REQUIRE( stride( a) == 3 ); - multi::array const b = { - { 11}, - { 7}, - { 8} - }; - { - multi::array c({size(a), size(~b)}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][0] == 82 ); - BOOST_REQUIRE( c[1][0] == 78 ); - } - { - multi::array c({size(a), size(~b)}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE( c[0][0] == 82 ); - BOOST_REQUIRE( c[1][0] == 78 ); - } - { - auto ar = +~a; - multi::array c({size(~b), size(~ar(extension(ar), {0, 1}))}); - blas::gemm(1., ~(ar(extension(ar), {0, 1})), b, 0., ~c); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE( c[0][0] == 82 ); - } - { - auto ar = +~a; - multi::array c({size(~b), size(~ar(extension(ar), {0, 1}))}); - blas::gemm_n(1., begin(~(ar(extension(ar), {0, 1}))), size(~(ar(extension(ar), {0, 1}))), begin(b), 0., begin(~c)); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE( c[0][0] == 82 ); - } -} - - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_2x3_3x1_bis) { - multi::array const a = { - {1, 9, 1}, - {3, 4, 5} - }; - multi::array const b = { - { 11}, - { 7}, - { 8} - }; - - { - multi::array c({1, 2}); - blas::gemm(1., a, b, 0., ~c); // c⸆=ab, c=b⸆a⸆ - BOOST_REQUIRE( (~c)[0][0] == 82 ); - 
BOOST_REQUIRE( (~c)[1][0] == 101 ); - } - { - multi::array c({1, 2}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(~c)); // c⸆=ab, c=b⸆a⸆ - BOOST_REQUIRE( (~c)[0][0] == 82 ); - BOOST_REQUIRE( (~c)[1][0] == 101 ); - } - { - multi::array c({2, 1}); - blas::gemm(1., a, b, 0., c); // c⸆=ab, c=b⸆a⸆ - BOOST_REQUIRE( (~c)[0][1] == 101 ); - BOOST_REQUIRE( c[1][0] == 101 ); - } - { - multi::array c({2, 1}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c⸆=ab, c=b⸆a⸆ - BOOST_REQUIRE( (~c)[0][1] == 101 ); - BOOST_REQUIRE( c[1][0] == 101 ); - } - { - multi::array c({1, 2}); - auto ar = +~a; - blas::gemm(1., ~ar, b, 0., ~c); // c⸆=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 101 ); - } - { - multi::array c({1, 2}); - auto ar = +~a; - blas::gemm_n(1., begin(~ar), size(~ar), begin(b), 0., begin(~c)); // c⸆=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 101 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_1x3_3x1) { - multi::array const a = { - {1, 9, 1} - }; - multi::array const b = { - { 11}, - { 7}, - { 8} - }; - { - multi::array c({1, 1}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][0] == 82 ); - } - { - multi::array c({1, 1}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); - BOOST_REQUIRE( c[0][0] == 82 ); - } - { - multi::array c({1, 1}); - auto ar = +~a; - blas::gemm(1., ~ar, b, 0., c); - BOOST_REQUIRE( c[0][0] == 82 ); - } - { - multi::array c({1, 1}); - auto ar =+ ~a; - blas::gemm_n(1., begin(~ar), size(~ar), begin(b), 0., begin(c)); - BOOST_REQUIRE( c[0][0] == 82 ); - } - { - multi::array c({1, 1}); - auto br =+ ~b; - blas::gemm(1., a, ~br, 0., c); - BOOST_REQUIRE( c[0][0] == 82 ); - } - { - multi::array c({1, 1}); - BOOST_REQUIRE( begin(c). stride() == 1 ); - BOOST_REQUIRE( begin(c)->stride() == 1 ); - - auto br =+ ~b; - // BOOST_REQUIRE( begin(br). stride() == 1 ); - BOOST_REQUIRE( begin( br)->stride() == 1 ); - - BOOST_REQUIRE(begin(a)->stride() == 1); - BOOST_REQUIRE( begin(~br). 
stride() == 1 ); - // BOOST_REQUIRE( begin(~br)->stride() == 1 ); - BOOST_REQUIRE(begin(c)->stride() == 1); - BOOST_REQUIRE(begin(c).stride() == 1); - BOOST_REQUIRE(size(a) == 1); - - blas::gemm_n(1., begin(a), size(a), begin(~br), 0., begin(c)); - BOOST_REQUIRE( c[0][0] == 82 ); - } - { - multi::array c({1, 1}); - auto br = +~b; - blas::gemm(1., a, blas::H(br), 0., c); - BOOST_REQUIRE( c[0][0] == 82 ); - } - { - multi::array c({1, 1}); - auto br = +~b; - blas::gemm_n(1., begin(a), size(a), begin(blas::H(br)), 0., begin(c)); - BOOST_REQUIRE( c[0][0] == 82 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_square) { - using complex = std::complex; constexpr complex I{0, 1}; - multi::array const a = { - { 1.+3.*I, 3.+2.*I}, - { 9.+1.*I, 7.+1.*I}, - }; - multi::array const b = { - {11.+2.*I, 12.+4.*I}, - { 7.+1.*I, 19.-9.*I}, - }; - { - multi::array c({2, 2}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == 145. + 43.*I ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == 145. + 43.*I ); - } - { - multi::array c({2, 2}); - blas::gemm(1., ~a, b, 0., c); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE(( c[1][1] == 170.-8.*I and c[1][0] == 77.+42.*I )); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(~a), size(~a), begin(b), 0., begin(c)); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE(( c[1][1] == 170.-8.*I and c[1][0] == 77.+42.*I )); - } - { - multi::array c({2, 2}); - blas::gemm(1., a, ~b, 0., c); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE( c[1][0] == 177.+69.*I ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(a), size(a), begin(~b), 0., begin(c)); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE( c[1][0] == 177.+69.*I ); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::T(a), blas::T(b), 0., c); // c=a⸆b⸆, c⸆=ba - BOOST_REQUIRE( c[1][0] == 109. 
+ 68.*I ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(blas::T(b)), 0., begin(c)); // c=a⸆b⸆, c⸆=ba - BOOST_REQUIRE( c[1][0] == 109. + 68.*I ); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::T(a), blas::T(b), 0., blas::T(c)); // c⸆=a⸆b⸆, c=ba - BOOST_REQUIRE( c[0][1] == 109.+68.*I ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(blas::T(b)), 0., begin(blas::T(c))); // c⸆=a⸆b⸆, c=ba - BOOST_REQUIRE( c[0][1] == 109.+68.*I ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_1x3_3x1) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - {1. + 2.*I, 9. - 1.*I, 1. + 1.*I} - }; - multi::array const b = { - { 11. - 2.*I}, - { 7. - 3.*I}, - { 8. - 1.*I} - }; - { - multi::array c({1, 1}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][0] == 84.-7.*I ); - } - { - multi::array c({1, 1}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][0] == 84.-7.*I ); - } - { - multi::array c({1, 1}); - auto ar = +~a; - blas::gemm(1., ~ar, b, 0., c); // c=ab, c⸆=ba - BOOST_REQUIRE( c[0][0] == 84.-7.*I ); - } - { - multi::array c({1, 1}); - auto ar = +~a; - blas::gemm_n(1., begin(~ar), size(~ar), begin(b), 0., begin(c)); // c=ab, c⸆=ba - BOOST_REQUIRE( c[0][0] == 84.-7.*I ); - } - { - multi::array c({1, 1}); - auto br = +~b; - blas::gemm(1., a, ~br, 0., c); - BOOST_REQUIRE( c[0][0] == 84.-7.*I ); - } - { - multi::array c({1, 1}); - auto br = +~b; - blas::context ctxt; - blas::gemm_n(ctxt, 1., begin(a), size(a), begin(~br), 0., begin(c)); - BOOST_REQUIRE( c[0][0] == 84.-7.*I ); - } - { - multi::array c({1, 1}); - auto br = +~b; - blas::gemm(1., a, blas::H(br), 0., ~c); - BOOST_REQUIRE( c[0][0] == 80. 
+ 53.*I ); - } - { - multi::array c({1, 1}); - auto br = +~b; - blas::gemm_n(1., begin(a), size(a), begin(blas::H(br)), 0., begin(~c)); - BOOST_REQUIRE( c[0][0] == 80. + 53.*I ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_hermitized_square) { - using complex = std::complex; constexpr complex I{0, 1}; - multi::array const a = { - { 1.+3.*I, 3.+2.*I}, - { 9.+1.*I, 7.+1.*I}, - }; - multi::array const b = { - {11.+2.*I, 12.+4.*I}, - { 7.+1.*I, 19.-9.*I}, - }; - { - multi::array c({2, 2}); - blas::gemm(1., a, b, 0., c); // c=ab, c†=b†a† - BOOST_REQUIRE( c[1][0] == 145. + 43.*I ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c†=b†a† - BOOST_REQUIRE( c[1][0] == 145. + 43.*I ); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::H(a), blas::H(b), 0., c); // c=a†b†, c†=ba - BOOST_REQUIRE( c[1][0] == 109. - 68.*I ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(blas::H(b)), 0., begin(c)); // c=a†b†, c†=ba - BOOST_REQUIRE( c[1][0] == 109. - 68.*I ); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::H(a), blas::H(b), 0., blas::H(c)); // c†=a†b†, c=ba - BOOST_REQUIRE( c[1][0] == 184. - 40.*I ); - } -// { -// multi::array c({2, 2}); -// blas::context ctxt; -// blas::gemm_n(ctxt, 1., begin(blas::H(a)), size(blas::H(a)), begin(blas::H(b)), 0., begin(blas::H(c))); // c†=a†b†, c=ba -// BOOST_REQUIRE( c[1][0] == 184. - 40.*I ); -// } - { - multi::array c({2, 2}); - blas::gemm(1., blas::H(a), b, 0., c); // c=a†b, c†=b†a - BOOST_REQUIRE( c[1][0] == 87. - 16.*I ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=a†b, c†=b†a - BOOST_REQUIRE( c[1][0] == 87. - 16.*I ); - } - { - multi::array c({2, 2}); - blas::gemm(1., a, blas::H(b), 0., c); // c=ab†, c†=ba† - BOOST_REQUIRE( c[1][0] == 189. 
- 23.*I ); - } - { - multi::array c({2, 2}); - c = blas::gemm(1., a, blas::H(b)); // c=ab†, c†=ba† - BOOST_REQUIRE( c[1][0] == 189. - 23.*I ); - } - { - multi::array c = blas::gemm(1., a, blas::H(b)); // c=ab†, c†=ba† - BOOST_REQUIRE( size(c) == 2 ); - BOOST_REQUIRE( c[1][0] == 189. - 23.*I ); - } -#if((not defined(__CUDACC_VER_MAJOR__)) or ((__CUDACC_VER_MAJOR__ != 11) or (__CUDACC_VER_MINOR__ != 3))) // bug in nvcc 11.3 - { -// multi::array c = blas::gemm(1., a, blas::H(b)); // CTAD -// BOOST_REQUIRE( c.size() == 2 ); -// BOOST_REQUIRE( c[1][0] == 189. - 23.*I ); - } -#endif - { - auto c = multi::array(blas::gemm(1., a, blas::H(b))); // c=ab†, c†=ba† - BOOST_REQUIRE( size(c) == 2 ); - BOOST_REQUIRE( c[1][0] == 189. - 23.*I ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); // c=ab†, c†=ba† - BOOST_REQUIRE( c[1][0] == 189. - 23.*I ); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::H(a), blas::H(b), 0., c); // c=a†b†, c†=ba - BOOST_REQUIRE( c[1][0] == 109. - 68.*I); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(blas::H(b)), 0., begin(c)); // c=a†b†, c†=ba - BOOST_REQUIRE( c[1][0] == 109. - 68.*I); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x1_3x1) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - {1. + 2.*I}, - {9. - 1.*I}, - {1. + 1.*I} - }; - multi::array const b = { - { 11. - 2.*I}, - { 7. - 3.*I}, - { 8. 
- 1.*I} - }; - { - multi::array c({1, 1}); - blas::gemm(1., blas::H(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][0] == 80.-53.*I ); - } - { - multi::array c({1, 1}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][0] == 80.-53.*I ); - } - { - multi::array c({1, 1}); - blas::gemm(1., blas::H(a), b, 0., c); // c=a†b, c†=b†a - BOOST_REQUIRE( c[0][0] == 80.-53.*I ); - } - { - multi::array c({1, 1}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=a†b, c†=b†a - BOOST_REQUIRE( c[0][0] == 80.-53.*I ); - } - { - multi::array c({1, 1}); - auto ha = +blas::hermitized(a); - blas::gemm(1., ha, b, 0., c); - BOOST_REQUIRE( c[0][0] == 80.-53.*I ); - - blas::gemm(1., blas::H(b), a, 0., c); - BOOST_REQUIRE( c[0][0] == 80.+53.*I ); - } - { - multi::array c({1, 1}); - auto ha = +blas::hermitized(a); - blas::gemm_n(1., begin(ha), size(ha), begin(b), 0., begin(c)); - BOOST_REQUIRE( c[0][0] == 80.-53.*I ); - - blas::gemm_n(1., begin(blas::H(b)), size(blas::H(b)), begin(a), 0., begin(c)); - BOOST_REQUIRE( c[0][0] == 80.+53.*I ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_1x3_3x2) { - using complex = std::complex; constexpr complex I{0, 1}; - multi::array const a = { - {1. + 2.*I, 9. - 1.*I, 1. + 1.*I} - }; - multi::array const b = { - { 11. - 2.*I, 5. + 2.*I}, - { 7. - 3.*I, 2. + 1.*I}, - { 8. - 1.*I, 1. 
+ 1.*I} - }; - { - multi::array c({1, 2}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 20.+21.*I ); - } - { - multi::array c({1, 2}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 20.+21.*I ); - } - { - auto ar = +~a; - multi::array c({1, 2}); - blas::gemm(1., blas::H(ar), b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 28.+3.*I ); - } - { - auto ar = +~a; - multi::array c({1, 2}); - blas::gemm_n(1., begin(blas::H(ar)), size(blas::H(ar)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 28.+3.*I ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x1_3x2) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - {1. + 2.*I}, - {9. - 1.*I}, - {1. + 1.*I} - }; - multi::array const b = { - { 11. - 2.*I, 5. + 2.*I}, - { 7. - 3.*I, 2. + 1.*I}, - { 8. - 1.*I, 1. + 1.*I} - }; - { - multi::array c({1, 2}); - blas::gemm(1., blas::H(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 28.+3.*I ); - } - { - multi::array c({1, 2}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][1] == 28.+3.*I ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x2_3x2) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - {1. + 2.*I, 5. + 2.*I}, - {9. - 1.*I, 9. + 1.*I}, - {1. + 1.*I, 2. + 2.*I} - }; - multi::array const b = { - { 11. - 2.*I, 5. + 2.*I}, - { 7. - 3.*I, 2. + 1.*I}, - { 8. - 1.*I, 1. 
+ 1.*I} - }; - { - multi::array c({2, 2}); - blas::gemm(1., blas::H(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == 125.-84.*I ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == 125.-84.*I ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x2_3x1) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - {1. + 2.*I, 5. + 2.*I}, - {9. - 1.*I, 9. + 1.*I}, - {1. + 1.*I, 2. + 2.*I} - }; - multi::array const b = { - { 11. - 2.*I}, - { 7. - 3.*I}, - { 8. - 1.*I} - }; - { - multi::array c({2, 1}); - blas::gemm(1., blas::H(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == 125.-84.*I ); - } - { - multi::array c({2, 1}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == 125.-84.*I ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_3x1_3x1_bis) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - {1. + 2.*I}, - {9. - 1.*I}, - {1. + 1.*I} - }; - multi::array const b = { - { 11. - 2.*I}, - { 7. - 3.*I}, - { 8. - 1.*I} - }; - { - multi::array c({1, 1}); - blas::gemm(1., blas::H(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][0] == 80. - 53.*I ); - } - { - multi::array c({1, 1}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[0][0] == 80. 
- 53.*I ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_square_automatic) { - multi::array const a = { - { 1., 3.}, - { 9., 7.}, - }; - multi::array const b = { - { 11., 12.}, - { 7., 19.}, - }; - { - multi::array c({2, 2}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == 148 and c[1][1] == 241 ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == 148 and c[1][1] == 241 ); - } - { - multi::array c({2, 2}); - blas::gemm(1., a, blas::T(b), 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][1] == 196. ); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::T(a), b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][1] == 169. ); - BOOST_REQUIRE( c[1][0] == 82. ); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::T(a), blas::T(b), 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][1] == 154. ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_square_automatic) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - { 1. + 2.*I, 3. - 3.*I}, - { 9. + 1.*I, 7. + 4.*I}, - }; - multi::array const b = { - { 11. + 1.*I, 12. + 1.*I}, - { 7. + 8.*I, 19. 
- 2.*I}, - }; - namespace blas = multi::blas; - { - multi::array c({2, 2}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == complex(115, 104) ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == complex(115, 104) ); - } - { - multi::array c({2, 2}); - blas::gemm(1., a, blas::T(b), 0., c); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE( c[1][0] == complex(178, 75) ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(a), size(a), begin(blas::T(b)), 0., begin(c)); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE( c[1][0] == complex(178, 75) ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_square_automatic_part2) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - { 1. + 2.*I, 3. - 3.*I}, - { 9. + 1.*I, 7. + 4.*I}, - }; - multi::array const b = { - { 11. + 1.*I, 12. + 1.*I}, - { 7. + 8.*I, 19. - 2.*I}, - }; - namespace blas = multi::blas; - { - multi::array c({2, 2}); - blas::gemm(1., blas::T(a), b, 0., c); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE(( c[1][1] == complex(180, 29) and c[1][0] == complex(53, 54) )); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(b), 0., begin(c)); // c=a⸆b, c⸆=b⸆a - BOOST_REQUIRE(( c[1][1] == complex(180, 29) and c[1][0] == complex(53, 54) )); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::T(a), blas::T(b), 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE(( c[1][1] == complex(186, 65) and c[1][0] == complex(116, 25) )); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(blas::T(b)), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE(( c[1][1] == complex(186, 65) and c[1][0] == complex(116, 25) )); - } - { - multi::array c({2, 2}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == complex(115, 104) ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(a), size(a), begin(b), 
0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][0] == complex(115, 104) ); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::H(a), b, 0., c); // c=a†b, c†=b†a - BOOST_REQUIRE( c[1][0] == complex(111, 64) and c[1][1] == complex(158, -51) ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_square_automatic_part3) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - { 1. + 2.*I, 3. - 3.*I}, - { 9. + 1.*I, 7. + 4.*I}, - }; - multi::array const b = { - { 11. + 1.*I, 12. + 1.*I}, - { 7. + 8.*I, 19. - 2.*I}, - }; - namespace blas = multi::blas; - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(b), 0., begin(c)); // c=a†b, c†=b†a - BOOST_REQUIRE( c[1][0] == complex(111, 64) and c[1][1] == complex(158, -51) ); - } - { - multi::array c({2, 2}); - blas::gemm(1., a, blas::H(b), 0., c); // c=ab†, c†=ba† - BOOST_REQUIRE( c[1][0] == complex(188, 43) and c[1][1] == complex(196, 25) ); - auto c2 =+ blas::gemm(1., a, blas::H(b)); - BOOST_REQUIRE( c2 == c ); - } - { - multi::array c({2, 2}); - blas::gemm(0.1, a, blas::H(b), 0., c); // c=ab†, c†=ba† - auto const c2 =+ blas::gemm(0.1, a, blas::H(b)); - BOOST_REQUIRE( c2 == c ); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::H(a), blas::H(b), 0., c); // c=a†b†, c†=ba - BOOST_REQUIRE( c[1][0] == complex(116, -25) and c[1][1] == complex(186, -65) ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::H(a)), size(blas::H(a)), begin(blas::H(b)), 0., begin(c)); // c=a†b†, c†=ba - BOOST_REQUIRE( c[1][0] == complex(116, -25) and c[1][1] == complex(186, -65) ); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::T(a), blas::H(b), 0., c); // c=a⸆b†, c†=ba⸆† - BOOST_REQUIRE( c[1][0] == complex(118, 5) and c[1][1] == complex(122, 45) ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(blas::H(b)), 0., begin(c)); // c=a⸆b†, c†=ba⸆† - BOOST_REQUIRE( c[1][0] == complex(118, 
5) and c[1][1] == complex(122, 45) ); - } - { - multi::array c({2, 2}); - blas::gemm(1., blas::T(a), blas::T(b), 0., c); // c=a⸆b⸆, c⸆=ba - BOOST_REQUIRE( c[1][0] == complex(116, 25) and c[1][1] == complex(186, 65) ); - } - { - multi::array c({2, 2}); - blas::gemm_n(1., begin(blas::T(a)), size(blas::T(a)), begin(blas::T(b)), 0., begin(c)); // c=a⸆b⸆, c⸆=ba - BOOST_REQUIRE( c[1][0] == complex(116, 25) and c[1][1] == complex(186, 65) ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_nonsquare_automatic) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - { 1. + 2.*I, 3. - 3.*I, 1.-9.*I}, - { 9. + 1.*I, 7. + 4.*I, 1.-8.*I}, - }; - multi::array const b = { - { 11.+1.*I, 12.+1.*I, 4.+1.*I, 8.-2.*I}, - { 7.+8.*I, 19.-2.*I, 2.+1.*I, 7.+1.*I}, - { 5.+1.*I, 3.-1.*I, 3.+8.*I, 1.+1.*I} - }; - { - multi::array c({2, 4}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == complex(112, 12) ); - } - { - multi::array c({2, 4}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == complex(112, 12) ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_realcomplex_complex_nonsquare_automatic) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - { 1., 3., 1.}, - { 9., 7., 1.}, - }; - multi::array const b = { - { 11.+1.*I, 12.+1.*I, 4.+1.*I, 8.-2.*I}, - { 7.+8.*I, 19.-2.*I, 2.+1.*I, 7.+1.*I}, - { 5.+1.*I, 3.-1.*I, 3.+8.*I, 1.+1.*I} - }; - { - multi::array c = blas::gemm(1., a, b); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == complex(53, 24) ); - } - { - multi::array c({2, 4}); - c = blas::gemm(1., a, b); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == complex(53, 24) ); - } - { - multi::array c({2, 4}); - blas::gemm(1., a, b, 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == complex(53, 24) ); - } - { - multi::array c({2, 4}); - blas::gemm_n(1., begin(a), size(a), begin(b), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - 
BOOST_REQUIRE( c[1][2] == complex(53, 24) ); - } - { - multi::array const a_real = { - { 1., 3., 1.}, - { 9., 7., 1.}, - }; - multi::array c({2, 4}); - blas::real_doubled(c) = blas::gemm(1., a_real, blas::real_doubled(b)); - - BOOST_REQUIRE( c[1][2] == complex(53, 24) ); - } -} - -BOOST_AUTO_TEST_CASE(submatrix_result_issue_97) { - using complex = std::complex; constexpr complex I{0, 1}; - multi::array M = { - {2. + 3.*I, 2. + 1.*I, 1. + 2.*I}, - {4. + 2.*I, 2. + 4.*I, 3. + 1.*I}, - {7. + 1.*I, 1. + 5.*I, 0. + 3.*I} - }; - auto M2 = +M({0, 3}, {0, 1}); - BOOST_REQUIRE( M2 == M({0, 3}, {0, 1}) ); -} - - -BOOST_AUTO_TEST_CASE(blas_context_gemm) { - using complex = std::complex; static constexpr complex I{0, 1}; - auto rand = [d=std::normal_distribution<>{}, g=std::mt19937{}]()mutable{return d(g) + d(g)*I;}; // NOLINT(cert-msc32-c, cert-msc51-cpp): test purposes - - multi::array A({30, 40}); - multi::array B({40, 50}); - - std::generate(A.elements().begin(), A.elements().end(), rand); - std::generate(B.elements().begin(), B.elements().end(), rand); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_nonsquare_hermitized_second_gemm_range) { - multi::array const a({2, 3}, 0.); - multi::array const b({4, 3}, 0.); - { - multi::array c({2, 4}); - c() = blas::gemm(0.1, a, blas::H(b)); - BOOST_REQUIRE_CLOSE( c[1][2], 0., 0.00001 ); - } - { - multi::array c = blas::gemm(0.1, a, blas::H(b)); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE( c[1][2] == 0. ); - } - { - multi::array const a = { - {1, 3, 1}, - {9, 7, 1}, - }; - (void)a; - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_nonsquare_hermitized_second_gemm_range) { - using complex = std::complex; - multi::array const a({2, 3}, 0.); - multi::array const b({4, 3}, 0.); - { - multi::array c({2, 4}, 999.); - blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); - BOOST_REQUIRE( c[1][2] != 999. ); - } - { - multi::array c = blas::gemm(1., a, blas::H(b)); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE( c[1][2] == 0. 
); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_real_nonsquare_hermitized_second) { - namespace blas = multi::blas; - multi::array const a = { - {1, 3, 1}, - {9, 7, 1}, - }; - multi::array const b = { - {11, 7, 5}, - {12, 19, 3}, - { 4, 2, 3}, - { 8, 7, 1} - }; - { - multi::array c({2, 4}); - blas::gemm(1., a, blas::H(b), 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 53. ); - } - { - multi::array c({2, 4}); - blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 53. ); - } - { - multi::array c({2, 4}); - blas::gemm(0.1, a, blas::H(b), 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE_CLOSE( c[1][2] , 5.3 , 0.00001 ); - } - { - multi::array c({2, 4}); - blas::gemm_n(0.1, begin(a), size(a), begin(blas::H(b)), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE_CLOSE( c[1][2] , 5.3 , 0.00001 ); - } - { - multi::array c({2, 4}); - c() = blas::gemm(0.1, a, blas::H(b)); - } - { - multi::array c = blas::gemm(0.1, a, blas::H(b)); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE_CLOSE( c[1][2] , 5.3 , 0.00001 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_gemm_complex_real_nonsquare_hermitized_second) { - namespace blas = multi::blas; - using complex = std::complex; - multi::array const a = { - {1., 3., 1.}, - {9., 7., 1.}, - }; - multi::array const b = { - {11., 7., 5.}, - {12., 19., 3.}, - { 4., 2., 3.}, - { 8., 7., 1.} - }; - { - multi::array c({2, 4}); - blas::gemm(1., a, blas::H(b), 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 53. ); - } - { - multi::array c({2, 4}); - blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE( c[1][2] == 53. 
); - } - { - multi::array c({2, 4}); - blas::gemm(0.1, a, blas::H(b), 0., c); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE_CLOSE( real(c[1][2]) , 5.3 , 0.00001 ); - } - { - multi::array c({2, 4}); - blas::gemm_n(0.1, begin(a), size(a), begin(blas::H(b)), 0., begin(c)); // c=ab, c⸆=b⸆a⸆ - BOOST_REQUIRE_CLOSE( real(c[1][2]) , 5.3 , 0.00001 ); - } - { - multi::array c({2, 4}); - c() = blas::gemm(0.1, a, blas::H(b)); - } - { - multi::array c = blas::gemm(0.1, a, blas::H(b)); // c=ab⸆, c⸆=ba⸆ - BOOST_REQUIRE_CLOSE( real(c[1][2]) , 5.3 , 0.00001 ); - } -} - -BOOST_AUTO_TEST_CASE(blas_gemm_1xn_complex) { - using complex = std::complex; - multi::array const a({1, 100}, 1.); - multi::array const b({1, 100}, 1.); - - multi::array c({1, 1}, 999.); - blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); - BOOST_REQUIRE( c[0][0] == 100. ); -} - -BOOST_AUTO_TEST_CASE(blas_gemm_nx1_times_1x1_complex_inq_hydrogen_case) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a({3, 1}, 2. + 1.*I); - multi::array const b({1, 1}, 3. + 4.*I); - - multi::array c({3, 1}, 999.); - blas::gemm_n(1., begin(a), size(a), begin(blas::H(b)), 0., begin(c)); - BOOST_TEST_REQUIRE( c[0][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); - BOOST_TEST_REQUIRE( c[1][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); - BOOST_TEST_REQUIRE( c[2][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); -} - -BOOST_AUTO_TEST_CASE(blas_gemm_nx1_times_1x1_complex_inq_hydrogen_case_no_n_interface) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a({3 , 1}, 2. + 1.*I); - multi::array const b({10, 1}, 3. + 4.*I); - - multi::array c({3, 10}, 999.); - blas::gemm(1., a, blas::H(b), 0., c); - BOOST_TEST_REQUIRE( c[0][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); - BOOST_TEST_REQUIRE( c[1][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); - BOOST_TEST_REQUIRE( c[0][1] == (2. + 1.*I)*std::conj(3. 
+ 4.*I) ); -} - -BOOST_AUTO_TEST_CASE(blas_gemm_nx1_times_1x1_1x1_complex_inq_hydrogen_case_complex_value_hermitized) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a({1, 1}, 2. + 1.*I); - multi::array const b({1, 1}, 3. + 4.*I); - - multi::array c({1, 1}, 999.); - c = blas::gemm(1., a, blas::H(b)); - BOOST_REQUIRE( c[0][0] == (2. + 1.*I)*std::conj(3. + 4.*I) ); -} - -BOOST_AUTO_TEST_CASE(blas_gemm_nx1_times_1x1_1x1_complex_inq_hydrogen_case_complex_value) { - using complex = std::complex; complex const I{0, 1}; - multi::array const a({1, 1}, 2. + 1.*I); - multi::array const b({1, 1}, 3. + 4.*I); - - multi::array c({1, 1}, 999.); - c = blas::gemm(1., a, b); - BOOST_REQUIRE( c[0][0] == (2. + 1.*I)*(3. + 4.*I) ); -} - -BOOST_AUTO_TEST_CASE(blas_gemm_nx1_times_1x1_1x1_complex_inq_hydrogen_case) { - using complex = std::complex; - multi::array const a({1, 1}, 2.); - multi::array const b({1, 1}, 3.); - - multi::array c({1, 1}, 999.); - c = blas::gemm(1., a, b); - BOOST_REQUIRE( c[0][0] == 6. 
); -} - -BOOST_AUTO_TEST_CASE(blas_gemm_inq_case) { // https://gitlab.com/correaa/boost-multi/-/issues/97 - using complex = std::complex; complex const I{0, 1}; - multi::array mat({10, 2}, 1.0 + 3.*I); - multi::array vec({10, 1}, -2.0 + 4.*I); - - mat({0, 10}, {1, 2}) = vec; - - namespace blas = multi::blas; - - { - auto olap1 =+ blas::gemm(1., blas::H(mat) , vec); - auto olap2 =+ blas::gemm(1., blas::H(mat({0, 10}, {0, 1})), vec); - - BOOST_REQUIRE( blas::H(mat)[1].size() == (~vec)[0].size() ); - BOOST_REQUIRE( blas::dot(blas::H(mat)[0], (~vec)[0]) == olap1[0][0] ); - BOOST_REQUIRE( std::inner_product(blas::H(mat)[0].begin(), blas::H(mat)[0].end(), (~vec)[0].begin(), complex{0}) == olap1[0][0] ); - - multi::array mat2 = mat({0, 10}, {0, 1}); - auto olap3 =+ blas::gemm(1., blas::H(mat2), vec); - - BOOST_REQUIRE(olap1[0][0] == olap2[0][0]); - BOOST_REQUIRE(olap3[0][0] == olap2[0][0]); - } - { - multi::array mat2 = mat({0, 3}, {0, 1}); - auto olap3 =+ blas::gemm(1., blas::H(mat({0, 3}, {0, 1})), vec); - BOOST_REQUIRE( (+blas::gemm(1., blas::H(mat2), vec))[0][0] == (+blas::gemm(1., blas::H(mat({0, 3}, {0, 1})), vec))[0][0] ); - } -} -#endif - -BOOST_AUTO_TEST_CASE(blas_issue_109_part2) { - multi::array const A({ 3, 4}, 5.); // NOLINT(readability-identifier-length) BLAS naming - multi::array const B({ 2, 3}, 7.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({2, 4}, 999.); // NOLINT(readability-identifier-length) BLAS naming - blas::gemm(1., ~A, ~B, 0., ~C); - - BOOST_TEST_REQUIRE( C[0][0] == 105. ); - BOOST_TEST_REQUIRE( C[0][1] == 105. ); - BOOST_TEST_REQUIRE( C[1][0] == 105. 
); -} - -BOOST_AUTO_TEST_CASE(blas_issue_109) { - multi::array const A({ 3, 4}, 5.); // NOLINT(readability-identifier-length) BLAS naming - multi::array const B({ 2, 3}, 7.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({4, 2}, 999.); // NOLINT(readability-identifier-length) BLAS naming - blas::gemm(1., ~A, ~B, 0., C); - - BOOST_TEST_REQUIRE( C[0][0] == 105. ); - BOOST_TEST_REQUIRE( C[0][1] == 105. ); - BOOST_TEST_REQUIRE( C[1][0] == 105. ); -} - -BOOST_AUTO_TEST_CASE(blas_issue_109_part2_complex) { - multi::array, 2> const A({ 3, 4}, 5.); // NOLINT(readability-identifier-length) BLAS naming - multi::array, 2> const B({ 2, 3}, 7.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array, 2> C({2, 4}, 999.); // NOLINT(readability-identifier-length) BLAS naming - blas::gemm(1., ~A, ~B, 0., ~C); - - BOOST_TEST_REQUIRE( C[0][0] == 105. ); - BOOST_TEST_REQUIRE( C[0][1] == 105. ); - BOOST_TEST_REQUIRE( C[1][0] == 105. ); -} - -BOOST_AUTO_TEST_CASE(blas_issue_109_complex) { - multi::array, 2> const A({ 3, 4}, 5.); // NOLINT(readability-identifier-length) BLAS naming - multi::array, 2> const B({ 2, 3}, 7.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array, 2> C({4, 2}, 999.); // NOLINT(readability-identifier-length) BLAS naming - blas::gemm(1., ~A, ~B, 0., C); - - BOOST_TEST_REQUIRE( C[0][0] == 105. ); - BOOST_TEST_REQUIRE( C[0][1] == 105. ); - BOOST_TEST_REQUIRE( C[1][0] == 105. ); -} - -BOOST_AUTO_TEST_CASE(blas_issue_109_complex_mx2) { - multi::array, 2> const A({ 3, 4}, 5.); // NOLINT(readability-identifier-length) BLAS naming - multi::array, 2> const B({ 2, 3}, 7.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array, 2> C({4, 2}, 999.); // NOLINT(readability-identifier-length) BLAS naming - blas::gemm(1., ~A, ~B, 0., C); - - BOOST_TEST_REQUIRE( C[0][0] == 105. ); - BOOST_TEST_REQUIRE( C[1][0] == 105. 
); -} - -BOOST_AUTO_TEST_CASE(blas_issue_109_complex_mx1) { - multi::array, 2> const A({ 3, 4}, 5.); // NOLINT(readability-identifier-length) BLAS naming - multi::array, 2> const B({ 1, 3}, 7.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array, 2> C({4, 1}, 999.); // NOLINT(readability-identifier-length) BLAS naming - blas::gemm(1., ~A, ~B, 0., C); - - BOOST_TEST_REQUIRE( C[0][0] == 105. ); - BOOST_TEST_REQUIRE( C[1][0] == 105. ); -} - -BOOST_AUTO_TEST_CASE(blas_issue_109_double_mx1) { - multi::array const A({ 3, 4}, 5.); // NOLINT(readability-identifier-length) BLAS naming - multi::array const B({ 1, 3}, 7.); // NOLINT(readability-identifier-length) BLAS naming - - multi::array C({4, 1}, 999.); // NOLINT(readability-identifier-length) BLAS naming - blas::gemm(1., ~A, ~B, 0., C); - - BOOST_TEST_REQUIRE( C[0][0] == 105. ); - BOOST_TEST_REQUIRE( C[1][0] == 105. ); -} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/gemv.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/gemv.cpp deleted file mode 100644 index 7cb3bd3a84..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/gemv.cpp +++ /dev/null @@ -1,267 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -// © Alfredo A. 
Correa 2020 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS gemv" -#define BOOST_TEST_DYN_LINK -#include - -#include "config.hpp" - -#include "../../../adaptors/blas/gemv.hpp" -#include "../../../array.hpp" - -#include "../../../utility.hpp" - -#include "../../blas/axpy.hpp" -#include "../../blas/dot.hpp" -#include "../../blas/gemm.hpp" -#include "../../blas/nrm2.hpp" - -#include - -namespace multi = boost::multi; -namespace blas = multi::blas; - -template void what(T&&) = delete; - -template -void MV(M const& a, VI const& x, VO&& y) { // NOLINT(readability-identifier-naming,readability-identifier-length) BLAS naming - std::transform( - begin(a), end(a), begin(y), - [&x](auto&& row){return std::inner_product(begin(row), end(row), begin(x), 0.);} - ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_gemv) { - multi::array const a = { // NOLINT(readability-identifier-length) BLAS naming - { 9., 24., 30., 9.}, - { 4., 10., 12., 7.}, - {14., 16., 36., 1.} - }; - multi::array const x = {1.1, 2.1, 3.1, 4.1}; // NOLINT(readability-identifier-length) BLAS naming - { - multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) BLAS naming - blas::gemv_n(1., begin(a), size(a), begin(x), 0., begin(y)); - BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.0001 ); - BOOST_REQUIRE_CLOSE( y[2] , +blas::dot(a[2], x) , 0.0001 ); - } - { - multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) BLAS naming - multi::array const aT = ~a; - blas::gemv_n(1., begin(~aT), size(~aT), begin(x), 0., begin(y)); - BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.0001 ); - BOOST_REQUIRE_CLOSE( y[2] , +blas::dot(a[2], x), 0.0001 ); - } - { - multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) BLAS naming - auto mv = blas::gemv(1., a, x); - copy_n(mv.begin(), mv.size(), y.begin()); - BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 ); - - multi::array 
w2(multi::extensions_t<1>{multi::iextension{size(a)}}); - MV(a, x, w2); - BOOST_REQUIRE_CLOSE( w2[0] , y[0], 0.00001 ); - } - { - multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) BLAS naming - y = blas::gemv(1., a, x); - BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 ); - } - { - multi::array y = blas::gemv(1., a, x); // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 ); - } - { - multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}, 0.); // NOLINT(readability-identifier-length) BLAS naming - y += blas::gemv(1., a, x); - BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 ); - } - { - multi::array y = {4., 5., 6.}; // NOLINT(readability-identifier-length) BLAS naming - blas::gemv(1.1, a, x, 1., y); // y = a*M*x + b*y - BOOST_REQUIRE_CLOSE( y[1] , 105.43 , 0.00001 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_gemv_real) { - namespace blas = multi::blas; - - using std::abs; - multi::array const a = { // NOLINT(readability-identifier-length) BLAS naming - { 9., 24., 30., 9.}, - { 4., 10., 12., 7.}, - {14., 16., 36., 1.} - }; - multi::array const x = {1.1, 2.1, 3.1, 4.1}; // NOLINT(readability-identifier-length) BLAS naming - { - multi::array y = {4., 5., 6.}; // NOLINT(readability-identifier-length) BLAS naming - double const alpha = 1.1; - double const beta = 1.2; - blas::gemv(alpha, a, x, beta, y); // y = a*M*x + b*y - - multi::array const y3 = {214.02, 106.43, 188.37}; - BOOST_REQUIRE( abs(y[1] - y3[1]) < 2e-14 ); - } - { - auto Y = +blas::gemv(1., a, x); // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE_CLOSE( Y[0] , +blas::dot(a[0], x) , 0.00001 ); - BOOST_REQUIRE_CLOSE( Y[1] , +blas::dot(a[1], x) , 0.00001 ); - BOOST_REQUIRE_CLOSE( Y[2] , +blas::dot(a[2], x) , 0.00001 ); - } - { - multi::array const x = {1., 2., 3.}; // NOLINT(readability-identifier-length) BLAS naming - multi::array const y = {4., 5., 6.}; // 
NOLINT(readability-identifier-length) BLAS naming - multi::array const dot = blas::gemv(1., multi::array({x}), y); - BOOST_REQUIRE( dot[0] == blas::dot(x, y) ); - } - { - using blas::operators::operator%; - using blas::operators::operator-; - using blas::operators::operator^; - BOOST_REQUIRE_SMALL( ((~+~a)%x - a%x)^2 , 1e-13 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_gemv_real_complex) { - namespace blas = multi::blas; - using complex = std::complex; //#define I *std::complex(0, 1) - using std::abs; - multi::array const M = { // NOLINT(readability-identifier-length) BLAS naming - { 9., 24., 30., 9.}, - { 4., 10., 12., 7.}, - {14., 16., 36., 1.} - }; - multi::array const X = {1.1, 2.1, 3.1, 4.1}; // NOLINT(readability-identifier-length) BLAS naming - { - multi::array Y = {4., 5., 6.}; // NOLINT(readability-identifier-length) BLAS naming - double const alpha = 1.1; - double const beta = 1.2; - blas::gemv(alpha, M, X, beta, Y); // y = a*M*x + b*y - - multi::array const Y3 = {214.02, 106.43, 188.37}; - - using blas::operators::operator-; - double const n2{blas::nrm2(Y - Y3)}; - BOOST_REQUIRE_SMALL( n2 , 1e-13); - } -} - -#if CUDA_FOUND -#include -BOOST_AUTO_TEST_CASE(multi_blas_gemv_real_complex_thrust) { - namespace blas = multi::blas; - using complex = thrust::complex; //#define I *std::complex(0, 1) - using std::abs; - multi::array const M = { - { 9., 24., 30., 9.}, - { 4., 10., 12., 7.}, - {14., 16., 36., 1.} - }; - multi::array const X = {1.1, 2.1, 3.1, 4.1}; - { - multi::array Y = {4., 5., 6.}; - double const a = 1.1; - double const b = 1.2; - blas::gemv(a, M, X, b, Y); // y = a*M*x + b*y - - multi::array const Y3 = {214.02, 106.43, 188.37}; - } - { - multi::array Y = {4., 5., 6.}; - blas::gemv(1.1, M, X, 1., Y); // y = a*M*x + b*y - BOOST_REQUIRE( Y[1] == 105.43 ); - } -} -#endif - -BOOST_AUTO_TEST_CASE(multi_blas_gemv_complex) { - namespace blas = multi::blas; - using complex = std::complex; std::complex const I{0, 1}; // NOLINT(readability-identifier-length) 
imag unit - - using std::abs; - multi::array const a = { // NOLINT(readability-identifier-length) BLAS naming - {2. + 3.*I, 2. + 1.*I, 1. + 2.*I}, - {4. + 2.*I, 2. + 4.*I, 3. + 1.*I}, - {7. + 1.*I, 1. + 5.*I, 0. + 3.*I} - }; - multi::array const x = {1. + 2.*I, 2. + 1.*I, 9. + 2.*I}; // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE(( +blas::gemv(1., a, x) == multi::array{4. + 31.*I, 25. + 35.*I, -4. + 53.*I} )); - - auto aT = +~a; - BOOST_REQUIRE(( +blas::gemv(1., ~aT, x) == multi::array{4. + 31.*I, 25. + 35.*I, -4. + 53.*I} )); - - BOOST_REQUIRE( +blas::gemv(1., ~a, x) == (multi::array{63. + 38.*I, -1. + 62.*I, -4. + 36.*I}) ); - BOOST_REQUIRE( +blas::gemv(1., ~a, x) == +blas::gemv(1., aT, x) ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_gemv_temporary) { - using complex = std::complex; - - multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming - {1., 0., 0.}, - {0., 1., 0.}, - {0., 0., 1.} - }; - - auto const B = [](auto array) { // NOLINT(readability-identifier-length) BLAS naming - auto rand = [gauss = std::normal_distribution<>{}, gen = std::mt19937{1}]() mutable {return complex{gauss(gen), gauss(gen)};}; // NOLINT(cert-msc32-c,cert-msc51-cpp) test purposes - std::generate(array.elements().begin(), array.elements().end(), rand); - return array; - }(multi::array({3, 3})); - - using blas::operators::operator*; - using blas::operators::operator-; - using blas::operators::operator^; - BOOST_REQUIRE( (((A*B)[0] - B[0])^2) == 0. ); - BOOST_REQUIRE( (((A*B)[1] - B[1])^2) == 0. ); - BOOST_REQUIRE( (((A*B)[2] - B[2])^2) == 0. 
); -} - -BOOST_AUTO_TEST_CASE(multi_blas_gemv_context) { - multi::array const a = { // NOLINT(readability-identifier-length) BLAS naming - { 9., 24., 30., 9.}, - { 4., 10., 12., 7.}, - {14., 16., 36., 1.} - }; - multi::array const x = {1.1, 2.1, 3.1, 4.1}; // NOLINT(readability-identifier-length) conventional name in BLAS - - blas::context ctxt; - { - multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) conventional name in BLAS - blas::gemv_n(ctxt, 1., begin(a), size(a), begin(x), 0., begin(y)); - BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.0001 ); - BOOST_REQUIRE_CLOSE( y[2] , +blas::dot(a[2], x) , 0.0001 ); - } - { - multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) conventional name in BLAS - multi::array const aT = ~a; - blas::gemv_n(ctxt, 1., begin(~aT), size(~aT), begin(x), 0., begin(y)); - BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 ); - BOOST_REQUIRE_CLOSE( y[2] , +blas::dot(a[2], x) , 0.00001 ); - } - { - multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) conventional name in BLAS - auto&& mv = blas::gemv(ctxt, 1., a, x); - copy_n(mv.begin(), mv.size(), y.begin()); - BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 ); - } - { - multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}); // NOLINT(readability-identifier-length) conventional name in BLAS - y = blas::gemv(ctxt, 1., a, x); - BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 ); - } - { - multi::array y = blas::gemv(ctxt, 1., a, x); // NOLINT(readability-identifier-length) conventional name in BLAS - BOOST_REQUIRE_CLOSE( y[1] , 91.3 , 0.00001 ); - } - { - multi::array y(multi::extensions_t<1>{multi::iextension{size(a)}}, 0.); // NOLINT(readability-identifier-length) conventional name in BLAS - y += blas::gemv(ctxt, 1., a, x); - BOOST_REQUIRE_CLOSE( y[1] , 91.3, 0.00001 ); - } - { - multi::array y = {4., 5., 6.}; // 
NOLINT(readability-identifier-length) conventional name in BLAS - y += blas::gemv(ctxt, 1.1, a, x); - BOOST_REQUIRE_CLOSE( y[1] , 105.43, 0.00001 ); - } -} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/herk.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/herk.cpp deleted file mode 100644 index ca03999d35..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/herk.cpp +++ /dev/null @@ -1,216 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -// Copyright 2019-2021 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS herk" -#include - -#include "../../../adaptors/blas/gemm.hpp" -#include "../../../adaptors/blas/herk.hpp" -#include "../../../adaptors/blas/nrm2.hpp" - -#include "../../../array.hpp" - -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(multi_blas_herk) { - namespace blas = multi::blas; - using complex = std::complex; constexpr complex I{0, 1}; // NOLINT(readability-identifier-length) imag unit - - multi::array const a = { // NOLINT(readability-identifier-length) conventional name in BLAS - { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I}, - { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I} - }; - { - multi::array c({2, 2}, 9999.); // NOLINT(readability-identifier-length) conventional name in BLAS - blas::herk(a, c); - BOOST_REQUIRE( c[1][0] == complex(50., -49.) ); - BOOST_REQUIRE( c[0][1] == complex(50., +49.) ); - - multi::array const c_copy = blas::herk(1., a); - BOOST_REQUIRE( c == c_copy ); - - BOOST_REQUIRE( +blas::gemm(1., a, blas::H(a)) == blas::herk(a) ); - } -} - -BOOST_AUTO_TEST_CASE(inq_case) { - namespace blas = multi::blas; - multi::array const a = { // NOLINT(readability-identifier-length) conventional name in BLAS - {0., 1., 2.}, - {3., 4., 5.}, - {6., 7., 8.}, - {9., 10., 11.} - }; - BOOST_REQUIRE( (+blas::gemm(1., a, blas::T(a)))[1][2] == 86. 
); - { - multi::array c({4, 4}); // NOLINT(readability-identifier-length) conventional name in BLAS - blas::herk(1.0, a, c); - BOOST_REQUIRE( c[1][2] == (+blas::gemm(1., a, blas::T(a)))[1][2] ); - // BOOST_REQUIRE( c[2][1] == (+blas::gemm(1., a, blas::T(a)))[2][1] ); - } - { - multi::array c = blas::herk(1.0, a); // NOLINT(readability-identifier-length) conventional name in BLAS - BOOST_REQUIRE( c == +blas::gemm(1., a, blas::T(a)) ); - BOOST_REQUIRE( blas::herk(a) == +blas::gemm(1., a, blas::T(a)) ); - BOOST_REQUIRE( blas::herk(2.0, a) == +blas::gemm(2.0, a, blas::T(a)) ); - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_herk_real) { - namespace blas = multi::blas; - multi::array const a = { // NOLINT(readability-identifier-length) BLAS naming - { 1., 3., 4.}, - { 9., 7., 1.} - }; - { - multi::array c({2, 2}, 9999); // NOLINT(readability-identifier-length) BLAS naming - blas::herk(1., a, c); -// BOOST_REQUIRE( c[1][0] == 34. ); - BOOST_REQUIRE( c[0][1] == 34. ); - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_case) { - namespace blas = multi::blas; - multi::array const a = {{1., 2., 3.}}; // NOLINT(readability-identifier-length) BLAS naming - multi::array b = blas::herk(a); // NOLINT(readability-identifier-length) BLAS naming - - BOOST_REQUIRE( size(b) == 1 ); - BOOST_REQUIRE( b[0][0] == 1.*1. + 2.*2. + 3.*3. ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_case_scale) { - namespace blas = multi::blas; - multi::array const a = {{1., 2., 3.}}; // NOLINT(readability-identifier-length) BLAS naming - multi::array b = blas::herk(0.1, a); // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( size(b) == 1 ); - BOOST_TEST( b[0][0] == (1.*1. + 2.*2. 
+ 3.*3.)*0.1 ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_real_case) { - namespace blas = multi::blas; - using complex = std::complex; - multi::array const a = { {1., 2., 3.} }; // NOLINT(readability-identifier-length) BLAS naming - multi::array b = blas::herk(1.0, a); // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( size(b) == 1 ); - BOOST_REQUIRE( b[0][0] == 1.*1. + 2.*2. + 3.*3. ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_real_case_scale, *boost::unit_test::tolerance(0.00001)) { - namespace blas = multi::blas; - using complex = std::complex; - multi::array const a = {{1., 2., 3.}}; // NOLINT(readability-identifier-length) BLAS naming - multi::array b = blas::herk(0.1, a); // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( size(b) == 1 ); - BOOST_TEST( real( b[0][0]/0.1 ) == 1.*1. + 2.*2. + 3.*3. ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case) { - namespace blas = multi::blas; - using complex = std::complex; auto const I = complex{0, 1}; // NOLINT(readability-identifier-length) imag unit - - multi::array const a = {{1. + 2.*I, 2.+3.*I, 3. + 4.*I}}; // NOLINT(readability-identifier-length) BLAS naming - multi::array b = blas::herk(a); // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( size(b) == 1 ); - BOOST_REQUIRE( b[0][0] == std::norm(1. + 2.*I) + std::norm(2.+3.*I) + std::norm(3. + 4.*I) ); - - BOOST_TEST( std::sqrt(real(blas::herk(a)[0][0])) == blas::nrm2(a[0])() ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case_hermitized_out_param) { - namespace blas = multi::blas; - using complex = std::complex; auto const I = complex{0, 1}; // NOLINT(readability-identifier-length) imag unit - - multi::array const a = {{1. + 2.*I}, {2.+3.*I}, {3. 
+ 4.*I}}; // NOLINT(readability-identifier-length) BLAS naming - multi::array b({1, 1}); // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( size(b) == 1 ); - - blas::herk(blas::filling::upper, 1.0, blas::H(a), 0.0, b); - - BOOST_REQUIRE( b[0][0] == std::norm(1. + 2.*I) + std::norm(2.+3.*I) + std::norm(3. + 4.*I) ); - - BOOST_TEST( std::sqrt(real(b[0][0])) == blas::nrm2(blas::T(a)[0])() ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case_hermitized) { - using complex = std::complex; auto const I = complex{0, 1}; // NOLINT(readability-identifier-length) conventional name in BLAS - - multi::array a = {{1. + 2.*I}, {2.+3.*I}, {3. + 4.*I}}; // NOLINT(readability-identifier-length) BLAS naming - - namespace blas = multi::blas; - multi::array b = blas::herk(blas::H(a)); // NOLINT(readability-identifier-length) BLAS naming - - BOOST_REQUIRE( size(b) == 1 ); - BOOST_REQUIRE( b[0][0] == std::norm(1. + 2.*I) + std::norm(2.+3.*I) + std::norm(3. + 4.*I) ); - - BOOST_TEST( std::sqrt(real(blas::herk(blas::H(a))[0][0])) == blas::nrm2(rotated(a)[0])() ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_herk1x1_complex_case_hermitized_auto) { - namespace blas = multi::blas; - using complex = std::complex; auto const I = complex{0, 1}; // NOLINT(readability-identifier-length) imag unit - - multi::array arr = {{1. + 2.*I}, {2.+3.*I}, {3. + 4.*I}}; - auto arr2 = blas::herk(1., blas::hermitized(arr)); - static_assert( std::is_same>{}, "!" ); - BOOST_REQUIRE( size(arr2) == 1 ); - BOOST_REQUIRE( arr2[0][0] == std::norm(1. + 2.*I) + std::norm(2.+3.*I) + std::norm(3. 
+ 4.*I) ); - - BOOST_TEST( std::sqrt(real(blas::herk(blas::H(arr))[0][0])) == blas::nrm2(rotated(arr)[0])() ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_herk_complex_identity) { - namespace blas = multi::blas; - using complex = std::complex; auto const I = complex{0, 1}; // NOLINT(readability-identifier-length) imag unit - - multi::array const arr = { // NOLINT(readability-identifier-length) : conventional one-letter operation BLASs - { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I}, - { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I} - }; - - { - multi::array arr2({2, 2}, 9999.); // NOLINT(readability-identifier-length) conventional one-letter operation BLASs - blas::herk(blas::filling::lower, 1., arr, 0., arr2); // c†=c=aa†=(aa†)†, `c` in lower triangular - BOOST_REQUIRE( arr2[1][0]==complex(50., -49.) ); - BOOST_REQUIRE( arr2[0][1]==9999. ); - } - { - multi::array c({2, 2}, 9999.); // NOLINT(readability-identifier-length) conventional one-letter operation BLASs - static_assert(blas::is_conjugated{}, "!" ); - - blas::herk(blas::filling::lower, 1., arr, 0., blas::H(c)); // c†=c=aa†=(aa†)†, `c` in upper triangular - - BOOST_REQUIRE( blas::H(c)[1][0]==complex(50., -49.) ); - BOOST_REQUIRE( blas::H(c)[0][1]==9999. ); - } - { - multi::array c({3, 3}, 9999.); // NOLINT(readability-identifier-length) : conventional one-letter operation BLASs - herk(blas::filling::lower, 1., blas::T(arr), 0., blas::T(c)); // c†=c=aT(aT)† not supported - BOOST_REQUIRE( transposed(c)[1][0]==complex(52., -90.) ); - BOOST_REQUIRE( transposed(c)[0][1]==9999. ); - } - { - multi::array c({3, 3}, 9999.); // NOLINT(readability-identifier-length) : conventional one-letter operation BLASs - blas::herk(blas::filling::lower, 1., blas::T(arr), 0., blas::H(blas::T(c))); // c†=c=aT(aT)† not supported - BOOST_REQUIRE( blas::H(blas::T(c))[1][0]==complex(52., -90.) ); - BOOST_REQUIRE( blas::H(blas::T(c))[0][1]==9999. 
); - } - { - multi::array c({2, 2}, 9999.); // NOLINT(readability-identifier-length) : conventional one-letter operation BLAS - blas::herk(blas::U, 1., arr, 0., c); // c†=c=aa†=(aa†)†, `c` in upper triangular - BOOST_REQUIRE( c[0][1] == complex(50., +49.) ); - BOOST_REQUIRE( c[1][0] == 9999. ); - } - { - multi::array c({2, 2}, 9999.); // NOLINT(readability-identifier-length) : conventional one-letter operation BLAS - blas::herk(1., arr, c); // c†=c=aa†=(aa†)† - BOOST_REQUIRE( c[0][1] == complex(50., +49.) ); - BOOST_REQUIRE( c[1][0] == complex(50., -49.) ); - } - { - multi::array c({3, 3}, 9999.); // NOLINT(readability-identifier-length) : conventional one-letter operation BLAS - blas::herk(blas::L, 1., blas::H(arr), 0., c); // c†=c=aa†=(aa†)†, `c` in lower triangular - BOOST_REQUIRE( c[1][0] == complex(52., 90.) ); - BOOST_REQUIRE( c[0][1] == 9999. ); - } -} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/iamax.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/iamax.cpp deleted file mode 100644 index 3b998d4355..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/iamax.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXX $0 -o $0x `pkg-config --libs blas` -lcudart -lcublas -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2019-2020 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS/cuBLAS iamax" -#define BOOST_TEST_DYN_LINK -#include - -#include "../../blas/iamax.hpp" - -#include "../../../array.hpp" -#include "../../../adaptors/cuda.hpp" -#include "../../../adaptors/blas/cuda.hpp" - -#include - -using std::cout; -namespace multi = boost::multi; -namespace blas = multi::blas; - -using complex = std::complex; constexpr complex I{0, 1}; - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax){ - multi::array const A = { - {1. + 2.*I, 2., 3., 4.}, - {5., 6. 
+ 3.*I, 7., 8.}, - {9., 10., 11.+ 4.*I, 12.} - }; - using blas::iamax; - auto chess = [](auto const& a, auto const& b){ - using std::abs; - return abs(real(a))+abs(imag(a)) < abs(real(b))+abs(imag(b)); - }; - BOOST_REQUIRE(iamax(A[1])==std::max_element(begin(A[1]), end(A[1]), chess)-begin(A[1])); - BOOST_REQUIRE(A[1][iamax(A[1])]==*std::max_element(begin(A[1]), end(A[1]), chess)); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_iamax_cuda){ - multi::cuda::array const A = { - {1. + 2.*I, 2. , 3. , 4.}, - {5. , 6. + 3.*I, 7. , 8.}, - {9. , 10. , 11.+ 4.*I, 12.} - }; - using blas::iamax; - BOOST_REQUIRE(iamax(A[1])==1); -} - - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/nrm2.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/nrm2.cpp deleted file mode 100644 index a6109ae093..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/nrm2.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*- -$CXX $0 -o $0x `pkg-config --libs blas` -lcudart -lcublas -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. 
Correa 2019-2020 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS nrm2" -#define BOOST_TEST_DYN_LINK -#include - -#include "../../blas.hpp" -#include "../../../array.hpp" -#include "../../../adaptors/cuda.hpp" -#include "../../../adaptors/blas/cuda.hpp" - -#include - -namespace multi = boost::multi; - -using complex = std::complex; constexpr complex I{0,1}; - -BOOST_AUTO_TEST_CASE(multi_blas_nrm2){ - namespace blas = multi::blas; - multi::array const A = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - BOOST_REQUIRE( blas::nrm2(A[1]) == std::sqrt(blas::dot(A[1], A[1])) ); - - { - multi::array A = {1.+I, 3.+2.*I, 3.+4.*I}; - BOOST_REQUIRE( blas::dot(A, A)() == (1.+I)*(1.+I) + (3.+2.*I)*(3.+2.*I) + (3.+4.*I)*(3.+4.*I) ); - } - { - multi::cuda::array const Agpu = A; - multi::cuda::static_array n = 1.2; - blas::nrm2(Agpu[1], n); - } - { - multi::cuda::array Agpu = A; - double n = 99.; - blas::nrm2(Agpu[1], n); // cuda supports putting scalar results in CPU - double n2{blas::nrm2(Agpu[1])}; - BOOST_REQUIRE( n == n2 ); - } -} - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/numeric.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/numeric.cpp deleted file mode 100644 index 633f562e00..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/numeric.cpp +++ /dev/null @@ -1,166 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS numeric" -#include - -#include "../../../array.hpp" -#include "../../blas/numeric.hpp" -#include "../../blas/operations.hpp" - -#include - -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_numeric_imag) { - using complex = std::complex; constexpr complex I{0, 1}; // NOLINT(readability-identifier-length) imag unit - - namespace blas = multi::blas; - multi::array array = { 1. 
+ 2.*I, 3. + 5.*I, 9. + 2.*I }; - BOOST_REQUIRE( blas::imag(array)[2] == 2. ); - BOOST_REQUIRE( blas::real(array)[2] == 9. ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_numeric_real_conjugated) { - using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit - - multi::array array = { - {1. - 3.*I, 6. + 2.*I}, - {8. + 2.*I, 2. + 4.*I}, - {2. - 1.*I, 1. + 1.*I} - }; - BOOST_REQUIRE( array[0][0] == 1. - 3.*I ); - - multi::array const carray = { - {1. - 3.*I, 6. + 2.*I}, - {8. + 2.*I, 2. + 4.*I}, - {2. - 1.*I, 1. + 1.*I} - }; - BOOST_REQUIRE( carray[0][0] == 1. - 3.*I ); - - namespace blas = multi::blas; - auto conjr = blas::make_conjugater(array.data_elements()); - - decltype(blas::make_conjugater(carray.data_elements())) ppp;// = BdataC; - ppp = conjr; - - BOOST_REQUIRE( *ppp == 1. + 3.*I ); - -// static_assert( multi::blas::is_complex_array, 2>>{}, "!"); - static_assert( blas::is_complex_array{} ); - static_assert(not blas::is_conjugated{} ); - - auto&& conjd_array = blas::conj(array); - static_assert( blas::is_conjugated{} ); - - BOOST_REQUIRE( conjd_array[0][0] == 1. + 3.*I ); - BOOST_REQUIRE( imag(*base(conjd_array)) == +3 ); - -// BOOST_TEST_REQUIRE( base(Bconj)->imag() == +3 ); - BOOST_REQUIRE( rotated(conjd_array)[1][0] == conjd_array[0][1] ); - -// BOOST_REQUIRE( base(Bconj) == -3.*I ); - static_assert( blas::is_complex_array{} ); - - BOOST_REQUIRE( blas::conj(conjd_array) == array ); - - BOOST_REQUIRE( blas::conj(array)[1][0] == std::conj(array[1][0]) ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_numeric_decay) { - using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit - - multi::array arr = { - { 1. - 3.*I, 6. + 2.*I, 9. + 3.*I}, - { 8. + 2.*I, 2. + 4.*I, 9. + 3.*I}, - { 2. - 1.*I, 1. + 1.*I, 9. + 3.*I}, - { 9. + 3.*I, 9. + 3.*I, 9. 
+ 3.*I} - }; - - namespace blas = multi::blas; - multi::array conj_arr = blas::conj(arr); - - BOOST_REQUIRE( conj_arr[2][1] == std::conj(arr[2][1]) ); - BOOST_REQUIRE( blas::conj(arr)[2][1] == std::conj(arr[2][1]) ); - - BOOST_REQUIRE( blas::transposed(arr)[1][2] == arr[2][1] ); - BOOST_REQUIRE( blas::transposed(arr) == ~arr ); - - BOOST_REQUIRE( blas::hermitized(arr)[2][1] == blas::conj(arr)[1][2] ); - BOOST_REQUIRE( blas::hermitized(arr) == blas::conj(blas::transposed(arr)) ); - - BOOST_REQUIRE( blas::real(arr)[2][1] == std::real(arr[2][1]) ); - BOOST_REQUIRE( blas::imag(arr)[2][1] == std::imag(arr[2][1]) ); - - multi::array B_real_doubled = { - { 1., -3., 6., 2., 9., 3.}, - { 8., 2., 2., 4., 9., 3.}, - { 2., -1., 1., 1., 9., 3.}, - { 9., 3., 9., 3., 9., 3.} - }; - BOOST_REQUIRE( sizes(blas::real_doubled(arr)) == sizes(B_real_doubled) ); - BOOST_REQUIRE( blas::real_doubled(arr) == B_real_doubled ); -} - -#if defined(CUDA_FOUND) and CUDA_FOUND -#include - -BOOST_AUTO_TEST_CASE(multi_blas_numeric_decay_thrust) { - using complex = thrust::complex; complex const I{0, 1}; - - multi::array B = { - {1. - 3.*I, 6. + 2.*I}, - {8. + 2.*I, 2. + 4.*I}, - {2. - 1.*I, 1. + 1.*I} - }; - - namespace blas = multi::blas; - multi::array conjB = blas::conj(B); - BOOST_REQUIRE( conjB[1][2] == conj(B[1][2]) ); -} -#endif - -BOOST_AUTO_TEST_CASE(multi_blas_numeric_real_imag_part) { - using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit - - multi::array arr = { - {1., 3., 4.}, - {9., 7., 1.} - }; - multi::array complex_arr = arr; - BOOST_REQUIRE( complex_arr[1][1] == arr[1][1] ); - - multi::array arr2 = { - {1. - 3.*I, 6. + 2.*I}, - {8. + 2.*I, 2. + 4.*I}, - {2. - 1.*I, 1. 
+ 1.*I} - }; - - multi::array arr2_real = { - {1., 6.}, - {8., 2.}, - {2., 1.} - }; - multi::array arr2_imag = { - {-3., +2.}, - {+2., +4.}, - {-1., +1.} - }; - - using multi::blas::real; - using multi::blas::imag; - - BOOST_REQUIRE( arr2_real == real(arr2) ); - BOOST_REQUIRE( real(arr2) == arr2_real ); - BOOST_REQUIRE( imag(arr2) == arr2_imag ); - - BOOST_REQUIRE( arr2[1][0] == 8. + 2.*I ); - BOOST_REQUIRE( arr2[1][0].imag() == 2. ); - - namespace blas = multi::blas; - - BOOST_REQUIRE( blas::hermitized(arr2)[1][2] == std::conj( arr2[2][1] ) ); - - blas::hermitized(arr2)[1][2] = 20. + 30.*I; - BOOST_REQUIRE( arr2[2][1] == 20. - 30.*I ); -} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/operations.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/operations.cpp deleted file mode 100644 index 13fe75c9c0..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/operations.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXX $0 -o $0x `pkg-config --libs blas` -lcudart -lcublas -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2019-2020 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS operations and cuda" -#define BOOST_TEST_DYN_LINK -#include - -#include "../../blas/dot.hpp" - -#include "../../../array.hpp" -#include "../../blas/cuda.hpp" - -#include "../../../adaptors/cuda.hpp" -#include "../../../complex.hpp" - -#include -#include -#include - -using std::cout; -namespace multi = boost::multi; -namespace blas = multi::blas; - -using complex = std::complex; constexpr complex I{0, 1}; - -BOOST_AUTO_TEST_CASE(blas_conjugated_cpu){ - multi::array const a = {5. + 2.*I, 6. + 6.*I, 7. + 2.*I, 8. - 3.*I}; - BOOST_REQUIRE( blas::C(a)[1] == conj(a[1]) ); - - namespace cuda = multi::cuda; - - cuda::array const agpu = {5. + 2.*I, 6. + 6.*I, 7. + 2.*I, 8. 
- 3.*I}; - BOOST_REQUIRE( blas::C(agpu)[1] == conj(agpu[1]) ); -} - -BOOST_AUTO_TEST_CASE(blas_conjugated_gpu){ -#if 0 - cuda::array const acu = {1. + I, 2. + 3.*I, 3. + 2.*I, 4. - 9.*I}; - cuda::array const bcu = {5. + 2.*I, 6. + 6.*I, 7. + 2.*I, 8. - 3.*I}; - - { - cuda::array ccu; - blas::dot(acu, bcu, ccu); - BOOST_REQUIRE( ccu() == 19. - 27.*I ); - } - BOOST_REQUIRE( blas::C(bcu)[1] == 2. - 3.*I ); -#endif -} - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/scal.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/scal.cpp deleted file mode 100644 index 864f190867..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/scal.cpp +++ /dev/null @@ -1,147 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -// © Alfredo A. Correa 2019-2020 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS scal" -#define BOOST_TEST_DYN_LINK -#include - -#include "../../blas/scal.hpp" - -#include "../../../array.hpp" - -namespace multi = boost::multi; -namespace blas = multi::blas; - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_n) { - multi::array arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - BOOST_REQUIRE( (arr[0][2] == 3.) and (arr[2][2] == 11.) ); - - blas::scal_n(2., arr[2].begin(), arr[2].size()); - BOOST_REQUIRE( arr[0][2] == 3. and arr[2][2] == 11.*2. ); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_it) { - multi::array arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - BOOST_REQUIRE( arr[0][2] == 3. ); - BOOST_REQUIRE( arr[2][2] == 11.); - - blas::scal(2., arr[2].begin(), arr[2].end()); - BOOST_REQUIRE( arr[0][2] == 3. ); - BOOST_REQUIRE(arr[2][2] == 11.*2. ); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_real) { - multi::array arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - BOOST_REQUIRE( arr[0][2] == 3. ); - BOOST_REQUIRE( arr[2][2] == 11. 
); - - BOOST_REQUIRE( blas::scal(1., arr[2]) == arr[2] ); - BOOST_REQUIRE( &blas::scal(1., arr[2]) == &arr[2] ); - BOOST_REQUIRE( +blas::scal(1., arr[2]) == arr[2] ); - - blas::scal(2., arr[2]); - BOOST_REQUIRE( arr[0][2] == 3. and arr[2][2] == 11.*2. ); - - BOOST_REQUIRE( &blas::scal(1., arr[2]) == &arr[2] ); -} - -//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_complex_real_case){ -// using complex = std::complex; -// multi::array A = { -// {1., 2., 3., 4.}, -// {5., 6., 7., 8.}, -// {9., 10., 11., 12.} -// }; -// BOOST_TEST( A[0][2] == 3. ); -// BOOST_TEST( A[2][2] == 11. ); - -// blas::scal(2., A[2]); // zscal (2. is promoted to complex later) -// BOOST_TEST( A[0][2] == 3. ); -// BOOST_REQUIRE( A[2][2] == 11.*2. ); - -// blas::scal(1./2, A[2]); // zdscal -// BOOST_TEST( A[0][2] == 3. ); -// BOOST_TEST( A[2][1] == 10. ); -// BOOST_TEST( A[2][2] == 11. ); - -//} - -//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_complex){ -// multi::array A = { -// {1. + 2.*I, 2. + 3.*I, 3. + 4.*I, 4. + 5.*I}, -// {5. + 2.*I, 6. + 3.*I, 7. + 4.*I, 8. + 5.*I}, -// {1. + 1.*I, 2. + 2.*I, 3. + 3.*I, 4. + 4.*I} -// }; -// blas::scal(2., A[1]); // zscal (2. is promoted to complex later) -// BOOST_TEST( A[1][2] == 14. + 8.*I ); - -// blas::scal(3.*I, A[0]); -// BOOST_TEST( A[0][1] == (2. + 3.*I)*3.*I ); - -// blas::scal(2., blas::imag(A[2])); -// assert( A[2][1] == 2. + 4.*I ); -//} - -////BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_cuda_noconst){ -//// namespace cuda = multi::cuda; -//// cuda::array A = { -//// {1. + 2.*I, 2. + 3.*I, 3. + 4.*I, 4. + 5.*I}, -//// {5. + 2.*I, 6. + 3.*I, 7. + 4.*I, 8. + 5.*I}, -//// {1. + 1.*I, 2. + 2.*I, 3. + 3.*I, 4. + 4.*I} -//// }; -//// blas::scal(2., A[1]); // zscal (2. is promoted to complex later) -//// BOOST_REQUIRE( A[1][2] == 14. + 8.*I ); - -//// cuda::array a = {1. + 10.*I, 2. + 20.*I, 3. 
+ 30.*I}; -//// blas::scal(2., a); -//// BOOST_REQUIRE(( a[1] == complex{4, 40} )); - -////// blas::scal(3., blas::imag(a)); // gives internal compilation error in gcc -////// BOOST_REQUIRE(( a[1] == complex{4, 120} )); -////} - -////BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_cuda_const){ -//// namespace cuda = multi::cuda; -//// cuda::array const A = { -//// {1. + 2.*I, 2. + 3.*I, 3. + 4.*I, 4. + 5.*I}, -//// {5. + 2.*I, 6. + 3.*I, 7. + 4.*I, 8. + 5.*I}, -//// {1. + 1.*I, 2. + 2.*I, 3. + 3.*I, 4. + 4.*I} -//// }; -//// auto A1cpy = blas::scal(2., A[1]); // zscal (2. is promoted to complex later) -//// BOOST_REQUIRE( A1cpy[2] == 14. + 8.*I ); - -////// cuda::array a = {1. + 10.*I, 2. + 20.*I, 3. + 30.*I}; -////// blas::scal(2., a); -////// BOOST_REQUIRE(( a[1] == complex{4, 40} )); - -////// blas::scal(3., blas::imag(a)); -////// BOOST_REQUIRE(( a[1] == complex{4, 120} )); -////} - -//#if 0 -//BOOST_AUTO_TEST_CASE(multi_adaptors_blas_test_scal_cuda_managed){ -// cuda::managed::array A = { -// {1. + 2.*I, 2. + 3.*I, 3. + 4.*I, 4. + 5.*I}, -// {5. + 2.*I, 6. + 3.*I, 7. + 4.*I, 8. + 5.*I}, -// {1. + 1.*I, 2. + 2.*I, 3. + 3.*I, 4. + 4.*I} -// }; -// using blas::scal; -// scal(2., A[1]); -// BOOST_REQUIRE( A[1][2] == 14. + 8.*I ); - -// scal(2., blas::imag(A[1])); -// BOOST_REQUIRE( A[1][2] == 14. + 16.*I ); -//} -//#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/swap.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/swap.cpp deleted file mode 100644 index 4032dc0390..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/swap.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXX $0 -o $0x `pkg-config --libs blas` -lboost_unit_test_framework&&$0x&&rm $0x; exit -#endif -// © Alfredo A. 
Correa 2019-2020 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS swap" -#define BOOST_TEST_DYN_LINK -#include - -#include "../../blas.hpp" - -#include "../../../array.hpp" - -#include -#include - -using std::cout; -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(lapack_potrf, *boost::unit_test::tolerance(0.00001) ){ - { - multi::array A = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - BOOST_REQUIRE( A[0][2] == 3. ); - BOOST_REQUIRE( A[2][2] == 11. ); - - multi::blas::swap(A[0], A[2]); // blas swap - BOOST_REQUIRE( A[0][2] == 11. ); - BOOST_REQUIRE( A[2][2] == 3. ); - - swap(A[0], A[2]); // built-in swap - BOOST_REQUIRE( A[0][2] == 3. ); - BOOST_REQUIRE( A[2][2] == 11. ); - } - { - multi::array A = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - BOOST_REQUIRE( A[0][0] == 1. ); - BOOST_REQUIRE( A[0][3] == 4. ); - - multi::blas::swap(rotated(A)[0], rotated(A)[3]); // blas swap (deep) - BOOST_REQUIRE( A[0][0] == 4. ); - BOOST_REQUIRE( A[0][3] == 1. ); - - swap(rotated(A)[0], rotated(A)[3]); // built-in swap (deep) - BOOST_REQUIRE( A[0][0] == 1. ); - BOOST_REQUIRE( A[0][3] == 4. ); - } - { - using complex = std::complex; complex const I{0, 1}; - multi::array A = { - {1.+ 2.*I, 2., 3., 4. + 3.*I}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - BOOST_REQUIRE( A[0][0] == 1.+ 2.*I ); - BOOST_REQUIRE( A[0][3] == 4. + 3.*I ); - multi::blas::swap(rotated(A)[0], rotated(A)[3]); // blas swap (deep) - BOOST_REQUIRE( A[0][0] == 4. + 3.*I ); - BOOST_REQUIRE( A[0][3] == 1.+ 2.*I ); - swap(rotated(A)[0], rotated(A)[3]); // built-in swap (deep) - BOOST_REQUIRE( A[0][0] == 1.+ 2.*I ); - BOOST_REQUIRE( A[0][3] == 4. + 3.*I ); - } - { - multi::array A = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.} - }; - BOOST_REQUIRE( A[0][2] == 3. ); - BOOST_REQUIRE( A[2][2] == 11. 
); - - auto it = multi::blas::swap(begin(A[0]), end(A[0]) - 1, begin(A[2])); // blas swap - BOOST_REQUIRE( it == end(A[2]) - 1 ); - BOOST_REQUIRE( A[0][2] == 11. ); - BOOST_REQUIRE( A[2][2] == 3. ); - using std::swap_ranges; - swap_ranges(begin(A[0]), end(A[0]), begin(A[2])); // built-in swap - BOOST_REQUIRE( A[0][2] == 3. ); - BOOST_REQUIRE( A[2][2] == 11. ); - } -} - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/traits.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/traits.cpp deleted file mode 100644 index 44facb9310..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/traits.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXXX $CXXFLAGS $0 -o $0.$X `pkg-config --cflags --libs blas cuda-11.0` -lboost_unit_test_framework&&$0.$X&&rm $0.$X;exit -#endif - -#include "../../blas/traits.hpp" - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS traits" -#define BOOST_TEST_DYN_LINK -#include - -#include "./config.hpp" - -#include - -namespace multi = boost::multi; -namespace blas = multi::blas; - -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_traits) { - static_assert( blas::is_d{} ); - static_assert( blas::is_s{} ); - - static_assert( blas::is_c>{} ); - static_assert( blas::is_z>{} ); -} - -#if CUDA_FOUND -#include -BOOST_AUTO_TEST_CASE(multi_adaptors_blas_traits_thrust) { - static_assert( blas::is_c>{} ); - static_assert( blas::is_z>{} ); -} -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/trsm.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/trsm.cpp deleted file mode 100644 index 977a2fe78e..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/test/trsm.cpp +++ /dev/null @@ -1,610 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -$CXX $0 -o $0x -lcudart -lcublas `pkg-config 
--libs blas` -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2019-2021 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi BLAS trsm" -#define BOOST_TEST_DYN_LINK -#include - -//#include "../../../memory/adaptors/cuda/managed/ptr.hpp" - -#include "../../../adaptors/blas/gemm.hpp" -#include "../../../adaptors/blas/trsm.hpp" -//#include "../../../adaptors/blas/cuda.hpp" - -//#include "../../../adaptors/cuda.hpp" -#include "../../../array.hpp" - -#include - -namespace multi = boost::multi; - -template -auto triangular(multi::blas::filling f, Matrix const& m) { // NOLINT(readability-identifier-length) BLAS naming - auto ret =+ m; - switch(f) { - case multi::blas::filling::upper: - for(multi::size_type i = 0; i != size( ret); ++i) { - for(multi::size_type j = 0; j != std::min(i, size(~ret)); ++j) { - ret[i][j] = 0.; - } - } - break; - case multi::blas::filling::lower: - for(multi::size_type j = 0; j != size(~ret); ++j) { - for(multi::size_type i = 0; i != std::min(j, size( ret)); ++i) { - ret[i][j] = 0.; - } - } - break; - } - return ret; -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_0x0) { - namespace blas = multi::blas; - multi::array const A; // NOLINT(readability-identifier-length) BLAS naming - - { - multi::array B; // NOLINT(readability-identifier-length) BLAS naming - // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) - blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 1., A, B); - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_1x1) { - namespace blas = multi::blas; - multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming - {10., }, - }; - { - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {3., }, - }; - auto const B_cpy = B; - blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 1., A, B); - // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) - 
BOOST_REQUIRE_CLOSE( B[0][0] , 3./10. , 0.00001 ); - BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A, B))[0][0] , B_cpy[0][0] , 0.00001 ); - } - { - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {3., }, - }; - auto const B_cpy = B; - // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) - blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 2., A, B); - BOOST_REQUIRE_CLOSE( B[0][0] , 2.*3./10. , 0.00001 ); - BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A, B))[0][0] , 2.*B_cpy[0][0] , 0.00001 ); - } - { - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {3., 4., 5.}, - }; - auto const B_cpy = B; - // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) - blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 1., A, B); - BOOST_REQUIRE_CLOSE( B[0][0] , 3./10. , 0.00001 ); - BOOST_REQUIRE_CLOSE( B[0][1] , 4./10. , 0.00001 ); - BOOST_REQUIRE_CLOSE( B[0][2] , 5./10. 
, 0.00001 ); - BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A, B))[0][1] , B_cpy[0][1] , 0.00001 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_real_square) { - namespace blas = multi::blas; - multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming - { 1., 3., 4.}, - { NAN, 7., 1.}, - { NAN, NAN, 8.} - }; - auto const A_cpy = triangular(blas::filling::upper, A); - { - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1., 3., 4.}, - {2., 7., 1.}, - {3., 4., 2.} - }; - auto const B_cpy = B; - blas::trsm(blas::side::left, blas::filling::upper, 1., A, B); // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) - BOOST_REQUIRE_CLOSE( B[1][2] , 0.107143 , 0.001 ); - BOOST_REQUIRE( (+blas::gemm(1., A_cpy, B))[1][2] == B_cpy[1][2] ); - } - { - auto const AT =+ ~A; - auto const AT_cpy = triangular(blas::filling::lower, AT); - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1., 3., 4.}, - {2., 7., 1.}, - {3., 4., 2.} - }; - auto const B_cpy = B; - blas::trsm(blas::side::left, blas::filling::upper, 1., blas::T(AT), B); - BOOST_REQUIRE_CLOSE( B[1][2] , 0.107143 , 0.001 ); - BOOST_REQUIRE( (+blas::gemm(1., blas::T(AT_cpy), B))[1][2] == B_cpy[1][2] ); - } - { - auto const AT =+ ~A; - auto const AT_cpy = triangular(blas::filling::lower, AT); - multi::array const B = { // NOLINT(readability-identifier-length) BLAS naming - {1., 3., 4.}, - {2., 7., 1.}, - {3., 4., 2.} - }; - auto BT =+ ~B; - blas::trsm(blas::side::left, blas::filling::upper, 1., blas::T(AT), blas::T(BT)); - BOOST_REQUIRE_CLOSE( blas::T(BT)[1][2] , 0.107143 , 0.001 ); - BOOST_REQUIRE( (+blas::gemm(1., blas::T(AT_cpy), blas::T(BT)))[1][2] == B[1][2] ); - } - { - multi::array const B = { // NOLINT(readability-identifier-length) BLAS naming - {1., 3., 4.}, - {2., 7., 1.}, - {3., 4., 2.} - }; - auto BT =+ ~B; - blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::T(BT)); - BOOST_REQUIRE_CLOSE( (~BT)[1][2] 
, 0.107143 , 0.001 ); - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex) { - namespace blas = multi::blas; - using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit - multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming - { 1. + 2.*I, 3. - 1.*I, 4. + 9.*I}, - {NAN , 7. + 4.*I, 1. + 8.*I}, - {NAN , NAN , 8. + 2.*I} - }; - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1. - 9.*I, 3. + 2.*I, 4. + 3.*I}, - {2. - 2.*I, 7. - 2.*I, 1. - 1.*I}, - {3. + 1.*I, 4. + 8.*I, 2. + 7.*I} - }; - blas::trsm(blas::side::left, blas::filling::lower, 2.+1.*I, blas::H(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) - BOOST_REQUIRE_CLOSE( real(B[1][2]) , 2.33846 , 0.0001 ); - BOOST_REQUIRE_CLOSE( imag(B[1][2]) , -0.0923077 , 0.0001 ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_rectangular) { - namespace blas = multi::blas; - using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit - multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming - { 1. + 2.*I, 3. - 1.*I, 4. + 9.*I}, - {NAN , 7. + 4.*I, 1. + 8.*I}, - {NAN , NAN , 8. + 2.*I} - }; - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1. - 9.*I, 3. + 2.*I}, - {2. - 2.*I, 7. - 2.*I}, - {3. + 1.*I, 4. 
+ 8.*I} - }; - blas::trsm(blas::side::left, blas::filling::lower, 2.+1.*I, blas::H(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) - BOOST_REQUIRE_CLOSE( real(B[2][0]) , -4.16471 , 0.0001 ); - BOOST_REQUIRE_CLOSE( imag(B[2][0]) , 8.25882 , 0.0001 ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_column) { - namespace blas = multi::blas; - using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit - multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming - { 1. + 2.*I, 3. - 1.*I, 4. + 9.*I}, - {NAN , 7. + 4.*I, 1. + 8.*I}, - {NAN , NAN , 8. + 2.*I} - }; - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1. - 9.*I}, - {2. - 2.*I}, - {3. + 1.*I} - }; - blas::trsm(blas::side::left, blas::filling::lower, 2.+1.*I, blas::H(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) - BOOST_REQUIRE_CLOSE( real(B[2][0]) , -4.16471 , 0.0001); - BOOST_REQUIRE_CLOSE( imag(B[2][0]) , 8.25882 , 0.0001); -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_column_cpu) { - namespace blas = multi::blas; - using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imaginary unit - multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming - { 1. + 2.*I, 3. - 1.*I, 4. + 9.*I}, - {NAN , 7. + 4.*I, 1. + 8.*I}, - {NAN , NAN , 8. + 2.*I} - }; - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1. - 9.*I}, - {2. - 2.*I}, - {3. 
+ 1.*I} - }; - blas::trsm(blas::side::left, blas::filling::lower, 2.+1.*I, blas::H(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) - BOOST_REQUIRE_CLOSE( real(B[2][0]) , -4.16471 , 0.0001 ); - BOOST_REQUIRE_CLOSE( imag(B[2][0]) , 8.25882 , 0.0001 ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_hydrogen_inq_case_real) { - namespace blas = multi::blas; - multi::array const A = {{2., }, }; // NOLINT(readability-identifier-length) BLAS naming - { - multi::array B = {{1., 2., 3.}, }; // NOLINT(readability-identifier-length) BLAS naming - BOOST_REQUIRE( B.size() == 1 ); - auto const B_cpy = B; - blas::trsm(blas::side::left, blas::filling::lower, 1., A, B); - BOOST_REQUIRE( B[0][1] == B_cpy[0][1]/A[0][0] ); - } - { - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1.}, - {2.}, - {3.}, - }; - auto const B_cpy = B; - blas::trsm(blas::side::left, blas::filling::lower, 1., A, blas::T(B)); - BOOST_REQUIRE( blas::T(B)[0][1] == blas::T(B_cpy)[0][1]/A[0][0] ); - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_hydrogen_inq_case_complex) { - namespace blas = multi::blas; - using complex = std::complex; - multi::array const A = {{2., }, }; // NOLINT(readability-identifier-length) BLAS naming - - { - multi::array B = {{1., 2., 3.}, }; // NOLINT(readability-identifier-length) BLAS naming - auto const B_cpy = B; - blas::trsm(blas::side::left, blas::filling::lower, 1., A, B); - BOOST_REQUIRE( B[0][1] == B_cpy[0][1]/A[0][0] ); - } - multi::array B1 = { - {1.}, - {2.}, - {3.}, - }; - multi::array B2 = { - {1.}, - {2.}, - {3.}, - }; - { - // auto const B_cpy = B1; - blas::trsm(blas::side::left, blas::filling::lower, 1., A, blas::H(B1)); - // BOOST_REQUIRE( (+blas::gemm(1., A, blas::H(B1)))[0][1] == blas::H(B_cpy)[0][1] ); - } - - { - auto const B_cpy = B2; - blas::trsm(blas::side::right, blas::filling::upper, 1., blas::H(A), B2); - // BOOST_REQUIRE( (+blas::gemm(1., A, 
blas::H(B)))[0][1] == blas::H(B_cpy)[0][1] ); - BOOST_REQUIRE( (+blas::gemm(1., B2, blas::H(A)))[1][0] == B_cpy[1][0] ); - } - BOOST_REQUIRE( B1 == B2 ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_real_nonsquare) { - namespace blas = multi::blas; - multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming - { 1., 3., 4.}, - { NAN, 7., 1.}, - { NAN, NAN, 8.} - }; - auto const A_cpy = triangular(blas::filling::upper, A); - { - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1., 3., 4., 8.}, - {2., 7., 1., 9.}, - {3., 4., 2., 1.}, - }; - auto const B_cpy =+ B; - multi::array BT =+ ~B; - BOOST_REQUIRE( BT == ~B ); - blas::trsm(blas::side::left, blas::filling::upper, 1., A, B); // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) - BOOST_REQUIRE_CLOSE( B[1][2] , 0.107143 , 0.001); - BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, B))[1][2] , B_cpy[1][2] , 0.001); - - auto const BT_cpy = BT; - blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::T(BT)); - BOOST_REQUIRE_CLOSE( blas::T(BT)[1][2], 0.107143, 0.001 ); - - BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, blas::T(BT)))[1][2] , blas::T(BT_cpy)[1][2] , 0.00001 ); - } - { - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1., 3., 4., 8.}, - {2., 7., 1., 9.}, - {3., 4., 2., 1.}, - }; - multi::array AT = ~A; - multi::array BT = ~B; - blas::trsm(blas::side::left, blas::filling::upper, 1., blas::T(AT), B); // B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) - BOOST_REQUIRE_CLOSE( B[1][2] , 0.107143 , 0.001 ); - - blas::trsm(blas::side::left, blas::filling::upper, 1., blas::T(AT), blas::T(BT)); - BOOST_REQUIRE_CLOSE( (~BT)[1][2] , 0.107143, 0.001 ); - } - { - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1.}, - {2.}, - {3.}, - }; - auto const B_cpy =+ B; - blas::trsm(blas::side::left, blas::filling::upper, 1., A, B); // 
B=Solve(A.X=alpha*B, X) B=A⁻¹B, B⊤=B⊤.(A⊤)⁻¹, A upper triangular (implicit zeros below) - BOOST_REQUIRE_CLOSE( B[2][0] , 0.375 , 0.00001 ); - BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, B))[1][0] , B_cpy[1][0] , 0.00001 ); - } - { - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1.}, - {2.}, - {3.}, - }; - auto const B_cpy =+ B; - blas::trsm(blas::side::left, blas::filling::upper, 1.2, A, B); - BOOST_REQUIRE_CLOSE( (+blas::gemm(1., A_cpy, B))[1][0] , 1.2*B_cpy[1][0] , 0.00001 ); - BOOST_REQUIRE_CLOSE( (+blas::gemm(1./1.2, A_cpy, B))[1][0] , B_cpy[1][0] , 0.00001 ); - } - { - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1.}, - {2.}, - {3.}, - }; - multi::array BT = rotated(B); - blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::T(BT)); - BOOST_REQUIRE_CLOSE( (~BT)[2][0] , 0.375 , 0.00001); - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check_no_const) { - namespace blas = multi::blas; - using complex = std::complex; complex const I{0, 1}; // NOLINT(readability-identifier-length) imag unit - multi::array const A = { // NOLINT(readability-identifier-length) BLAS naming - { 1. + 4.*I, 3. , 4.- 10.*I}, - { 0. , 7.- 3.*I, 1. }, - { 0. , 0. , 8.- 2.*I} - }; - multi::array B = { // NOLINT(readability-identifier-length) BLAS naming - {1. + 1.*I, 2. + 1.*I, 3. + 1.*I}, - {5. + 3.*I, 9. + 3.*I, 1. - 1.*I} - }; - - using multi::blas::trsm; - using multi::blas::filling; - using multi::blas::hermitized; - blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::H(B)); // B†←A⁻¹.B†, B←B.A⁻¹†, B←(A⁻¹.B†)† - BOOST_REQUIRE_CLOSE( imag(B[1][2]) , -0.147059 , 0.001); -} - -#if 0 -BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_nonsquare_default_diagonal_hermitized_gemm_check) { - using complex = std::complex; complex const I{0, 1}; - multi::array const A = { - { 1. 
+ 4.*I, 3., 4.- 10.*I}, - { 0., 7.- 3.*I, 1.}, - { 0., 0., 8.- 2.*I} - }; - namespace blas = multi::blas; - - { - { - multi::array B = { - {1. + 1.*I, 5. + 3.*I}, - {2. + 1.*I, 9. + 3.*I}, - {3. + 1.*I, 1. - 1.*I}, - }; - auto S = blas::trsm(blas::side::left, blas::filling::lower, 1., blas::H(A), B); // S = A⁻¹†.B, S† = B†.A⁻¹ - BOOST_REQUIRE_CLOSE( real(S[2][1]) , 1.71608 , 0.001 ); - } - { - multi::array B = { - {1. + 1.*I, 2. + 1.*I, 3. + 1.*I}, - {5. + 3.*I, 9. + 3.*I, 1. - 1.*I} - }; - auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::H(B)); // S = A⁻¹B†, S†=B.A⁻¹†, S=(B.A⁻¹)†, B <- S†, B <- B.A⁻¹† - BOOST_REQUIRE_CLOSE( imag(S[2][1]) , +0.147059 , 0.001); - BOOST_REQUIRE_CLOSE( imag(B[1][2]) , -0.147059 , 0.001); - } - { - multi::array B = { - {1. + 1.*I, 2. + 1.*I, 3. + 1.*I}, - {5. + 3.*I, 9. + 3.*I, 1. - 1.*I} - }; - auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 2., A, blas::H(B)); // S = A⁻¹B†, S†=B.A⁻¹†, S=(B.A⁻¹)†, B <- S†, B <- B.A⁻¹† - BOOST_REQUIRE_CLOSE( imag(S[2][1]) , +0.147059*2. , 0.001 ); - BOOST_REQUIRE_CLOSE( imag(B[1][2]) , -0.147059*2. , 0.001 ); - } - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_real_1x1_check) { - namespace blas = multi::blas; - multi::array const A = { - { 4.}, - }; - { - { - multi::array B = { - {5.}, - }; - auto S =+ blas::trsm(blas::side::left, blas::filling::upper, blas::diagonal::general, 3., A, B); - BOOST_REQUIRE( S[0][0] == 3.*5./4. ); - } - { - multi::array B = { - {5.}, - }; - auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 1., A, B); - BOOST_REQUIRE( S[0][0] == 1.*5./4. ); - } - { - multi::array B = { - {5.}, - }; - auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 1., A, B); - BOOST_REQUIRE( S[0][0] == 1.*5./4. ); - } - } -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_1x1_check) { - using complex = std::complex; complex const I = complex{0, 1}; - multi::array const A = { - { 4. 
+ 2.*I}, - }; - namespace blas = multi::blas; - { - multi::array B = { - {5. + 1.*I}, - }; - auto const B_cpy =+ B; - - blas::trsm(blas::side::left, blas::filling::upper, 3.+5.*I, A, B); - BOOST_REQUIRE_CLOSE( real((+blas::gemm(1., A, B))[0][0]) , real((3.+5.*I)*B_cpy[0][0]) , 0.00001 ); - BOOST_REQUIRE_CLOSE( imag((+blas::gemm(1., A, B))[0][0]) , imag((3.+5.*I)*B_cpy[0][0]) , 0.00001 ); - - BOOST_REQUIRE_CLOSE( real((+blas::gemm(1./(3.+5.*I), A, B))[0][0]) , real(B_cpy[0][0]) , 0.00001 ); - BOOST_REQUIRE_CLOSE( imag((+blas::gemm(1./(3.+5.*I), A, B))[0][0]) , imag(B_cpy[0][0]) , 0.00001 ); - } -} - -#if defined(CUDA_FOUND) and CUDA_FOUND -#include - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_thrust_nonsquare_default_diagonal_hermitized_gemm_check) { - namespace blas = multi::blas; - using complex = thrust::complex; complex const I{0, 1}; - multi::array const A = { - { 1. + 4.*I, 3. , 4.- 10.*I}, - { 0. , 7.- 3.*I, 1. }, - { 0. , 0. , 8.- 2.*I} - }; - { - { - multi::array B = { - {1. + 1.*I, 5. + 3.*I}, - {2. + 1.*I, 9. + 3.*I}, - {3. + 1.*I, 1. - 1.*I}, - }; - auto S = blas::trsm(blas::side::left, blas::filling::lower, 1., blas::H(A), B); // S = A⁻¹†.B, S† = B†.A⁻¹ - BOOST_REQUIRE_CLOSE( S[2][1].real() , 1.71608 , 0.001 ); - BOOST_REQUIRE( S == B ); - } - { - multi::array B = { - {1. + 1.*I, 2. + 1.*I, 3. + 1.*I}, - {5. + 3.*I, 9. + 3.*I, 1. - 1.*I} - }; - auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 1., A, blas::H(B)); // S = A⁻¹B†, S†=B.A⁻¹†, S=(B.A⁻¹)†, B <- S†, B <- B.A⁻¹† - BOOST_REQUIRE_CLOSE( B[1][2].imag() , -0.147059 , 0.001 ); - BOOST_REQUIRE( S == blas::H(B) ); - } - { - multi::array B = { - {1. + 1.*I, 2. + 1.*I, 3. + 1.*I}, - {5. + 3.*I, 9. + 3.*I, 1. - 1.*I} - }; - auto S =+ blas::trsm(blas::side::left, blas::filling::upper, 2., A, blas::H(B)); // S = A⁻¹B†, S†=B.A⁻¹†, S=(B.A⁻¹)†, B <- S†, B <- B.A⁻¹† - BOOST_REQUIRE_CLOSE( B[1][2].imag() , -0.147059*2. 
, 0.001 ); - BOOST_REQUIRE( S == blas::H(B) ); - } - } -} -//BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_column_cuda, *utf::tolerance(0.00001)){ -// namespace cuda = multi::cuda; -// cuda::array A = { -// { 1., 3., 4.}, -// {NAN, 7., 1.}, -// {NAN, NAN, 8.} -// }; -//// multi::cuda::array const B = { -//// {1.}, -//// {2.}, -//// {3.} -//// }; -// namespace blas = multi::blas; -//// auto Bcpy = blas::trsm(blas::filling::upper, 1., A, B); // B ⬅ α Inv[A].B, B† ⬅ B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) -//// multi::array Bcpu = Bcpy; -//// BOOST_TEST_REQUIRE( std::real(Bcpu[2][0]) == 0.375 ); -//// BOOST_TEST_REQUIRE( std::imag(Bcpu[2][0]) == 0. ); -//} -#endif - -#endif -#if 0 - -//template void what(T&&) = delete; - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_double_column_cuda, *utf::tolerance(0.00001)) { - multi::cuda::array const A = { - { 1., 3., 4.}, - {NAN, 7., 1.}, - {NAN, NAN, 8.} - }; - multi::cuda::array B = { - {1.}, - {2.}, - {3.} - }; - namespace blas = multi::blas; - using blas::filling; - using blas::hermitized; - trsm(filling::upper, 1., A, B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) - BOOST_REQUIRE( B[2][0] == 0.375 ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_trsm_complex_column_cuda2, *utf::tolerance(0.00001)) { - multi::cuda::array const A = { - { 1. + 2.*I, 3. - 1.*I, 4. + 9.*I}, - {NAN , 7. + 4.*I, 1. + 8.*I}, - {NAN , NAN , 8. + 2.*I} - }; - multi::cuda::array B = { - {1. - 9.*I}, - {2. - 2.*I}, - {3. 
+ 1.*I} - }; - namespace blas = multi::blas; - using blas::filling; - using blas::hermitized; - trsm(filling::lower, 2.+1.*I, hermitized(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) - multi::array Bcpu = B; - BOOST_REQUIRE( real(Bcpu[2][0]) == -4.16471 ); - BOOST_REQUIRE( imag(Bcpu[2][0]) == 8.25882 ); -} - -BOOST_AUTO_TEST_CASE(multi_blas_cuda_trsm_complex, *utf::tolerance(0.00001)) { - multi::cuda::array const A = { - { 1. + 2.*I, 3. - 1.*I, 4. + 9.*I}, - {NAN , 7. + 4.*I, 1. + 8.*I}, - {NAN , NAN , 8. + 2.*I} - }; - multi::cuda::array const B = { - {1. - 9.*I, 3. + 2.*I, 4. + 3.*I}, - {2. - 2.*I, 7. - 2.*I, 1. - 1.*I}, - {3. + 1.*I, 4. + 8.*I, 2. + 7.*I} - }; - - namespace blas = multi::blas; - using blas::filling; - using blas::hermitized; -// auto C = trsm(filling::lower, 2.+1.*I, hermitized(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) - auto C = trsm(filling::lower, 1., hermitized(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit -} - -BOOST_AUTO_TEST_CASE(multi_blas_cuda_managed_trsm_complex, *utf::tolerance(0.00001)) { - multi::cuda::managed::array const A = { - { 1. + 2.*I, 3. - 1.*I, 4. + 9.*I}, - {NAN , 7. + 4.*I, 1. + 8.*I}, - {NAN , NAN , 8. + 2.*I} - }; - multi::cuda::managed::array const B = { - {1. - 9.*I, 3. + 2.*I, 4. + 3.*I}, - {2. - 2.*I, 7. - 2.*I, 1. - 1.*I}, - {3. + 1.*I, 4. + 8.*I, 2. 
+ 7.*I} - }; - - namespace blas = multi::blas; - using blas::filling; - using blas::hermitized; - auto C = trsm(filling::lower, 2.+1.*I, hermitized(A), B); // B=alpha Inv[A†].B, B†=B†.Inv[A], Solve(A†.X=B, X), Solve(X†.A=B†, X), A is upper triangular (with implicit zeros below) -} -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/traits.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/traits.hpp deleted file mode 100644 index 6ec6b2c45a..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/traits.hpp +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef MULTI_ADAPTORS_BLAS_TRAITS_HPP// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -#define MULTI_ADAPTORS_BLAS_TRAITS_HPP -// Copyright 2019-2021 Alfredo A. Correa - -#include -#include - -namespace boost::multi::blas{ - -// TODO(correaa) : create a BinaryDouble concept? - - template()/std::declval()), float>{} >> - auto is_s_aux(F&&) -> std::true_type ; - auto is_s_aux(...) -> std::false_type; - - template struct is_s : decltype(is_s_aux(std::declval())){using archetype = float;}; - - template()/std::declval()), double>{}>> - auto is_d_aux(D&&) -> std::true_type ; - auto is_d_aux(...) -> std::false_type; - - template struct is_d : decltype(is_d_aux(std::declval())){using archetype = double;}; - - template) and is_s().real())>{} and is_s().imag())>{}>> - auto is_c_aux(C&&) -> std::true_type; - auto is_c_aux(...) -> std::false_type; - - template struct is_c : decltype(is_c_aux(std::declval())){using archetype = std::complex;}; - - template) and is_d().real())>{} and is_d().imag())>{}>> - auto is_z_aux(Z&&) -> std::true_type ; - auto is_z_aux(...) 
-> std::false_type; - - template struct is_z : decltype(is_z_aux(std::declval())){using archetype = std::complex;}; - -} // end namespace boost::multi::blas -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/blas/trsm.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/blas/trsm.hpp deleted file mode 100644 index 2367542e7a..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/blas/trsm.hpp +++ /dev/null @@ -1,102 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#ifndef MULTI_ADAPTORS_BLAS_TRSM_HPP -#define MULTI_ADAPTORS_BLAS_TRSM_HPP - -#include "../blas/core.hpp" -#include "../blas/filling.hpp" -#include "../blas/operations.hpp" // uplo -#include "../blas/side.hpp" - -namespace boost::multi::blas { - -enum class diagonal : char { - unit = 'U', - non_unit = 'N', general = non_unit -}; - -using core::trsm; - -template -auto trsm(Context&& ctxt, blas::side a_side, blas::filling a_fill, blas::diagonal a_diag, typename A2D::element_type alpha, A2D const& a, B2D&& b) // NOLINT(readability-function-cognitive-complexity,readability-identifier-length) cognitive load 115, BLAS naming --> B2D&& { - if(a_side == blas::side::left ) {assert(size(~a) >= size( b));} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - if(a_side == blas::side::right) {assert(size( a) >= size(~b));} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - - assert( stride( a) == 1 or stride(~a) == 1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - assert( stride( b) == 1 or stride(~b) == 1 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - - if(size(b)!=0){ - #define CTXT std::forward(ctxt) - if constexpr(not is_conjugated{} and not is_conjugated{}){ - if (stride( a)==1 and stride( b)==1) {CTXT->trsm(char{ (a_side)}, 
static_cast(-a_fill), 'N', static_cast(a_diag), size( b), size(~b), alpha , base(a) , stride(~a), base(b) , stride(~b));} - else if(stride(~a)==1 and stride(~b)==1) {CTXT->trsm(char{swap(a_side)}, static_cast(+a_fill), 'N', static_cast(a_diag), size(~b), size( b), alpha , base(a) , stride( a), base(b) , stride( b));} - else if(stride( a)==1 and stride(~b)==1) {CTXT->trsm(char{swap(a_side)}, static_cast(-a_fill), 'T', static_cast(a_diag), size(~b), size( b), alpha , base(a) , stride(~a), base(b) , stride( b));} - else if(stride(~a)==1 and stride( b)==1) {CTXT->trsm(char{ (a_side)}, static_cast(+a_fill), 'T', static_cast(a_diag), size( b), size(~b), alpha , base(a) , stride( a), base(b) , stride(~b));} - else {assert(0 && "not implemented in blas");} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - }else if constexpr( is_conjugated{} and not is_conjugated{}){ - if (stride( a)==1 and stride(~b)==1) {CTXT->trsm(char{swap(a_side)}, static_cast(-a_fill), 'C', static_cast(a_diag), size(~b), size( b), alpha , underlying(base(a)), stride(~a), base(b) , stride( b));} - else if(stride(~a)==1 and stride( b)==1) {CTXT->trsm(char{ (a_side)}, static_cast(+a_fill), 'C', static_cast(a_diag), size( b), size(~b), alpha , underlying(base(a)), stride( a), base(b) , stride(~b));} - // else if(stride( a)==1 and stride( b)==1) {assert(0 && "not implemented in blas");} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - // else if(stride(~a)==1 and stride(~b)==1) {assert(0 && "not implemented in blas");} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - else {assert(0 && "not implemented in blas");} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - }else if constexpr(not is_conjugated{} and is_conjugated{}){ - if (stride(~a)==1 and stride( b)==1) {CTXT->trsm(char{ (a_side)}, static_cast(+a_fill), 'C', static_cast(a_diag), size( b), size(~b), 
conj(alpha), base(a) , stride( a), underlying(base(b)), stride(~b));} - else if(stride( a)==1 and stride(~b)==1) {CTXT->trsm(char{swap(a_side)}, static_cast(-a_fill), 'C', static_cast(a_diag), size(~b), size( b), conj(alpha), base(a) , stride(~a), underlying(base(b)), stride( b));} - // else if(stride(~a)==1 and stride(~b)==1) {assert(0 && "not implemented in blas");} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - // else if(stride( a)==1 and stride( b)==1) {assert(0 && "not implemented in blas");} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - else {assert(0 && "not implemented in blas");} // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) - }else if constexpr( is_conjugated{} and is_conjugated{}){ - if (stride( a)==1 and stride(~b)==1) {CTXT->trsm(char{swap(a_side)}, static_cast(-a_fill), 'T', static_cast(a_diag), size(~b), size( b), conj(alpha), underlying(base(a)), stride(~a), underlying(base(b)), stride( b));} - else if(stride(~a)==1 and stride( b)==1) {CTXT->trsm(char{ (a_side)}, static_cast(+a_fill), 'T', static_cast(a_diag), size( b), size(~b), conj(alpha), underlying(base(a)), stride( a), underlying(base(b)), stride(~b));} - // else if(stride(~a)==1 and stride(~b)==1) {assert(0 && "not implemented in blas");} - // else if(stride( a)==1 and stride( b)==1) {assert(0 && "not implemented in blas");} - else {assert(0 && "not implemented in blas");} - } - #undef CTXT - } - return std::forward(b); -} - -template -auto trsm(blas::side a_side, blas::filling a_fill, blas::diagonal a_diag, typename A2D::element_type alpha, A2D const& a, B2D&& b) -> decltype(auto) { // NOLINT(readability-identifier-length) BLAS naming - if constexpr(not is_conjugated{}) {return trsm(default_context_of( a.base() ), a_side, a_fill, a_diag, alpha, a, std::forward(b));} - else {return trsm(default_context_of(underlying(a.base())), a_side, a_fill, a_diag, alpha, a, 
std::forward(b));} -} - -template -auto trsm(Context&& ctxt, blas::side a_side, blas::filling a_fill, typename A2D::element_type alpha, A2D const& a, B2D&& b) // NOLINT(readability-identifier-length) BLAS naming -->decltype(trsm(std::forward(ctxt), a_side, a_fill, blas::diagonal::general, alpha, a, std::forward(b))) { - return trsm(std::forward(ctxt), a_side, a_fill, blas::diagonal::general, alpha, a, std::forward(b)); } - -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function - #endif -#elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function -#endif -template -auto trsm(blas::side a_side, blas::filling a_fill, typename A2D::element_type alpha, A2D const& a, B2D&& b) -> decltype(auto) { // NOLINT(readability-identifier-length) BLAS naming - if constexpr(not is_conjugated{}) {return trsm(default_context_of( a.base() ), a_side, a_fill, alpha, a, std::forward(b));} - else {return trsm(default_context_of(underlying(a.base())), a_side, a_fill, alpha, a, std::forward(b));} -} -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #pragma diagnostic pop - #endif -#elif defined __NVCOMPILER - #pragma diagnostic pop -#endif - -} // end namespace boost::multi::blas - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/cuda.hpp deleted file mode 100644 index fed35b9310..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cuda.hpp +++ /dev/null @@ -1,283 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*- -$CXX $0 -o $0x -lcudart -lboost_unit_test_framework -lboost_timer -ldl&&$0x&&rm $0x;exit -#endif -// © Alfredo A. 
Correa 2019-2020 - -#ifndef MULTI_ADAPTORS_CUDA_HPP -#define MULTI_ADAPTORS_CUDA_HPP - -#include "../memory/adaptors/cuda/allocator.hpp" -#include "../memory/adaptors/cuda/managed/allocator.hpp" -#include "../adaptors/cuda/algorithms/copy.hpp" - -#include "../array.hpp" - -namespace boost{ -namespace multi{ -namespace cuda{ - - template - using allocator = multi::memory::cuda::allocator; - - template using ptr = multi::memory::cuda::ptr; - - template - using array = multi::array>; - - template - using array_ref = multi::array_ref>; - - template - using static_array = multi::static_array>; - -// template auto raw_array_cast(A&& a) -// ->decltype(static_array_cast(a))))>(std::forward(a))){ -// return static_array_cast(a))))>(std::forward(a));} - - template auto raw_array_cast(A&& a) - ->decltype(std::forward(a).template static_array_cast(a))))>()){ - return std::forward(a).template static_array_cast(a))))>();} - - namespace managed{ - template - using allocator = multi::memory::cuda::managed::allocator; - - template using ptr = multi::memory::cuda::managed::ptr; - - template - using array = multi::array>; - - template - using array_ref = multi::array>; - - template - using static_array = multi::array>; - } - -} - -/* -auto copy(const double* first, const double* last, boost::multi::array_iterator, double&> d_first){ - return copy( - boost::multi::array_iterator(first), - boost::multi::array_iterator(last), - d_first - ); -}*/ - -}} - -#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA adaptor" -#define BOOST_TEST_DYN_LINK -#include -#include - -#include -#include - -template -__attribute__((always_inline)) inline void DoNotOptimize(const T &value) { - asm volatile("" : "+m"(const_cast(value))); -} - -struct watch : private std::chrono::high_resolution_clock{ - std::string label_; time_point start_; - watch(std::string label ="") : label_{label}, start_{now()}{} - ~watch(){ - std::cerr<< label_<<": "<< 
std::chrono::duration(now() - start_).count() <<" sec"< A(4, 99.); - cuda::array Agpu{A}; - BOOST_REQUIRE( extensions(A) == extensions(Agpu) ); - BOOST_REQUIRE( Agpu == A ); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_copy_1d){ - multi::array A(4, 99.); - cuda::array Agpu(4); - BOOST_REQUIRE( extensions(A) == extensions(Agpu) ); - Agpu({0, 4}) = A({0, 4}); - BOOST_REQUIRE( Agpu == A ); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_construct_2d){ - multi::array A({4, 6}, 99.); - cuda::array Agpu{A}; - BOOST_REQUIRE( extensions(A) == extensions(Agpu) ); - BOOST_REQUIRE( Agpu == A ); - - A[1][1] = Agpu[1][1]; -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_copy_2d){ - multi::array A({4, 6}); std::iota(A.data_elements(), A.data_elements() + A.num_elements(), 1.); - cuda::array Agpu({4, 6}, 99.); - BOOST_REQUIRE( extensions(A) == extensions(Agpu) ); - Agpu({0, 4}, {1, 6}) = A({0, 4}, {1, 6}); - BOOST_REQUIRE( Agpu != A ); - Agpu = A; - BOOST_REQUIRE( Agpu == A ); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_1d_initializer_list){ - cuda::array Bgpu = {1., 2., 3., 4.}; - BOOST_REQUIRE( Bgpu[1] == 2. ); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_2d_initializer_list){ - cuda::array Bgpu = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 10., 11., 12.}, - }; - BOOST_REQUIRE( size(Bgpu) == 3 ); - BOOST_REQUIRE( Bgpu[1][1] == 6. 
); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_2d_initializer_list_bis){ - multi::array A({3, 4}); std::iota(A.data_elements(), A.data_elements() + A.num_elements(), 1.); - - cuda::array Agpu({3, 4}, 99.); - BOOST_REQUIRE( extensions(A) == extensions(Agpu) ); - Agpu({0, 3}, {1, 4}) = A({0, 3}, {1, 4}); - BOOST_REQUIRE( Agpu != A ); - Agpu = A; - BOOST_REQUIRE( Agpu == A ); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_copy_vs_move){ - cuda::array Agpu({30, 100, 100, 100}, 99.); - [&, _ = watch{utf::current_test_case().full_name()+" COPY"}]{ - cuda::array Agpu_cpy = Agpu; - BOOST_REQUIRE( &Agpu_cpy[1][2][3][4] != &Agpu[1][2][3][4] ); - BOOST_REQUIRE( Agpu_cpy[1][2][3][4] == Agpu[1][2][3][4] ); - }(); - [&, _ = watch{utf::current_test_case().full_name()+" MOVE"}]{ - cuda::array Agpu_mov = std::move(Agpu); - BOOST_REQUIRE( Agpu.empty() ); - BOOST_REQUIRE( Agpu_mov.size() == 30 ); - }(); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_copy_vs_move_complex){ - using complex = std::complex; - cuda::array Agpu({30, 100, 100, 100}, 99.); - [&, _ = watch{utf::current_test_case().full_name()+" COPY"}]{ - cuda::array Agpu_cpy = Agpu; - BOOST_REQUIRE( &Agpu_cpy[1][2][3][4] != &Agpu[1][2][3][4] ); - BOOST_REQUIRE( Agpu_cpy[1][2][3][4] == Agpu[1][2][3][4] ); - }(); - [&, _ = watch{utf::current_test_case().full_name()+" MOVE"}]{ - cuda::array Agpu_mov = std::move(Agpu); - BOOST_REQUIRE( Agpu.empty() ); - BOOST_REQUIRE( Agpu_mov.size() == 30 ); - }(); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_managed_double){ - cuda::managed::array A({2,3,4,5}); - cuda::managed::array B({2,3,4,5}, 0.); - cuda::managed::array C({2,3,4,5}, 5.); -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_cuda_managed_ai3){ - using ai3 = std::array; - cuda::managed::array A({2,3,4,5}); // default initialize elements - cuda::managed::array B({2,3,4,5}, ai3{} ); // value initialize elements - cuda::managed::array C({2,3,4,5}, ai3{11, 22, 33} ); // value initialize elements -} - 
-BOOST_AUTO_TEST_CASE(multi_adaptor_cuda_decay){ - cuda::array A = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {1., 2., 3., 4.} - }; - cuda::array A1 = A[1]; - -// cuda::array A = { -// {1. + 2.*I, 2. + 3.*I, 3. + 4.*I, 4. + 5.*I}, -// {5. + 2.*I, 6. + 3.*I, 7. + 4.*I, 8. + 5.*I}, -// {1. + 1.*I, 2. + 2.*I, 3. + 3.*I, 4. + 4.*I} -// }; -// cuda::array A({4, 5}, 99.); - cuda::array Agpu = A; - -} - -BOOST_AUTO_TEST_CASE(multi_adaptors_cuda){ - - multi::array A({4, 5}, 99.); - cuda::array Agpu = A; - assert( Agpu == A ); - - cuda::managed::array Amng = A; - assert( Amng == Agpu ); - - cuda::array_ref Rgpu(data_elements(Agpu), extensions(Agpu)); - - {std::allocator a = get_allocator(A);} - - { - cuda::ptr p; - using multi::get_allocator; - cuda::allocator a = get_allocator(p); (void)a; - } - { - cuda::managed::ptr p; - using multi::get_allocator; - cuda::managed::allocator a = get_allocator(p); (void)a; - } - { - double* p = nullptr; - using multi::get_allocator; - std::allocator a = get_allocator(p); (void)a; - } - { - multi::array arr; - std::allocator a = get_allocator(arr); - } - { - cuda::array arr; - cuda::allocator a = get_allocator(arr); (void)a; - } - { -// cuda::array arr = 45.; -// BOOST_REQUIRE( arr() == 45. ); - } - { -// cuda::managed::array arr = 45.; -// BOOST_REQUIRE( arr() == 45. 
); - } - { - cuda::managed::array arr = {1.2, 3.4, 4.5}; - } - { - using complex = std::complex; - cuda::managed::array a({1000, 1000}, 99.); - BOOST_REQUIRE( size(a) == 1000 ); - cuda::managed::array b; - b = std::move(a); - BOOST_REQUIRE( size(b) == 1000 ); - BOOST_REQUIRE( size(a) == 0 ); - } -} -#endif -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/call.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/call.hpp deleted file mode 100644 index 3b10e644d4..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/call.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef MULTI_ADAPTORS_CUDA_CUBLAS_CALL_HPP -#define MULTI_ADAPTORS_CUDA_CUBLAS_CALL_HPP - -#include "../cublas/error.hpp" - -#include // cudaDeviceSynchronize - -namespace boost{ -namespace multi::cuda::cublas{ - -template // needs C++17 -void call(Args... args){ - auto e = static_cast(Function(args...)); - if(e != cublas::error::success) throw std::system_error{e, "cannot call function "+ std::string{__PRETTY_FUNCTION__}}; -} - -#define CUBLAS_(F) call - -} -} -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/context.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/context.hpp deleted file mode 100644 index fd43da9291..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/context.hpp +++ /dev/null @@ -1,264 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. 
Correa -#pragma once - -#include "../../../config/MARK.hpp" -#include "../../../adaptors/cuda/cublas/call.hpp" - -#include "../../../adaptors/blas/traits.hpp" -#include "../../../adaptors/blas/core.hpp" - -#include "../../../memory/adaptors/cuda/ptr.hpp" -#include "../../../memory/adaptors/cuda/managed/ptr.hpp" - -#include - -#include - -namespace boost { -namespace multi::cuda::cublas { - -class operation { - cublasOperation_t impl_; - - public: - explicit operation(char trans) : impl_{[=]{ - switch(trans) { - case 'N': return CUBLAS_OP_N; - case 'T': return CUBLAS_OP_T; - case 'C': return CUBLAS_OP_C; - default : assert(0); - } - return cublasOperation_t{}; - }()} {} - operator cublasOperation_t() const{return impl_;} -}; - -class side { - cublasSideMode_t impl_; - - public: - explicit side(char trans) : impl_{[=] { - switch(trans) { - case 'L': return CUBLAS_SIDE_LEFT; - case 'R': return CUBLAS_SIDE_RIGHT; - } - assert(0); return cublasSideMode_t{}; - }()} {} - operator cublasSideMode_t() const {return impl_;} -}; - -class filling { - cublasFillMode_t impl_; - - public: - explicit filling(char trans) : impl_{[=] { - switch(trans) { - case 'L': return CUBLAS_FILL_MODE_LOWER; - case 'U': return CUBLAS_FILL_MODE_UPPER; - } - assert(0); return cublasFillMode_t{}; - }()} {} - operator cublasFillMode_t() const {return impl_;} -}; - -class diagonal { - cublasDiagType_t impl_; - - public: - explicit diagonal(char trans) : impl_{[=] { - switch(trans) { - case 'N': return CUBLAS_DIAG_NON_UNIT; - case 'U': return CUBLAS_DIAG_UNIT; - } - assert(0); return cublasDiagType_t{}; - }()} {} - operator cublasDiagType_t() const {return impl_;} -}; - -using blas::is_z; -using blas::is_d; -using std::is_assignable; -using std::is_assignable_v; -using std::is_convertible_v; - -class context : private std::unique_ptr, decltype(&cublasDestroy)> { - using pimpl_t = std::unique_ptr, decltype(&cublasDestroy)>; - cudaStream_t stream() const {cudaStream_t streamId; cublas::call(this->get(), 
&streamId); return streamId;} - template - void sync_call(Args... args) { - call(this->get(), args...); - this->synchronize(); - } - - public: - using pimpl_t::get; - static context& get_instance() { - thread_local context ctxt; - return ctxt; - }; - context() : pimpl_t{[] {cublasHandle_t h; cublasCreate(&h); return h;}(), &cublasDestroy} {} - using ssize_t = int; - static int version() {int ret; cublas::call(nullptr, &ret); return ret;} - void synchronize() { - cudaError_t e = cudaDeviceSynchronize(); - //cudaError_t e = cudaStreamSynchronize(stream()); - if(e != cudaSuccess) {throw std::runtime_error{"cannot synchronize stream in cublas context"};} - } - template::element_type, class YP, class Y = typename std::pointer_traits::element_type, - std::enable_if_t{} and is_d{}, int> = 0 - // std::enable_if_t{} and is_d{} and is_assignable{} and is_convertible_v> and is_convertible_v>, int> = 0 - > - void axpy(ssize_t n, ALPHA const* alpha, XP x, ssize_t incx, YP y, ssize_t incy) { - sync_call( - n, - (double const*)alpha, // TODO(correaa) use static_cast - (double const*)raw_pointer_cast(x), incx, - (double*)raw_pointer_cast(y), incy - ); - } - - template::element_type, class BBP, class BB = typename std::pointer_traits::element_type, class BETA, class CCP, class CC = typename std::pointer_traits::element_type, - std::enable_if_t< - is_z{} and is_z{} and is_z{} and is_z{} and is_z{} and is_assignable{} and - std::is_convertible_v> and std::is_convertible_v> and std::is_convertible_v> - ,int> =0 - > - void gemm(char transA, char transB, ssize_t m, ssize_t n, ssize_t k, ALPHA const* alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb, BETA const* beta, CCP cc, ssize_t ldc) { - MULTI_MARK_SCOPE("cublasZgemm"); - sync_call(cublas::operation{transA}, cublas::operation{transB}, m, n, k, (cuDoubleComplex const*)alpha, (cuDoubleComplex const*)raw_pointer_cast(aa), lda, (cuDoubleComplex const*)raw_pointer_cast(bb), ldb, (cuDoubleComplex const*)beta, 
(cuDoubleComplex*)raw_pointer_cast(cc), ldc); - } - template::element_type, class BBP, class BB = typename std::pointer_traits::element_type, class BETA, class CCP, class CC = typename std::pointer_traits::element_type, - std::enable_if_t< - is_d{} and is_d{} and is_d{} and is_assignable{} and - std::is_convertible_v> and std::is_convertible_v> and std::is_convertible_v> - ,int> =0 - > - void gemm(char transA, char transB, ssize_t m, ssize_t n, ssize_t k, ALPHA const* alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb, BETA const* beta, CCP cc, ssize_t ldc) { - MULTI_MARK_SCOPE("cublasDgemm"); - sync_call(cublas::operation{transA}, cublas::operation{transB}, m, n, k, (double const*)alpha, (double const*)raw_pointer_cast(aa), lda, (double const*)raw_pointer_cast(bb), ldb, (double const*)beta, (double*)raw_pointer_cast(cc), ldc); - } - - template::element_type, class BBP, class BB = typename std::pointer_traits::element_type, class BETA, class CCP, class CC = typename std::pointer_traits::element_type, - std::enable_if_t< - is_z{} and is_z{} and is_z{} and is_z{} and is_z{} and is_assignable{} and - std::is_convertible_v> and std::is_convertible_v> and std::is_convertible_v> - ,int> =0 - > - void gemm(char transA, char transB, ssize_t m, ssize_t n, ssize_t k, ALPHA const* alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb, BETA const* beta, CCP cc, ssize_t ldc) { - MULTI_MARK_SCOPE("cublasZgemm"); - sync_call(cublas::operation{transA}, cublas::operation{transB}, m, n, k, (cuDoubleComplex const*)alpha, (cuDoubleComplex const*)raw_pointer_cast(aa), lda, (cuDoubleComplex const*)raw_pointer_cast(bb), ldb, (cuDoubleComplex const*)beta, (cuDoubleComplex*)raw_pointer_cast(cc), ldc); - } - template::element_type, class BBP, class BB = typename std::pointer_traits::element_type, class BETA, class CCP, class CC = typename std::pointer_traits::element_type, - std::enable_if_t< - is_d{} and is_d{} and is_d{} and is_assignable{} and - std::is_convertible_v> and std::is_convertible_v> 
and std::is_convertible_v> - ,int> =0 - > - void gemm(char transA, char transB, ssize_t m, ssize_t n, ssize_t k, ALPHA const* alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb, BETA const* beta, CCP cc, ssize_t ldc) { - MULTI_MARK_SCOPE("cublasDgemm"); - sync_call(cublas::operation{transA}, cublas::operation{transB}, m, n, k, (double const*)alpha, (double const*)raw_pointer_cast(aa), lda, (double const*)raw_pointer_cast(bb), ldb, (double const*)beta, (double*)raw_pointer_cast(cc), ldc); - } - - template::element_type, class BBP, class BB = typename pointer_traits::element_type, - std::enable_if_t< - is_z{} and is_z{} and is_assignable{} and is_assignable{} and - is_convertible_v> and is_convertible_v> - ,int> =0 - > - void trsm(char side, char ul, char transA, char diag, ssize_t m, ssize_t n, ALPHA alpha, AAP aa, ssize_t lda, BBP bb, ssize_t ldb) { - sync_call(cublas::side{side}, cublas::filling{ul}, cublas::operation{transA}, cublas::diagonal{diag}, m, n, (cuDoubleComplex const*)&alpha, (cuDoubleComplex const*)raw_pointer_cast(aa), lda, (cuDoubleComplex*)raw_pointer_cast(bb), ldb); - } - - template< - class XXP, class XX = typename std::pointer_traits::element_type, - class YYP, class YY = typename std::pointer_traits::element_type, - class RRP, class RR = typename std::pointer_traits::element_type, - std::enable_if_t< - is_d{} and is_d{} and is_d{} and is_assignable{} and - is_convertible_v> and is_convertible_v> and is_convertible_v - , int> =0 - > - void dot(int n, XXP xx, int incx, YYP yy, int incy, RRP rr) { - cublasPointerMode_t mode; - auto s = cublasGetPointerMode(get(), &mode); assert( s == CUBLAS_STATUS_SUCCESS ); - assert( mode == CUBLAS_POINTER_MODE_HOST ); - sync_call(n, raw_pointer_cast(xx), incx, raw_pointer_cast(yy), incy, rr); - } - - template< - class XXP, class XX = typename std::pointer_traits::element_type, - class YYP, class YY = typename std::pointer_traits::element_type, - class RRP, class RR = typename std::pointer_traits::element_type, - 
std::enable_if_t< - is_d{} and is_d{} and is_d{} and is_assignable{} and - is_convertible_v> and is_convertible_v> and is_convertible_v - , int> =0 - > - void dot(int n, XXP xx, int incx, YYP yy, int incy, RRP rr) { - cublasPointerMode_t mode; - auto s = cublasGetPointerMode(get(), &mode); assert( s == CUBLAS_STATUS_SUCCESS ); - assert( mode == CUBLAS_POINTER_MODE_HOST ); - sync_call(n, raw_pointer_cast(xx), incx, raw_pointer_cast(yy), incy, rr); - } - - template< - class XXP, class XX = typename std::pointer_traits::element_type, - class YYP, class YY = typename std::pointer_traits::element_type, - class RRP, class RR = typename std::pointer_traits::element_type, - std::enable_if_t< - is_z{} and is_z{} and is_z{} and is_assignable{} and - is_convertible_v> and is_convertible_v> and is_convertible_v - , int> =0 - > - void dotc(int n, XXP xx, int incx, YYP yy, int incy, RRP rr) { - cublasPointerMode_t mode; - auto s = cublasGetPointerMode(get(), &mode); assert( s == CUBLAS_STATUS_SUCCESS ); - assert( mode == CUBLAS_POINTER_MODE_HOST ); - sync_call(n, (cuDoubleComplex const*)raw_pointer_cast(xx), incx, (cuDoubleComplex const*)raw_pointer_cast(yy), incy, (cuDoubleComplex*)rr); - } - - template< - class XXP, class XX = typename std::pointer_traits::element_type, - class YYP, class YY = typename std::pointer_traits::element_type, - class RRP, class RR = typename std::pointer_traits::element_type, - std::enable_if_t< - is_z{} and is_z{} and is_z{} and std::is_assignable_v and - is_convertible_v> and is_convertible_v> and is_convertible_v - , int> =0 - > - void dotc(int n, XXP xx, int incx, YYP yy, int incy, RRP rr) { - cublasPointerMode_t mode; - auto s = cublasGetPointerMode(get(), &mode); assert( s == CUBLAS_STATUS_SUCCESS ); - assert( mode == CUBLAS_POINTER_MODE_HOST ); - sync_call(n, (cuDoubleComplex const*)raw_pointer_cast(xx), incx, (cuDoubleComplex const*)raw_pointer_cast(yy), incy, (cuDoubleComplex*)rr); - } -}; - -} // end namespace multi::cuda::cublas -} // end 
namespace boost - -namespace boost::multi::blas { - - template<> struct is_context : std::true_type {}; - template<> struct is_context : std::true_type {}; - - template::element_type, std::enable_if_t>{}, int> =0> - boost::multi::cuda::cublas::context* default_context_of(Ptr const&) { - namespace multi = boost::multi; - return &multi::cuda::cublas::context::get_instance(); - } - - template - boost::multi::cuda::cublas::context* default_context_of(boost::multi::memory::cuda::managed::ptr const&) { - namespace multi = boost::multi; - return &multi::cuda::cublas::context::get_instance(); - } - - template - boost::multi::cuda::cublas::context* default_context_of(::thrust::pointer const&) { - namespace multi = boost::multi; - return &multi::cuda::cublas::context::get_instance(); - } - -} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/error.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/error.hpp deleted file mode 100644 index 2ed9c57c12..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/error.hpp +++ /dev/null @@ -1,93 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -$CXXX $CXXFLAGS $0 -o $0.$X `pkg-config --cflags --libs cudart-11.0 cublas-11.0 blas` -lboost_unit_test_framework&&$0.$X&&rm $0.$X;exit -#endif -// © Alfredo A. 
Correa 2020 - -#ifndef MULTI_ADAPTORS_CUDA_CUBLAS_ERROR_HPP -#define MULTI_ADAPTORS_CUDA_CUBLAS_ERROR_HPP - -#include // cublasStatus_t - -#include -#include // std::error_category -#include // std::underlying_type - -namespace boost{ -namespace multi::cuda::cublas{ - -enum class error : typename std::underlying_type::type{ - success = CUBLAS_STATUS_SUCCESS, - not_initialized = CUBLAS_STATUS_NOT_INITIALIZED, - allocation_failed = CUBLAS_STATUS_ALLOC_FAILED, - invalid_value = CUBLAS_STATUS_INVALID_VALUE, - architecture_mismatch = CUBLAS_STATUS_ARCH_MISMATCH, - mapping_error = CUBLAS_STATUS_MAPPING_ERROR, - execution_failed = CUBLAS_STATUS_EXECUTION_FAILED, - internal_error = CUBLAS_STATUS_INTERNAL_ERROR, - not_supported = CUBLAS_STATUS_NOT_SUPPORTED, - license_error = CUBLAS_STATUS_LICENSE_ERROR -}; - -std::string inline error_string(enum cublas::error err){ //https://stackoverflow.com/questions/13041399/equivalent-of-cudageterrorstring-for-cublas - switch(err){ - case cublas::error::success : return "CUBLAS_STATUS_SUCCESS" ; - case cublas::error::not_initialized : return "CUBLAS_STATUS_NOT_INITIALIZED" ; - case cublas::error::allocation_failed : return "CUBLAS_STATUS_ALLOC_FAILED" ; - case cublas::error::invalid_value : return "CUBLAS_STATUS_INVALID_VALUE" ; - case cublas::error::architecture_mismatch: return "CUBLAS_STATUS_ARCH_MISMATCH" ; - case cublas::error::mapping_error : return "CUBLAS_STATUS_MAPPING_ERROR" ; - case cublas::error::execution_failed : return "CUBLAS_STATUS_EXECUTION_FAILED"; - case cublas::error::internal_error : return "CUBLAS_STATUS_INTERNAL_ERROR" ; - case cublas::error::not_supported : return "CUBLAS_STATUS_NOT_SUPPORTED" ; - case cublas::error::license_error : return "CUBLAS_STATUS_LICENSE_ERROR" ; - } - return "cublas status "; -} - -struct error_category : std::error_category{ - char const* name() const noexcept override{return "cublas wrapper";} - std::string message(int err) const override{return error_string(static_cast(err));} - 
static error_category& instance(){static cublas::error_category instance; return instance;} -}; - -inline std::error_code make_error_code(cublas::error err) noexcept{ - return std::error_code(int(err), cublas::error_category::instance()); -} - -} -} - -namespace std{ - template<> struct is_error_code_enum<::boost::multi::cuda::cublas::error> : true_type{}; -} - -#if not __INCLUDE_LEVEL__ // _TEST_MULTI_ADAPTORS_BLAS_CUDA - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuBLAS" -#define BOOST_TEST_DYN_LINK -#include - -//#include "../../array.hpp" -//#include "../../utility.hpp" - -//#include "../../adaptors/cuda.hpp" -//#include "../../adaptors/blas.hpp" -//#include "../../adaptors/blas/cuda.hpp" - -#include - -namespace multi = boost::multi; - - -BOOST_AUTO_TEST_CASE(multi_cublas_error){ - - BOOST_CHECK_THROW( - throw (std::system_error{multi::cuda::cublas::make_error_code(multi::cuda::cublas::error::not_initialized), "error test"}), - std::system_error - ); - -} - -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/test/CMakeLists.txt deleted file mode 100644 index cf9da27509..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/test/CMakeLists.txt +++ /dev/null @@ -1,74 +0,0 @@ -cmake_minimum_required(VERSION 3.11) - -set(CMAKE_VERBOSE_MAKEFILE ON) - -#project( -# boost-multi-adaptors-cuda-cublas-test -# VERSION 0.1 -# LANGUAGES CXX CUDA -#) - -find_package(Boost REQUIRED COMPONENTS unit_test_framework) - -find_package(BLAS REQUIRED) -find_path( - BLAS_INCLUDE_DIRS - cblas.h - /usr/include - /usr/local/include - $ENV{BLAS_HOME}/include -) - -link_libraries(${BLAS_LIBRARIES}) -include_directories(${TEST_EXE} PRIVATE ${BLAS_INCLUDE_DIRS}) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - -if(ENABLE_CUDA OR DEFINED CXXCUDA) - 
enable_language(CUDA) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -Xcudafe \"--display_error_number\"") -endif() - -find_package(CUDA) - -enable_testing() -list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") # needs cmake 3.17 -include(CTest) - -include_directories(${CMAKE_BINARY_DIR}) - -# file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) -set(TEST_SRCS # herk.cu - gemm.cu -) - -foreach(TEST_FILE ${TEST_SRCS}) - set(TEST_EXE "${TEST_FILE}.x") - add_executable(${TEST_EXE} ${TEST_FILE}) - if(ENABLE_CUDA OR DEFINED CXXCUDA) - set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) - target_compile_options(${TEST_EXE} PRIVATE -std=c++17) - endif() - # target_compile_features (${TEST_EXE} PUBLIC cxx_std_17) - target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") - target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS}) - - target_include_directories(${TEST_EXE} PRIVATE ${PROJECT_SOURCE_DIR}/include) - target_include_directories(${TEST_EXE} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) - target_include_directories(${TEST_EXE} PRIVATE ${CUDA_INCLUDE_DIRS}) - - target_link_libraries(${TEST_EXE} PRIVATE ${Boost_LIBRARIES}) - target_link_directories(${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS}) - - target_link_libraries(${TEST_EXE} PRIVATE ${CUDA_LIBRARIES}) - target_include_directories(${TEST_EXE} PRIVATE /opt/nvidia/hpc_sdk/Linux_x86_64/22.3/math_libs/include) - - # if(NOT ENABLE_CUDA) target_compile_options (${TEST_EXE} PRIVATE $<$: -Werror -Wall -Wextra -fno-common -Wpedantic -Wformat-truncation -fstack-usage>#-Wconversion - # $<$,$>: - # -Werror -Wall -Wextra -fno-common -Wpedantic -Wmove> $<$: -Werror -Wall -Wextra -fno-common -wd161 -diag-disable=remark -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized - # -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wparentheses -Wpointer-sign -Wreorder -Wno-return-type -Wsign-compare -Wsequence-point -Wtrigraphs 
-Wunused-function -Wunused-but-set-variable -Wunused-variable -Wwrite-strings - # -Werror -diag-error:3846 > $<$: /W4>) endif() - add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) -endforeach() diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/test/gemm.cu b/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/test/gemm.cu deleted file mode 100644 index 5d5b60b28f..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/cublas/test/gemm.cu +++ /dev/null @@ -1,75 +0,0 @@ -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUBLAS herk" -#define BOOST_TEST_DYN_LINK -#include - -//#include "../../../../adaptors/cuda.hpp" // multi::cuda ns -#include "../../../../adaptors/blas/gemm.hpp" -#include "../../../../adaptors/cuda/cublas.hpp" - -#include "../../../../adaptors/thrust.hpp" -// #include "../../../complex.hpp" - -#include - -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(multi_cublas_gemm_double){ - multi::array const a = { - { 1., 3., 4.}, - { 9., 7., 1.} - }; -// multi::thrust::cuda::array const a_gpu = a; -} - -BOOST_AUTO_TEST_CASE(multi_cublas_gemm_complex){ - using complex = std::complex; complex const I{0, 1}; - multi::array const a = { - { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I}, - { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I} - }; -// multi::thrust::cuda::array const a_gpu = a; -} - -//BOOST_AUTO_TEST_CASE(multi_cublas_gemm_thrust_complex){ -// using complex = thrust::complex; complex const I{0, 1}; -// multi::array const a = { -// { 1. + 3.*I, 3.- 2.*I, 4.+ 1.*I}, -// { 9. + 1.*I, 7.- 8.*I, 1.- 3.*I} -// }; -//// multi::thrust::cuda::array const a_gpu = a; -//} - -BOOST_AUTO_TEST_CASE(multi_cublas_gemm_complex2){ -// using complex = std::complex; complex const I{0, 1}; -// multi::array const a = { -// {1. + 2.*I, 5. + 2.*I}, -// {9. - 1.*I, 9. + 1.*I}, -// {1. + 1.*I, 2. + 2.*I} -// }; -// multi::array const b = { -// { 11. - 2.*I, 5. + 2.*I}, -// { 7. - 3.*I, 2. + 1.*I}, -// { 8. - 1.*I, 1. 
+ 1.*I} -// }; -//// multi::thrust::cuda::array const a_gpu = a; -//// multi::thrust::cuda::array const b_gpu = b; -// namespace blas = multi::blas; -// { -// multi::array c({3, 3}, 9999.); -// // blas::gemm(1., a, blas::H(b), 0., c); - -// // multi::thrust::cuda::array const c_gpu; -// // blas::gemm(1., a_gpu, b_gpu, c_gpu); -// // BOOST_REQUIRE( c == c_gpu ); -// } -// { -// multi::array c({3, 3}, 9999.); -// blas::herk(1., blas::H(a), c); -// BOOST_REQUIRE( c[2][1] == complex(41, +2) ); -// BOOST_REQUIRE( c[1][2] == complex(41, -2) ); - -// multi::array const c_copy = blas::herk(1., blas::H(a)); -// BOOST_REQUIRE( c_copy == c ); -// } -} - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/test/array.cu b/external_codes/boost_multi/multi/include/multi/adaptors/cuda/test/array.cu deleted file mode 100644 index 61fd434503..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/test/array.cu +++ /dev/null @@ -1,8 +0,0 @@ -#include "../../adaptors/thrust.hpp" - -int main(){ - - - -} - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/tests/array.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/cuda/tests/array.cpp deleted file mode 100644 index bf3dcd7bbd..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cuda/tests/array.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS//-*-indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4;-*- -$CXX $0 -o $0x -lcudart -lboost_timer -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. 
Correa 2020 -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuda adaptor" -#define BOOST_TEST_DYN_LINK -#include -namespace utf = boost::unit_test; -#include - -#include "../../../adaptors/cuda.hpp" - -#include - -namespace boost::multi::memory::cuda{ -template -void copy(array_iterator>, array_iterator>, array_iterator>){ - assert(0); -} -//std::copy, boost::multi::memory::cuda::ref >, boost::multi::array_iterator, boost::multi::memory::cuda::ref > > -} - -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(cudart_double, *utf::tolerance(0.00001)*utf::timeout(10)){ - - auto const in = []{ - multi::array r({32, 90, 98, 96}); - std::generate(data_elements(r), data_elements(r)+num_elements(r), &std::rand); - return r; - }(); - std::cout<<"memory size "<< in.num_elements()*sizeof(decltype(in)::element)/1e6 <<" MB\n"; - - { - boost::timer::auto_cpu_timer t{"%ws wall, CPU (%p%)\n"}; - multi::cuda::array const in_gpu = in; - - multi::array const in_cpy = in_gpu; - BOOST_REQUIRE( in == in_cpy ); - } - { - boost::timer::auto_cpu_timer t{"%ws wall, CPU (%p%)\n"}; - multi::cuda::array const in_gpu = in; - } - { - multi::cuda::array const in_gpu = in; - multi::cuda::array out_gpu = in; - boost::timer::auto_cpu_timer t{"copy assign gpu____ %ws wall, CPU (%p%)\n"}; - out_gpu = in_gpu; - auto c = static_cast(out_gpu[1][2][3][4]); (void)c; - - (out_gpu << 1) = (in_gpu << 1); - } - { - multi::cuda::managed::array const in_mng = in; - multi::cuda::managed::array out_mng = in; - { - boost::timer::auto_cpu_timer t{"copy assign mng____ %ws wall, CPU (%p%)\n"}; - out_mng = in_mng; - auto c = static_cast(out_mng[1][2][3][4]); (void)c; - } - { - boost::timer::auto_cpu_timer t{"copy assign mng_hot %ws wall, CPU (%p%)\n"}; - out_mng = in_mng; - auto c = static_cast(out_mng[1][2][3][4]); (void)c; - } - { - boost::timer::auto_cpu_timer t{"copy assign mng loop %ws wall, CPU (%p%)\n"}; - out_mng() = in_mng(); - auto c = static_cast(out_mng[1][2][3][4]); (void)c; - } - } -} - 
-BOOST_AUTO_TEST_CASE(cudart_complex, *utf::tolerance(0.00001)*utf::timeout(10)){ - - using complex = std::complex; - - auto const in = []{ - multi::array r({32, 90, 98, 96}); - std::generate(data_elements(r), data_elements(r)+num_elements(r), &std::rand); - return r; - }(); - std::cout<<"memory size "<< in.num_elements()*sizeof(decltype(in)::element)/1e6 <<" MB\n"; - - { - boost::timer::auto_cpu_timer t{"%ws wall, CPU (%p%)\n"}; - multi::cuda::array const in_gpu = in; - } - { - boost::timer::auto_cpu_timer t{"%ws wall, CPU (%p%)\n"}; - multi::cuda::array const in_gpu = in; - } - -} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cufft.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/cufft.hpp deleted file mode 100644 index 47ab4cb607..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cufft.hpp +++ /dev/null @@ -1,451 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. 
Correa - -#ifndef MULTI_ADAPTORS_CUFFTW_HPP -#define MULTI_ADAPTORS_CUFFTW_HPP - -#include "../config/MARK.hpp" - -#include "../adaptors/../utility.hpp" -#include "../adaptors/../array.hpp" -#include "../adaptors/../config/NODISCARD.hpp" - -#include "../adaptors/cuda.hpp" - -#include - -#include // std::apply -#include - -#include - -#include "../complex.hpp" - -#include - -namespace boost{ -namespace multi{ -namespace cufft{ - -class sign { - int impl_; - - public: - sign() = default; - constexpr sign(int i) : impl_{i} {} - constexpr operator int() const {return impl_;} -}; - -constexpr sign forward{CUFFT_FORWARD}; -constexpr sign none{0}; -constexpr sign backward{CUFFT_INVERSE}; - -static_assert(forward != none and none != backward and backward != forward, "!"); - -class plan { - using complex_type = cufftDoubleComplex; - complex_type const* idata_ = nullptr; - complex_type* odata_ = nullptr; - int direction_ = 0; - cufftHandle h_; - -public: - template - static - std::map, std::array, int, int, std::array, int, int, int>, cufftHandle>& - cache() { - static std::map, std::array, int, int, std::array, int, int, int>, cufftHandle> cache_; - return cache_; - } - - private: - plan() = default; - plan(plan const&) = delete; - plan(plan&& other) - : idata_{std::exchange(other.idata_, nullptr)} - , odata_{std::exchange(other.odata_, nullptr)} - , direction_{std::exchange(other.direction_, 0)} - , h_{std::exchange(other.h_, {})} - {} // needed in <=C++14 for return - void ExecZ2Z(complex_type const* idata, complex_type* odata, int direction) const{ - ++tl_execute_count; - // assert(idata_ and odata_); - // assert(direction_!=0); - cufftResult r = ::cufftExecZ2Z(h_, const_cast(idata), odata, direction); - switch(r){ - case CUFFT_SUCCESS : break;// "cuFFT successfully executed the FFT plan." 
- case CUFFT_INVALID_PLAN : throw std::runtime_error{"The plan parameter is not a valid handle."}; - // case CUFFT_ALLOC_FAILED : throw std::runtime_error{"CUFFT failed to allocate GPU memory."}; - // case CUFFT_INVALID_TYPE : throw std::runtime_error{"The user requests an unsupported type."}; - case CUFFT_INVALID_VALUE : throw std::runtime_error{"At least one of the parameters idata, odata, and direction is not valid."}; - case CUFFT_INTERNAL_ERROR : throw std::runtime_error{"Used for all internal driver errors."}; - case CUFFT_EXEC_FAILED : throw std::runtime_error{"CUFFT failed to execute an FFT on the GPU."}; - case CUFFT_SETUP_FAILED : throw std::runtime_error{"The cuFFT library failed to initialize."}; - // case CUFFT_INVALID_SIZE : throw std::runtime_error{"The user specifies an unsupported FFT size."}; - // case CUFFT_UNALIGNED_DATA : throw std::runtime_error{"Unaligned data."}; - // case CUFFT_INCOMPLETE_PARAMETER_LIST: throw std::runtime_error{"Incomplete parameter list."}; - // case CUFFT_INVALID_DEVICE : throw std::runtime_error{"Invalid device."}; - // case CUFFT_PARSE_ERROR : throw std::runtime_error{"Parse error."}; - // case CUFFT_NO_WORKSPACE : throw std::runtime_error{"No workspace."}; - // case CUFFT_NOT_IMPLEMENTED: throw std::runtime_error{"Not implemented."}; - // case CUFFT_LICENSE_ERROR : throw std::runtime_error{"License error."}; - // case CUFFT_NOT_SUPPORTED : throw std::runtime_error{"CUFFT_NOT_SUPPORTED"}; - default : throw std::runtime_error{"cufftExecZ2Z unknown error"}; - } - cudaDeviceSynchronize(); - } - void swap(plan& other) { - using std::swap; - swap(idata_, other.idata_); - swap(odata_, other.odata_); - swap(direction_, other.direction_); - swap(h_, other.h_); - } - - public: - thread_local static int tl_execute_count; - plan& operator=(plan other) {swap(other); return *this;} - void operator()() const {ExecZ2Z(idata_, odata_, direction_);} - template - O&& execute_dft(I&& i, O&& o, int direction) const { - ExecZ2Z( - 
const_cast(reinterpret_cast(base(i))), - const_cast(reinterpret_cast(base(o))), - direction - ); - return std::forward(o); - } - template - void execute_dft(I&& i, O&& o) const{execute_dft(std::forward(i), std::forward(o), direction_);} - ~plan() { - MULTI_MARK_SCOPE("cufft plan dtor"); - // if(h_) cufftDestroy(h_); - } - using size_type = int; - using ssize_type = int; - - template =0, - dimensionality_type D = I::dimensionality, - typename = decltype(raw_pointer_cast(base(std::declval())), reinterpret_cast(raw_pointer_cast(base(std::declval())))) - > - plan(I const& i, O&& o, sign s) - : idata_{ reinterpret_cast(raw_pointer_cast(base(i))) } - , odata_{const_cast(reinterpret_cast(raw_pointer_cast(base(o))))} - , direction_{s} - { - MULTI_MARK_SCOPE("cufft plan ctor"); - - assert( I::dimensionality < 4 ); - assert( CUFFT_FORWARD == s or CUFFT_INVERSE == s or s == 0 ); - assert( sizes(i) == sizes(o) ); - - auto ion = std::apply([](auto... t){return std::array< size_type, D>{static_cast< size_type>(t)...};}, sizes (i)); - auto istrides = std::apply([](auto... t){return std::array{static_cast(t)...};}, strides(i)); - auto ostrides = std::apply([](auto... 
t){return std::array{static_cast(t)...};}, strides(o)); - - std::array, I::dimensionality> ssn; - for(std::size_t i = 0; i != ssn.size(); ++i) {ssn[i] = std::make_tuple(istrides[i], ostrides[i], ion[i]);} - std::sort(ssn.begin(), ssn.end(), std::greater<>{}); - - for(std::size_t i = 0; i != ssn.size(); ++i) { - istrides[i] = std::get<0>(ssn[i]); - ostrides[i] = std::get<1>(ssn[i]); - ion[i] = std::get<2>(ssn[i]); - }// = std::tuple(istrides[i], ostrides[i], ion[i]); - - int istride = istrides.back(); - auto inembed = istrides; inembed.fill(0); - int ostride = ostrides.back(); - auto onembed = ostrides; onembed.fill(0); - for(std::size_t i = 1; i != onembed.size(); ++i) { - assert(ostrides[i-1] >= ostrides[i]); // otherwise ordering is incompatible - assert(ostrides[i-1]%ostrides[i]==0); - onembed[i]=ostrides[i-1]/ostrides[i]; // assert( onembed[i] <= ion[i] ); - assert(istrides[i-1]%istrides[i]==0); - inembed[i]=istrides[i-1]/istrides[i]; // assert( inembed[i] <= ion[i] ); - } - - direction_ = s; - idata_ = reinterpret_cast(raw_pointer_cast(base(i))) ; - odata_ = const_cast(reinterpret_cast(raw_pointer_cast(base(o)))); - - auto it = cache().find(std::make_tuple(ion, inembed, istride, 1, onembed, ostride, 1, 1)); - if(it != cache().end()) { - h_ = it->second; - }else{ - switch(::cufftPlanMany( - /*cufftHandle *plan*/ &h_, - /*int rank*/ ion.size(), - /*int *n*/ ion.data(), // /*NX*/ last - first, - /*int *inembed*/ inembed.data(), - /*int istride*/ istride, - /*int idist*/ 1, //stride(first), - /*int *onembed*/ onembed.data(), - /*int ostride*/ ostride, - /*int odist*/ 1, //stride(d_first), - /*cufftType type*/ CUFFT_Z2Z, - /*int batch*/ 1 //BATCH - )) { - case CUFFT_SUCCESS : break;// "cuFFT successfully executed the FFT plan." 
- case CUFFT_ALLOC_FAILED : throw std::runtime_error{"CUFFT failed to allocate GPU memory."}; - case CUFFT_INVALID_VALUE : throw std::runtime_error{"At least one of the parameters idata, odata, and direction is not valid."}; - case CUFFT_INTERNAL_ERROR : throw std::runtime_error{"Used for all internal driver errors."}; - case CUFFT_SETUP_FAILED : throw std::runtime_error{"The cuFFT library failed to initialize."}; - case CUFFT_INVALID_SIZE : throw std::runtime_error{"The user specifies an unsupported FFT size."}; - default : throw std::runtime_error{"cufftPlanMany unknown error"}; - } - cache().insert(std::make_pair(std::make_tuple(ion, inembed, istride, 1, onembed, ostride, 1, 1), h_)); - } - assert(h_); - } -#ifndef __INTEL_COMPILER - template - static auto many(It1 first, It1 last, It2 d_first, int sign = 0, unsigned = 0) - ->std::decay_t(reinterpret_cast(raw_pointer_cast(base(d_first)))), std::declval())> -#else - template(reinterpret_cast(raw_pointer_cast(It2{}.base()))) - > - static auto many(It1 first, It1 last, It2 d_first, int sign = 0, unsigned = 0) -#endif - { - MULTI_MARK_SCOPE("cufft plan many factory"); - - assert( CUFFT_FORWARD == sign or CUFFT_INVERSE == sign or sign == 0 ); - assert( sizes(*first) == sizes(*d_first) ); - - auto ion = std::apply([](auto... t){return std::array< size_type, D>{static_cast< size_type>(t)...};}, sizes (* first)); - - assert(strides(*first) == strides(*last)); - auto istrides = std::apply([](auto... t){return std::array{static_cast(t)...};}, strides(* first)); - auto ostrides = std::apply([](auto... 
t){return std::array{static_cast(t)...};}, strides(*d_first)); - - std::array, std::decay_t::dimensionality> ssn; - for(std::size_t i = 0; i != ssn.size(); ++i) ssn[i] = std::make_tuple(istrides[i], ostrides[i], ion[i]); - std::sort(ssn.begin(), ssn.end(), std::greater<>{}); - - for(std::size_t i = 0; i != ssn.size(); ++i){ - istrides[i] = std::get<0>(ssn[i]); - ostrides[i] = std::get<1>(ssn[i]); - ion[i] = std::get<2>(ssn[i]); - } - - int istride = istrides.back(); - auto inembed = istrides; inembed.fill(0); - int ostride = ostrides.back(); - auto onembed = ostrides; onembed.fill(0); - for(std::size_t i = 1; i != onembed.size(); ++i) { - assert(ostrides[i-1] >= ostrides[i]); // otherwise ordering is incompatible - assert(ostrides[i-1]%ostrides[i]==0); - onembed[i]=ostrides[i-1]/ostrides[i]; // assert( onembed[i] <= ion[i] ); - assert(istrides[i-1]%istrides[i]==0); - inembed[i]=istrides[i-1]/istrides[i]; // assert( inembed[i] <= ion[i] ); - } - - plan ret; - ret.direction_ = sign; - ret.idata_ = reinterpret_cast(raw_pointer_cast( first.base())) ; - ret.odata_ = const_cast(reinterpret_cast(raw_pointer_cast(d_first.base()))); - - auto it = cache().find(std::make_tuple(ion, inembed, istride, stride(first), onembed, ostride, stride(d_first), last - first)); - if(it != cache().end()) { - ret.h_ = it->second; - } else { - switch(::cufftPlanMany( - /*cufftHandle *plan*/ &ret.h_, - /*int rank*/ ion.size(), - /*int *n*/ ion.data(), // /*NX*/ last - first, - /*int *inembed*/ inembed.data(), - /*int istride*/ istride, - /*int idist*/ stride(first), - /*int *onembed*/ onembed.data(), - /*int ostride*/ ostride, - /*int odist*/ stride(d_first), - /*cufftType type*/ CUFFT_Z2Z, - /*int batch*/ last - first //BATCH - )) { - case CUFFT_SUCCESS : break;// "cuFFT successfully executed the FFT plan." 
- // case CUFFT_INVALID_PLAN : throw std::runtime_error{"The plan parameter is not a valid handle."}; - case CUFFT_ALLOC_FAILED : throw std::runtime_error{"CUFFT failed to allocate GPU memory."}; - // case CUFFT_INVALID_TYPE : throw std::runtime_error{"The user requests an unsupported type."}; - case CUFFT_INVALID_VALUE : throw std::runtime_error{"At least one of the parameters idata, odata, and direction is not valid."}; - case CUFFT_INTERNAL_ERROR : throw std::runtime_error{"Used for all internal driver errors."}; - // case CUFFT_EXEC_FAILED : throw std::runtime_error{"CUFFT failed to execute an FFT on the GPU."}; - case CUFFT_SETUP_FAILED : throw std::runtime_error{"The cuFFT library failed to initialize."}; - case CUFFT_INVALID_SIZE : throw std::runtime_error{"The user specifies an unsupported FFT size."}; - // case CUFFT_UNALIGNED_DATA : throw std::runtime_error{"Unaligned data."}; - // case CUFFT_INCOMPLETE_PARAMETER_LIST: throw std::runtime_error{"Incomplete parameter list."}; - // case CUFFT_INVALID_DEVICE : throw std::runtime_error{"Invalid device."}; - // case CUFFT_PARSE_ERROR : throw std::runtime_error{"Parse error."}; - // case CUFFT_NO_WORKSPACE : throw std::runtime_error{"No workspace."}; - // case CUFFT_NOT_IMPLEMENTED: throw std::runtime_error{"Not implemented."}; - // case CUFFT_LICENSE_ERROR : throw std::runtime_error{"License error."}; - // case CUFFT_NOT_SUPPORTED : throw std::runtime_error{"CUFFT_NOT_SUPPORTED"}; - default : throw std::logic_error{"cufftPlanMany unknown error"}; - } - cache().insert(std::make_pair(std::make_tuple(ion, inembed, istride, stride(first), onembed, ostride, stride(d_first), last - first), ret.h_)); - } - assert(ret.h_); - return ret; - } -}; - -thread_local int plan::tl_execute_count = 0; - -template -auto dft(In const& i, Out&& o, int s) -->decltype(cufft::plan{i, o, s}(), std::forward(o)) { - return cufft::plan{i, o, s}(), std::forward(o); } - -template()))>> -NODISCARD("when first argument is const") -R dft(In 
const& i, int s) { - static_assert(std::is_trivially_default_constructible{}, "!"); - R ret(extensions(i), get_allocator(i)); - cufft::dft(i, ret, s); - if(cudaDeviceSynchronize() != cudaSuccess) throw std::runtime_error{"Cuda error: Failed to synchronize"}; - return ret; -} - -#ifndef __INTEL_COMPILER -template -auto many_dft(It1 first, It1 last, It2 d_first, sign s) -->decltype(plan::many(first, last, d_first, s)(), d_first + (last - first)) { - return plan::many(first, last, d_first, s)(), d_first + (last - first); } -#else -template -auto many_dft(It1 first, It1 last, It2 d_first, sign s) -->decltype(plan::many(first, last, d_first, s)(), d_first + (last - first)) { - return plan::many(first, last, d_first, s)(), d_first + (last - first); } -#endif - -template = 0> -Out&& dft(std::array which, In const& i, Out&& o, int s) { - if(which[0]) return cufft::dft(i, std::forward(o), s); - else return std::forward(std::forward(o) = i); -} - -template -constexpr auto array_tail_impl(Array const& t, std::index_sequence) { - return std::array{} - 1>{std::get(t)...}; -} - -template -constexpr auto array_tail(Array const& t) -->decltype(array_tail_impl(t, std::make_index_sequence{} - 1>())) { - return array_tail_impl(t, std::make_index_sequence{} - 1>()); } - -template1), int> = 0> -auto dft(std::array which, In const& i, Out&& o, int s) -->decltype(many_dft(i.begin(), i.end(), o.begin(), s),std::forward(o)) -{ - assert(extension(i) == extension(o)); - auto ff = std::find(begin(which)+1, end(which), false); - if(which[0] == true) { - if(ff==end(which)) {cufft::dft(i, std::forward(o), s);} - else { - auto const n = ff - which.begin(); - std::rotate(begin(which), ff, end(which)); - // TODO(correaa) : make this more elegant - switch(n) { - case 0: dft(which, i , o , s); break; - case 1: dft(which, i.rotated() , o.rotated() , s); break; - case 2: dft(which, i.rotated().rotated() , o.rotated().rotated() , s); break; - case 3: dft(which, i.rotated().rotated().rotated(), 
o.rotated().rotated().rotated(), s); break; - default: assert(0); - } - } - } else if(which[0]==false) { - if(D==1 or std::none_of(begin(which)+1, end(which), [](auto e){return e;})){ - if(base(o) != base(i)) std::forward(o) = i; - else if(o.layout() != i.layout()) std::forward(o) = +i; - } - else if(ff==end(which)) many_dft(i.begin(), i.end(), o.begin(), s); - else{ - std::array tail = array_tail(which); - if(which[1] == false and i.is_flattable() and o.is_flattable()) cufft::dft(tail, i.flatted(), o.flatted(), s); - else{ - auto d_min = 0; auto n_min = size(i); - for(auto d = 0; d != D - 1; ++d) { - switch(d) { - case 0: if( (size(i ) < n_min) and (tail[d] == false)) {n_min = size(i ); d_min = d;} break; - case 1: if( (size(i.rotated() ) < n_min) and (tail[d] == false)) {n_min = size(i.rotated() ); d_min = d;} break; - case 2: if( (size(i.rotated().rotated() ) < n_min) and (tail[d] == false)) {n_min = size(i.rotated().rotated() ); d_min = d;} break; - case 3: if( (size(i.rotated().rotated().rotated()) < n_min) and (tail[d] == false)) {n_min = size(i.rotated().rotated().rotated()); d_min = d;} break; - default: assert(0); - } - // if((size(i<(o); -} - -template -NODISCARD("when passing a const argument") -auto dft(std::array which, In const& i, int sign)->std::decay_t{return -dft(which, i, typename In::decay_type(extensions(i), get_allocator(i)), sign);} - -template -auto dft(std::array which, In&& i, int sign) -->decltype(dft(which, i, i, sign), std::forward(i)){ - return dft(which, i, i, sign), std::forward(i);} - -//template auto dft_forward(A&&... 
a) -//->decltype(cufft::dft(std::forward(a)..., cufft::forward)){ -// return cufft::dft(std::forward(a)..., cufft::forward);} - -template NODISCARD("when passing a const argument") -auto dft_forward(Array arr, A const& a) -->decltype(cufft::dft(arr, a, cufft::forward)){ - return cufft::dft(arr, a, cufft::forward);} - -template NODISCARD("when passing a const argument") -auto dft_forward(Array arr, multi::cuda::array, D>&& a) -->decltype(cufft::dft(arr, a, cufft::forward), multi::cuda::array, D>{}){//assert(0); - return cufft::dft(arr, a, cufft::forward), std::move(a);} - -template NODISCARD("when passing a const argument") -auto dft_forward(A const& a) -->decltype(cufft::dft(a, cufft::forward)){ - return cufft::dft(a, cufft::forward);} - -template auto dft_backward(A&&... a) -->decltype(cufft::dft(std::forward(a)..., cufft::backward)){ - return cufft::dft(std::forward(a)..., cufft::backward);} - -template NODISCARD("when passing a const argument") -auto dft_backward(Array arr, A const& a) -->decltype(cufft::dft(arr, a, cufft::backward)){ - return cufft::dft(arr, a, cufft::backward);} - -template NODISCARD("when passing a const argument") -auto dft_backward(A const& a) -->decltype(cufft::dft(a, cufft::backward)){ - return cufft::dft(a, cufft::backward);} - -} - -}} -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/cufft/test/cufft.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/cufft/test/cufft.cpp deleted file mode 100644 index 88af65cfe6..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/cufft/test/cufft.cpp +++ /dev/null @@ -1,274 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4;-*- -$CXX $0 -o $0x -lcudart -lcufft `pkg-config --libs fftw3` -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. 
Correa 2020-2021 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuFFT adaptor" -#define BOOST_TEST_DYN_LINK -#include - -#include - -#include "../../../adaptors/cuda.hpp" -#include "../../../adaptors/fftw.hpp" -#include "../../../adaptors/cufft.hpp" - -#include -#include -#include "../../../complex.hpp" - -#include // cudaDeviceSynchronize - -#include - -namespace multi = boost::multi; -using complex = std::complex; -namespace utf = boost::unit_test; - - -template -__attribute__((always_inline)) inline void DoNotOptimize(const T &value) { - asm volatile("" : "+m"(const_cast(value))); -} - -struct watch : private std::chrono::high_resolution_clock{ - std::string label_; time_point start_; - watch(std::string label ="") : label_{label}, start_{}{ - cudaDeviceSynchronize(); - start_ = now(); - } - ~watch(){ - cudaDeviceSynchronize(); - auto const count = std::chrono::duration(now() - start_).count(); - std::cerr<< label_<<": "<< count <<" sec"<>::const_void_pointer cvp1 = nullptr; -// std::allocator_traits>>::const_void_pointer cvp2 = nullptr; - - multi::array const in_cpu = { - { 1. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} - }; - multi::array fw_cpu(extensions(in_cpu)); - multi::fftw::dft(in_cpu, fw_cpu, multi::fftw::forward); - - multi::cuda::array const in_gpu = in_cpu; - multi::cuda::array fw_gpu(extensions(in_gpu)); - multi::cufft::dft(in_gpu, fw_gpu, multi::cufft::forward); - - BOOST_TEST( std::imag(static_cast(fw_gpu[3][2]) - fw_cpu[3][2]) == 0. ); - - auto fw2_gpu = multi::cufft::dft(in_gpu, multi::cufft::forward); - BOOST_TEST( std::imag(static_cast(fw2_gpu[3][1]) - fw_cpu[3][1]) == 0. 
); - - multi::cuda::managed::array const in_mng = in_cpu; - multi::cuda::managed::array fw_mng(extensions(in_gpu)); - multi::cufft::dft(in_mng, fw_mng, multi::cufft::forward); - - BOOST_TEST( std::imag(fw_mng[3][2] - fw_cpu[3][2]) == 0. ); - -// auto fw2_mng = multi::fftw::dft(in_mng, multi::fftw::forward); -// BOOST_TEST( std::imag(fw2_mng[3][1] - fw_cpu[3][1]) == 0. ); - -} - -BOOST_AUTO_TEST_CASE(cufft_3D_timing, *boost::unit_test::tolerance(0.0001)){ - - auto x = std::make_tuple(300, 300, 300); - { - multi::array const in_cpu(x, 10.); - BOOST_ASSERT( in_cpu.num_elements()*sizeof(complex) < 2e9 ); - multi::array fw_cpu(extensions(in_cpu), 99.); - { - // boost::timer::auto_cpu_timer t; // 1.041691s wall, 1.030000s user + 0.000000s system = 1.030000s CPU (98.9%) - multi::fftw::dft(in_cpu, fw_cpu, multi::fftw::forward); - BOOST_TEST( fw_cpu[8][9][10] != 99. ); - } - } - { - multi::cuda::array const in_gpu(x, 10.); - multi::cuda::array fw_gpu(extensions(in_gpu), 99.); - { - // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) - multi::cufft::dft(in_gpu, fw_gpu, multi::fftw::forward); - - BOOST_TEST( static_cast(fw_gpu[8][9][10]) != 99. ); - } - } - { - multi::cuda::managed::array const in_gpu(x, 10.); - multi::cuda::managed::array fw_gpu(extensions(in_gpu), 99.); - { - // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) - multi::cufft::dft(in_gpu, fw_gpu, multi::cufft::forward); - // BOOST_TEST( fw_gpu[8][9][10].operator complex() != 99. ); - } - { - // boost::timer::auto_cpu_timer t; // 0.208237s wall, 0.200000s user + 0.010000s system = 0.210000s CPU (100.8%) - multi::cufft::dft(in_gpu, fw_gpu, multi::cufft::forward); - // BOOST_TEST( fw_gpu[8][9][10].operator complex() != 99. 
); - } - } -} - -BOOST_AUTO_TEST_CASE(cufft_combinations, *utf::tolerance(0.00001)){ - - auto const in = []{ - multi::array ret({32, 90, 98, 96}); - std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), - [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} - ); - return ret; - }(); - std::clog<<"memory size "<< in.num_elements()*sizeof(complex)/1e6 <<" MB\n"; - - multi::cuda::array const in_gpu = in; - multi::cuda::managed::array const in_mng = in; - - using std::clog; - for(auto c : std::vector>{ - {false, true , true , true }, - {false, true , true , false}, - {true , false, false, false}, - {true , true , false, false}, - {false, false, true , false}, - {false, false, false, false}, - }){ - std::clog<<"case "; copy(begin(c), end(c), std::ostream_iterator{std::clog,", "}); std::clog< out = in; - multi::array in_rw = in; - [&, _ = watch{"cpu_opl "}]{ - multi::fftw::dft_forward(c, in, out); - }(); - [&, _ = watch{"cpu_ipl "}]{ - multi::fftw::dft(c, in_rw, multi::fftw::forward); - BOOST_TEST( abs( static_cast>(in_rw[5][4][3][1]) - multi::complex(out[5][4][3][1]) ) == 0. ); - }(); - { - multi::array in_rw2 = in; - [&, _ = watch{"cpu_mov "}]{ - multi::array const out_mov = multi::fftw::dft_forward(c, std::move(in_rw2)); - // what(out_mov); - BOOST_TEST( abs( static_cast>(out_mov[5][4][3][1]) - multi::complex(out[5][4][3][1]) ) == 0. ); - BOOST_REQUIRE( is_empty(in_rw2) ); - BOOST_REQUIRE( extensions(out_mov) == extensions(in) ); - }(); - } - - - [&, _ = watch{"cpu_new "}]{ - auto const out_cpy = multi::fftw::dft_forward(c, in); - BOOST_TEST( abs( static_cast>(out_cpy[5][4][3][1]) - multi::complex(out[5][4][3][1]) ) == 0. ); - }(); - multi::cuda::array out_gpu(extensions(in_gpu)); - [&, _ = watch{"gpu_opl "}]{ - multi::cufft::dft(c, in_gpu , out_gpu, multi::cufft::forward); - BOOST_TEST( abs( static_cast(out_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. 
); - }(); - { - multi::cuda::array in_rw_gpu = in_gpu; - [&, _ = watch{"gpu_ipl "}]{ - multi::cufft::dft(c, in_rw_gpu, multi::cufft::forward); - BOOST_TEST( abs( static_cast(in_rw_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. ); - }(); - } - { - multi::cuda::array in_rw_gpu = in_gpu; - [&, _ = watch{"gpu_mov "}]{ - multi::cuda::array const out_mov = multi::cufft::dft_forward(c, std::move(in_rw_gpu)); - // BOOST_REQUIRE( in_rw_gpu.empty() ); - // BOOST_TEST( abs( static_cast(out_mov[5][4][3][1]) - out[5][4][3][1] ) == 0. ); - }(); - } - { - multi::cuda::array in_rw_gpu = in_gpu; - [&, _ = watch{"gpu_mov "}]{ - multi::cuda::array out_mov = std::move(in_rw_gpu); - multi::cufft::dft(c, out_mov, multi::cufft::forward); - // BOOST_REQUIRE( in_rw_gpu.empty() ); - // BOOST_TEST( abs( static_cast(out_mov[5][4][3][1]) - out[5][4][3][1] ) == 0. ); - }(); - } - cudaDeviceSynchronize(); - [&, _ = watch{"gpu_new "}]{ - multi::cuda::array const out_cpy = multi::cufft::dft(c, in_gpu, multi::cufft::forward); - }(); - multi::cuda::managed::array out_mng(extensions(in_mng)); - [&, _ = watch{"mng_cld "}]{ - multi::cufft::dft(c, in_mng, out_mng, multi::cufft::forward); - BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. ); - }(); - [&, _ = watch{"mng_hot "}]{ - multi::cufft::dft(c, in_mng , out_mng, multi::cufft::forward); - BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. ); - }(); - [&, _ = watch{"mng_new "}]{ - auto const out_mng = multi::cufft::dft(c, in_mng, multi::cufft::forward); - BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. 
); - }(); - } - std::clog<<"cache size " - << multi::cufft::plan::cache<1>().size() <<' ' - << multi::cufft::plan::cache<2>().size() <<' ' - << multi::cufft::plan::cache<3>().size() <<' ' - << multi::cufft::plan::cache<4>().size() <<' ' - < ret({45, 18, 32, 16}); - std::generate( - ret.data_elements(), ret.data_elements() + ret.num_elements(), - [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} - ); - return ret; - }(); - - multi::cuda::array const in = in_cpu; - multi::cuda::array out(extensions(in)); - -#if 0 - multi::cufft::many_dft(begin(unrotated(in)), end(unrotated(in)), begin(unrotated(out)), +1); - - multi::array out_cpu(extensions(in)); - multi::fft::many_dft(begin(unrotated(in_cpu)), end(unrotated(in_cpu)), begin(unrotated(out_cpu)), +1); - - BOOST_TEST( imag( static_cast(out[5][4][3][2]) - out_cpu[5][4][3][2]) == 0. ); -#endif -} - -BOOST_AUTO_TEST_CASE(cufft_4D, *utf::tolerance(0.00001) ){ - auto const in = []{ - multi::array ret({10, 10, 10}); - std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), - [](){return complex{std::rand()*1./RAND_MAX, std::rand()*1./RAND_MAX};} - ); - return ret; - }(); - - multi::array out(extensions(in)); -// multi::fftw::dft({true, false, true}, in, out, multi::fftw::forward); - multi::fftw::many_dft(begin(in<<1), end(in<<1), begin(out<<1), multi::fftw::forward); - - multi::cuda::array in_gpu = in; - multi::cuda::array out_gpu(extensions(in)); - -// multi::cufft::dft({true, false, true}, in_gpu, out_gpu, multi::fft::forward);//multi::cufft::forward); - multi::cufft::many_dft(begin(in_gpu<<1), end(in_gpu<<1), begin(out_gpu<<1), multi::fftw::forward); - BOOST_TEST( imag( static_cast(out_gpu[5][4][3]) - out[5][4][3]) == 0. 
); -} - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fft.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/fft.hpp deleted file mode 100644 index 3b4474c519..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fft.hpp +++ /dev/null @@ -1,131 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -$CXX $0 -o $0x -lcudart -lcufft `pkg-config --libs fftw3` -lboost_timer -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2020 - -#ifndef MULTI_ADAPTORS_FFT_HPP -#define MULTI_ADAPTORS_FFT_HPP - -#include "../adaptors/fftw.hpp" -#include "../adaptors/cufft.hpp" - -namespace boost{ -namespace multi{ -namespace fft{ - - static constexpr int forward = fftw::forward;//FFTW_FORWARD; - static constexpr int none = 0; - static constexpr int backward = fftw::backward;//FFTW_BACKWARD; - - static_assert( forward != none and none != backward and backward != forward, "!"); - - template struct priority : std::conditional_t>{}; - - template auto dft_aux_(priority<0>, Args&&... args) DECLRETURN( fftw::dft(std::forward(args)...)) - template auto dft_aux_(priority<1>, Args&&... args) DECLRETURN(cufft ::dft(std::forward(args)...)) - template auto dft(Args&&... args) DECLRETURN(dft_aux_(priority<1>{}, std::forward(args)...)) - - template auto dft(std::array::dimensionality> which, In&& in, Args&&... args) DECLRETURN(dft_aux_(priority<1>{}, which, std::forward(in), std::forward(args)...)) - - template auto many_dft_aux_(priority<0>, Args&&... args) DECLRETURN( fftw::many_dft(std::forward(args)...)) - template auto many_dft_aux_(priority<1>, Args&&... args) DECLRETURN(cufft ::many_dft(std::forward(args)...)) - template auto many_dft(Args&&... args) DECLRETURN(many_dft_aux_(priority<1>{}, std::forward(args)...)) - - template auto dft_forward_aux_(priority<0>, Args&&... 
args) DECLRETURN( fftw::dft_forward(std::forward(args)...)) - template auto dft_forward_aux_(priority<1>, Args&&... args) DECLRETURN(cufft ::dft_forward(std::forward(args)...)) - template auto dft_forward(Args&&... args) DECLRETURN(dft_forward_aux_(priority<1>{}, std::forward(args)...)) - template auto dft_forward(std::array::dimensionality> which, In&& in, Args&&... args) DECLRETURN(dft_forward_aux_(priority<1>{}, which, std::forward(in), std::forward(args)...)) - - template auto dft_backward_aux_(priority<0>, Args&&... args) DECLRETURN( fftw::dft_backward(std::forward(args)...)) - template auto dft_backward_aux_(priority<1>, Args&&... args) DECLRETURN(cufft ::dft_backward(std::forward(args)...)) - template auto dft_backward(Args&&... args) DECLRETURN(dft_backward_aux_(priority<1>{}, std::forward(args)...)) - template auto dft_backward(std::array::dimensionality> which, In&& in, Args&&... args) DECLRETURN(dft_backward_aux_(priority<1>{}, which, std::forward(in), std::forward(args)...)) - -}}} - -#if not __INCLUDE_LEVEL__ - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFT adaptor" -#define BOOST_TEST_DYN_LINK -#include - -#include -#include - -namespace utf = boost::unit_test; - -using complex = std::complex; -namespace multi = boost::multi; - -using std::cout; - -BOOST_AUTO_TEST_CASE(fft_combinations, *utf::tolerance(0.00001)){ - cout<< "# threads is " << multi::fftw::plan::with_nthreads() <<"\n"; - cout<<"=========================================================\n"; - cout<< BOOST_PLATFORM <<' '<< BOOST_COMPILER <<' '<< __DATE__<<'\n'; - - auto const in = []{ - multi::array ret({32, 90, 98, 96}); - std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), - [](){return complex{std::rand()/1./RAND_MAX, std::rand()/1./RAND_MAX};} - ); - return ret; - }(); - std::cout<<"memory size "<< in.num_elements()*sizeof(complex)/1e6 <<" MB\n"; - - multi::cuda::array const in_gpu = in; - multi::cuda::managed::array const in_mng = in; - - std::vector> 
cases = { - {false, true , true , true }, - {false, true , true , false}, - {true , false, false, false}, - {true , true , false, false}, - {false, false, true , false}, - {false, false, false, false}, - }; - - for(auto c : cases){ - cout<<"case: "<{cout,", "}); cout<<"\n"; - - multi::array out(extensions(in)); - { - cout<<"flops "<< multi::fftw::plan(c, in, out, multi::fft::forward).flops() <<"\n"; - boost::timer::auto_cpu_timer t{"cpu____ %ws wall, CPU (%p%)\n"}; - multi::fft::dft(c, in, out, multi::fft::forward); - } - { - boost::timer::auto_cpu_timer t{"cpu_hot %ws wall, CPU (%p%)\n"}; - multi::fft::dft(c, in, out, multi::fft::forward); - } - multi::cuda::array out_gpu(extensions(in_gpu)); - { - boost::timer::auto_cpu_timer t{"gpu_cld %ws wall, CPU (%p%)\n"}; - multi::fft::dft(c, in_gpu , out_gpu , multi::fft::forward); - BOOST_TEST( abs( static_cast(out_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. ); - } - { - boost::timer::auto_cpu_timer t{"gpu_hot %ws wall, CPU (%p%)\n"}; - multi::fft::dft(c, in_gpu , out_gpu , multi::fft::forward); -// BOOST_TEST( abs( static_cast(out_gpu[5][4][3][1]) - out[5][4][3][1] ) == 0. ); - } - multi::cuda::managed::array out_mng(extensions(in_mng)); - { - boost::timer::auto_cpu_timer t{"mng_cld %ws wall, CPU (%p%)\n"}; - multi::fft::dft(c, in_mng , out_mng , multi::fft::forward); - cudaDeviceSynchronize(); - BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. ); - } - { - /// boost::timer::auto_cpu_timer t{"mng_hot %ws wall, CPU (%p%)\n"}; - multi::fft::dft(c, in_mng() , out_mng() , multi::fft::forward); - cudaDeviceSynchronize(); - BOOST_TEST( abs( out_mng[5][4][3][1] - out[5][4][3][1] ) == 0. 
); - } - } - -} -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/fftw.hpp deleted file mode 100644 index 380c87dc69..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw.hpp +++ /dev/null @@ -1,833 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa - -#ifndef MULTI_ADAPTORS_FFTW_HPP -#define MULTI_ADAPTORS_FFTW_HPP - -#include "../adaptors/../array.hpp" - -#include "../detail/tuple_zip.hpp" - -#include // sort -#include -#include // accumulate - -#if HAVE_FFTW3_THREADS -#include -#endif - -#include // external fftw3 library - -namespace boost::multi { - -namespace fftw { -// template auto alignment_of(T* p){return ::fftw_alignment_of((double*)p);} -#if __cpp_lib_as_const >= 201510 -using std::as_const; -#else -template constexpr std::add_const_t& as_const(T& t) noexcept{return t;} -#endif - -struct flags { - using underlying_type = decltype(FFTW_PRESERVE_INPUT); // NOLINT(hicpp-signed-bitwise) : macro definition in external library - - private: - underlying_type underlying_; - - public: - constexpr explicit flags(underlying_type underlying) : underlying_{underlying}{} - constexpr explicit operator underlying_type() const{return underlying_;} - friend constexpr auto operator|(flags f1, flags f2){return flags{f1.underlying_ | f2.underlying_};} -}; - - -constexpr flags estimate {FFTW_ESTIMATE }; // NOLINT(hicpp-signed-bitwise) : defined in an external lib 1U << 6 -constexpr flags measure {FFTW_MEASURE }; - -constexpr flags preserve_input{FFTW_PRESERVE_INPUT}; // NOLINT(hicpp-signed-bitwise) : defined in an external lib 1U << 4 -// // NOLINT(): this is a defect in FFTW https://github.com/FFTW/fftw3/issues/246 - -} // end namespace fftw - -#if 0 -template -auto fftw_plan_dft_1d( - Size N, - std::complex const* in, std::complex* out, int sign, - unsigned flags = 
FFTW_ESTIMATE -){ -#ifndef NDEBUG - auto check = in[N/3]; // check that const data will not been overwritten -#endif - assert( fftw::alignment_of(in) == fftw::alignment_of(out) ); - auto ret=::fftw_plan_dft_1d(N, (fftw_complex*)in, (fftw_complex*)out, sign, flags | FFTW_PRESERVE_INPUT ); - assert(check == in[N/3]); // check that const data has not been overwritten - return ret; -} - -template -auto fftw_plan_dft_1d( - Size N, - std::complex* in, std::complex* out, int sign, - unsigned flags = FFTW_ESTIMATE -){ - assert( fftw::alignment_of(in) == fftw::alignment_of(out) ); - return ::fftw_plan_dft_1d(N, (fftw_complex*)in, (fftw_complex*)out, sign, flags); -} - -template -auto fftw_plan_dft_2d( - Size N1, Size N2, - std::complex const* in, std::complex* out, int sign, - unsigned flags = FFTW_ESTIMATE -){ - assert( fftw::alignment_of(in) == fftw::alignment_of(out) ); -#ifndef NDEBUG - auto check = in[N1*N2/3]; // check that const data will not been overwritten -#endif - auto ret = ::fftw_plan_dft_2d(N1, N2, (fftw_complex*)in, (fftw_complex*)out, sign, flags | FFTW_PRESERVE_INPUT); - assert( check == in[N1*N2/3] ); // check that const data has not been overwritten - return ret; -} - -template -auto fftw_plan_dft_2d( - Size N1, Size N2, - std::complex* in, std::complex* out, int sign, - unsigned flags = FFTW_ESTIMATE -){ - assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out)); - return ::fftw_plan_dft_2d(N1, N2, (fftw_complex*)in, (fftw_complex*)out, sign, flags); -} - -template -auto fftw_plan_dft_3d( - Size N1, Size N2, Size N3, - std::complex* in, std::complex* out, int sign, - unsigned flags = FFTW_ESTIMATE -){ - assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out)); - return ::fftw_plan_dft_3d(N1, N2, N3, (fftw_complex*)in, (fftw_complex*)out, sign, flags); -} -template -auto fftw_plan_dft_3d( - Size N1, Size N2, Size N3, - std::complex const* in, std::complex* out, int sign, - unsigned flags = FFTW_ESTIMATE -){ - assert( 
flags & FFTW_PRESERVE_INPUT ); - assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out)); - return ::fftw_plan_dft_3d(N1, N2, N3, (fftw_complex*)in, (fftw_complex*)out, sign, flags | FFTW_PRESERVE_INPUT); -} -#endif - -#if 0 -template -auto fftw_plan_dft( - Rank r, int* ns, - std::complex* in, std::complex* out, - int sign, unsigned flags = FFTW_ESTIMATE -){ - assert(fftw_alignment_of((double*)in) == fftw_alignment_of((double*)out)); - return ::fftw_plan_dft(r, ns, (fftw_complex*)in, (fftw_complex*)out, sign, flags); -} -template -auto fftw_plan_dft( - RankType r, int* ns, - std::complex const* in, std::complex* out, - int sign, unsigned flags = FFTW_ESTIMATE | FFTW_PRESERVE_INPUT -){ - assert( flags & FFTW_PRESERVE_INPUT ); - assert(fftw::alignment_of(in) == fftw::alignment_of(out)); -#ifndef NDEBUG - size_t ne = 1; for(RankType i = 0; i != r; ++i) ne*=ns[i]; - auto check = in[ne/3]; // check that const data will not been overwritten -#endif - auto ret=::fftw_plan_dft(r, ns, (fftw_complex*)in, (fftw_complex*)out, sign, flags); - assert(check == in[ne/3]); // check that const data has not been overwritten - return ret; -} -#endif - -#if 0 -template -auto fftw_plan_dft_1d( - In&& in, Out&& out, int sign, unsigned flags = FFTW_ESTIMATE -){ - static_assert(in.dimensionality == 1, "!"); assert(size(in) == size(out)); - assert( in.is_compact() ); assert( out.is_compact() ); - return multi::fftw_plan_dft_1d(size(in), data_elements(in), data_elements(out), sign, flags); -} - -template -auto fftw_plan_dft_2d( - In&& in, Out&& out, int sign, unsigned flags = FFTW_ESTIMATE -){ - static_assert(in.dimensionality == 2, "!"); assert(in.sizes() == out.sizes()); - assert( in.is_compact() ); assert( out.is_compact() ); - return multi::fftw_plan_dft_2d( - sizes(in)[0], sizes(in)[1], - data_elements(in), data_elements(out), sign, flags - ); -} - -template -auto fftw_plan_dft_3d( - In&& in, Out&& out, int sign, unsigned flags = FFTW_ESTIMATE -){ - 
static_assert(in.dimensionality == 3, "!"); assert(in.sizes() == out.sizes()); - assert( in.is_compact() ); assert( out.is_compact() ); - return multi::fftw_plan_dft_3d( - sizes(in)[0], sizes(in)[1], sizes(in)[2], - data(in), data(out), - sign, flags - ); -} -#endif - -template -constexpr auto to_array(Tpl const& tpl) { - return std::apply( - [](auto const&... elems) {return std::array::value>{static_cast(elems)...};}, - tpl - ); -} - -#if 0 -#if(__cpp_if_constexpr>=201606) -//https://stackoverflow.com/a/35110453/225186 -template constexpr auto _constx(T&&t) -> std::remove_reference_t{return t;} -#define logic_assert(C, M) \ - if constexpr(noexcept(_constx(C))) static_assert((C), M); else assert((C)&&(M)); -#else -#define logic_assert(ConditioN, MessagE) assert(ConditioN && MessagE); -#endif -#endif - -template< - typename It1, class It2, - std::enable_if_t{} or std::is_convertible*>{}, int> =0 -> -auto fftw_plan_many_dft(It1 first, It1 last, It2 d_first, int sign, fftw::flags flags) --> fftw_plan { - - static_assert( sizeof(*base( first)) == sizeof(real(*base( first))) + sizeof(imag(*base( first))), "input must have complex pod layout"); - static_assert( sizeof(*base( first)) == sizeof(fftw_complex) , "input must have complex pod layout"); - static_assert( sizeof(*base(d_first)) == sizeof(real(*base(d_first))) + sizeof(imag(*base(d_first))), "output must have complex pod layout"); - static_assert( sizeof(*base(d_first)) == sizeof(fftw_complex) , "output must have complex pod layout"); - - assert(strides(*first) == strides(*last)); - assert(sizes(*first)==sizes(*d_first)); - - auto const ssn_tuple = multi::detail::tuple_zip(strides(*first ), strides(*d_first), sizes(*first)); - auto ssn = std::apply([](auto... ssn) { - using boost::multi::detail::get; - return std::array, sizeof...(ssn)>{ - boost::multi::detail::mk_tuple(static_cast(get<0>(ssn)), static_cast(get<1>(ssn)), static_cast(get<2>(ssn)))... 
- }; - }, ssn_tuple); - std::sort(ssn.begin(), ssn.end(), std::greater<>{}); - - auto const istrides = [&]() { - std::array::rank::value> istrides{}; - using boost::multi::detail::get; - std::transform(ssn.begin(), ssn.end(), istrides.begin(), [](auto elem) {return get<0>(elem);}); - return istrides; - }(); - - auto const ostrides = [&]() { - std::array::rank::value> ostrides{}; - using boost::multi::detail::get; - std::transform(ssn.begin(), ssn.end(), ostrides.begin(), [](auto elem) {return get<1>(elem);}); - return ostrides; - }(); - assert( std::is_sorted(ostrides.begin(), ostrides.end(), std::greater<>{}) ); // otherwise ordering is incompatible - - auto const ion = [&]() { - std::array::rank::value> ion {}; - using boost::multi::detail::get; - std::transform(ssn.begin(), ssn.end(), ion .begin(), [](auto elem) {return get<2>(elem);}); - return ion; - }(); - - auto const inembed = [&]() { - std::array::rank::value + 1> inembed{}; - std::adjacent_difference( - istrides.rbegin(), istrides.rend(), inembed.rbegin(), [](auto alpha, auto omega) {assert(omega != 0 and alpha%omega == 0); return alpha/omega;} - ); - return inembed; - }(); - - auto const onembed = [&]() { - std::array::rank::value + 1> onembed{}; - std::adjacent_difference( - ostrides.rbegin(), ostrides.rend(), onembed.rbegin(), [](auto alpha, auto omega) {assert(omega != 0 and alpha%omega == 0); return alpha/omega;} - ); - return onembed; - }(); - - auto ret = ::fftw_plan_many_dft( - /*int rank */ ion.size(), - /*const int* n */ ion.data(), - /*int howmany */ last - first, - /*fftw_complex* in */ reinterpret_cast(const_cast*>(static_cast const*>(base(first)))), // NOLINT(cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-pro-type-reinterpret-cast) input data - /*const int* inembed */ inembed.data(), - /*int istride */ istrides.back(), - /*int idist */ stride(first), - /*fftw_complex* out */ reinterpret_cast(static_cast*>(base(d_first))), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) adapt 
types - /*const int* onembed */ onembed.data(), - /*int ostride */ ostrides.back(), - /*int odist */ stride(d_first), - /*int */ sign, - /*unsigned */ static_cast(flags) - ); - assert(ret); // if you get null here it could be because your library doesn't support this fftw call mode - return ret; -} - -template< - typename It1, class It2, - std::enable_if_t{} or std::is_convertible*>{}, int> = 0 -> -auto fftw_plan_many_dft(It1 first, It1 last, It2 d_first, int sign) -->fftw_plan { - return fftw_plan_many_dft(first, last, d_first, sign, fftw::estimate); -} - -template< - class In, class Out, dimensionality_type D = std::decay_t::rank_v, - class=std::enable_if_t::rank_v>, - class=decltype(reinterpret_cast(/*static_cast *>*/(base(std::declval())))) // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) interact with legacy code -> -auto fftw_plan_dft(std::array which, In&& in, Out&& out, int sign, fftw::flags flags) -> fftw_plan { - static_assert( sizeof(*base(in )) == sizeof((*base(in )).real()) + sizeof((*base(in)).imag()) and sizeof(*base(in)) == sizeof(fftw_complex), - "input must have complex pod layout" ); - static_assert( sizeof(*base(out)) == sizeof((*base(out)).real()) + sizeof((*base(in)).imag()) and sizeof(*base(out)) == sizeof(fftw_complex), - "output must have complex pod layout" ); - - assert(in.sizes() == out.sizes()); - - auto const sizes_tuple = in.sizes(); - auto const istride_tuple = in.strides(); - auto const ostride_tuple = out.strides(); - - using boost::multi::detail::get; - auto which_iodims = std::apply([](auto... elems){ - return std::array, sizeof...(elems)>{ // TODO(correaa) use CTAD? - std::pair{ - get<0>(elems), - fftw_iodim64{get<1>(elems), get<2>(elems), get<3>(elems)} - }... 
- }; - }, boost::multi::detail::tuple_zip(which, sizes_tuple, istride_tuple, ostride_tuple)); - auto const part = std::stable_partition(which_iodims.begin(), which_iodims.end(), [](auto elem) {return std::get<0>(elem);}); - - std::array dims{}; - auto const dims_end = std::transform(which_iodims.begin(), part, dims.begin(), [](auto elem) {return elem.second;}); - - std::array howmany_dims{}; - auto const howmany_dims_end = std::transform(part, which_iodims.end() , howmany_dims.begin(), [](auto elem) {return elem.second;}); - - assert( in .base() ); - assert( out.base() ); - - assert( in.extensions() == out.extensions() ); - - assert( (sign == -1) or (sign == +1) ); - - fftw_plan ret = fftw_plan_guru64_dft( - /*int rank */ dims_end - dims.begin(), - /*const fftw_iodim64 *dims */ dims.data(), - /*int howmany_rank */ howmany_dims_end - howmany_dims.begin(), - /*const fftw_iodim *howmany_dims */ howmany_dims.data(), - /*fftw_complex *in */ const_cast(reinterpret_cast(/*static_cast const *>*/(in.base()))), // NOLINT(cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-pro-type-reinterpret-cast) FFTW is taken as non-const while it is really not touched - /*fftw_complex *out */ reinterpret_cast(/*static_cast *>*/(out.base())), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) - sign, static_cast(flags) // | FFTW_ESTIMATE - ); - - assert(ret &&"fftw lib returned a null plan, if you are using MKL check the limitations of their fftw interface"); - //https://software.intel.com/content/www/us/en/develop/documentation/mkl-developer-reference-c/top/appendix-d-fftw-interface-to-intel-math-kernel-library/fftw3-interface-to-intel-math-kernel-library/using-fftw3-wrappers.html - return ret; -} - -template< - class In, class Out, dimensionality_type D = std::decay_t::rank_v, - class=std::enable_if_t::rank_v>, - class=decltype(reinterpret_cast(/*static_cast *>*/(base(std::declval())))) // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : interact with legacy code -> -auto 
fftw_plan_dft(std::array which, In&& in, Out&& out, int sign) -> fftw_plan{ - return fftw_plan_dft(which, std::forward(in), std::forward(out), sign, fftw::estimate); -} - -template(multi::implicit_cast*>(std::declval())))> // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : interact with legacy code -auto fftw_plan_dft(multi::layout_t const& in_layout, PtrIn in_base, multi::layout_t const& out_layout, PtrOut out_base, int dir, fftw::flags flags) { - using multi::sizes; using multi::strides; - - assert( in_layout.sizes() == out_layout.sizes() ); - - auto const dims = std::apply([](auto... elems){ - using boost::multi::detail::get; - return std::array{ - fftw_iodim64{get<0>(elems), get<1>(elems), get<2>(elems)} - ... - }; - }, boost::multi::detail::tuple_zip(in_layout.sizes(), in_layout.strides(), out_layout.strides())); - - auto ret = fftw_plan_guru64_dft( - /*int rank */ dir?D:0, - /*const fftw_iodim64 *dims */ dims.data(), - /*int howmany_rank */ 0, - /*const fftw_iodim *howmany_dims */ nullptr, //howmany_dims.data(), - /*fftw_complex *in */ const_cast(reinterpret_cast( static_cast const*>(in_base ))), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-pro-type-const-cast) : interact with legacy code - /*fftw_complex *out */ reinterpret_cast(multi::implicit_cast *>(out_base)) , // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : interact with legacy code - dir, static_cast(flags) - ); - assert(ret); - return ret; -} - -template //, typename = decltype(reinterpret_cast(multi::implicit_cast*>(std::declval())))> // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : interact with legacy code -auto fftw_plan_dft(multi::layout_t const& in_layout, multi::layout_t const& out_layout, int dir, fftw::flags flags) { - return fftw_plan_dft(in_layout, nullptr, out_layout, nullptr, dir, flags | fftw::estimate); -} - -template(multi::implicit_cast*>(base(std::declval()))))> // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : interact with 
legacy code -auto fftw_plan_dft(In const& in, Out&& out, int dir, fftw::flags flags) { - return fftw_plan_dft(in.layout(), in.base(), out.layout(), out.base(), dir, flags); -} - -template(multi::implicit_cast*>(base(std::declval()))))> // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : interact with legacy code -auto fftw_plan_dft(In const& in, Out&& out, int dir) { - return fftw_plan_dft(in, out, dir, fftw::estimate); -} - -namespace fftw { - -#if HAVE_FFTW3_THREADS -inline void initialize_threads(){int good = fftw_init_threads(); assert(good); (void)good;} -#else -inline void initialize_threads(){} -#endif - -inline void cleanup(){fftw_cleanup();} - -struct environment{ - environment() = default; - environment(environment const&) = delete; - environment(environment&&) = delete; - auto operator=(environment const&) = delete; - auto operator=(environment&&) = delete; - ~environment(){fftw_cleanup();} -}; - -class plan { - plan() : impl_{nullptr, &fftw_destroy_plan} {} - std::unique_ptr, decltype(&fftw_destroy_plan)> impl_; - - public: - plan(plan const&) = delete; - plan(plan&&) = default; - ~plan() = default; - - template()...)) - > - explicit plan(As&&... args) : impl_{fftw_plan_dft(std::forward(args)...), &fftw_destroy_plan} { - assert(impl_); - } - template - static auto many(As&&... args) - ->std::decay_t(args)...) 
, std::declval())> { - plan ret; ret.impl_.reset(fftw_plan_many_dft(std::forward(args)...)); return ret; // this produces a compilation error in icc++17 - } - -private: - void execute() const {fftw_execute(impl_.get());} //TODO(correaa): remove const - template - void execute_dft(I&& in, O&& out) const { - ::fftw_execute_dft(impl_.get(), const_cast(reinterpret_cast(static_cast const*>(base(in)))), reinterpret_cast(static_cast*>(base(out)))); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-pro-type-const-cast) : to interface with legacy fftw - } - template void execute(I&& in, O&& out) const {execute_dft(std::forward(in), std::forward(out));} - friend void execute(plan const& self) {self.execute();} - -public: - auto operator=(plan &&) -> plan& = default; - auto operator=(plan const&) -> plan& = delete; - - template - void operator()(I&& in, O&& out) const {execute(std::forward(in), std::forward(out));} - void operator()() const {execute();} // http://www.fftw.org/fftw3_doc/Thread-safety.html#Thread-safety - - [[nodiscard]] auto cost() const -> double {return fftw_cost(impl_.get());} - [[nodiscard]] auto flops() const { - struct ret_t{ - double add = 0.; - double mul = 0.; - double fma = 0.; - // explicit operator double() const{return add + mul + 2*fma;} - } ret{}; - fftw_flops(impl_.get(), &ret.add, &ret.mul, &ret.fma); - return ret; - } - - //std::string string_print() const{ - // return std::unique_ptr{fftw_sprint_plan(impl_.get())}.get(); - //} - //friend std::ostream& operator<<(std::ostream& os, plan const& p){return os< bool{return false;} - static constexpr auto nthreads() -> bool{return true;} - static constexpr auto with_nthreads() -> int{return 1;} -#endif -}; - -#if HAVE_FFTW3_THREADS -bool plan::is_thread_safe_ = (plan::make_thread_safe(), true); -int plan::nthreads_ = (initialize_threads(), with_nthreads()); -#endif - -enum sign : decltype(FFTW_FORWARD) {backward = FFTW_BACKWARD, none = 0, forward = FFTW_FORWARD}; - 
-static_assert( forward != none and none != backward and backward != forward, "!"); - -//enum strategy: decltype(FFTW_ESTIMATE){ estimate = FFTW_ESTIMATE, measure = FFTW_MEASURE }; - -template -auto dft(In const& in, Out&& out, int dir) -->decltype(fftw::plan{in, out, dir}(), std::forward(out)) { - return fftw::plan{in, out, dir}(), std::forward(out); } - -using std::decay_t; - -template -auto dft(std::array which, In const& in, Out&& out, sign dir) -->decltype(plan{which, in, out, dir}(), std::forward(out)) { - return plan{which, in, out, dir}(), std::forward(out); } - -template::rank_v> -auto dft(std::array which, In const& in, Out&& out) { - std::array fwd{}; - std::transform(begin(which), end(which), begin(fwd), [](auto elem) {return elem == FFTW_FORWARD ;}); - dft(fwd, in, out, fftw::forward); - - std::array bwd{}; - std::transform(begin(which), end(which), begin(bwd), [](auto elem) {return elem == FFTW_BACKWARD;}); - if(std::accumulate(begin(bwd), end(bwd), false)) {dft(bwd, out, out, static_cast(FFTW_BACKWARD));} - - return std::forward(out); -} - -template -auto many_dft(It1 first, It1 last, It2 d_first, int sign) -->decltype(plan::many(first, last, d_first, sign)(), d_first + (last - first)) { - return plan::many(first, last, d_first, sign)(), d_first + (last - first); } - -template -[[nodiscard]] // ("when first argument is const") -auto dft(In const& in, sign dir) -->std::decay_t { - return dft(in, R(extensions(in), get_allocator(in)), dir);} - -template -auto rotate(multi::array& inout) -> decltype(auto) { - multi::array_ref::element_ptr> before(data_elements(inout), extensions(inout)); - inout.reshape(extensions(rotated(before) )); - fftw::dft(before, inout, fftw::none); - return inout; -} - -template().base()), typename std::decay_t::element>, int> =0 -> -[[nodiscard]] // ("when first argument is const") -auto dft(std::array which, In const& in, sign dir) -->std::decay_t { - return fftw::dft(which, in, R(extensions(in), get_allocator(in)), dir);} - 
-template::rank_v, - std::enable_if_t().base()), typename std::decay_t::element>, int> =0 -> -auto dft(std::array which, In&& in, sign dir) -->decltype(dft(which, in, in, dir), std::forward(in)) { - return dft(which, in, in, dir), std::forward(in); } - -template -void dft(std::array which, In const& in) = delete; - -template -[[nodiscard]] // ("when second argument is const") -auto dft(In const& in, sign dir) -> R { - static_assert( Rank <= In::rank_v, "!" ); - return dft(in, R(extensions(in), get_allocator(in)), dir); -} - -template auto dft_forward(A&&... array) -->decltype(fftw::dft(std::forward(array)..., fftw::forward)) { - return fftw::dft(std::forward(array)..., fftw::forward); } - -template -[[nodiscard]] // ("when input argument is read only") -auto dft_forward(BoolArray which, A const& array) -->decltype(fftw::dft(which, array, fftw::forward)) { - return fftw::dft(which, array, fftw::forward); } - -template -[[nodiscard]] // ("when input argument is read only") -auto dft_forward(std::array which, A const& array) -->decltype(fftw::dft(which, array, fftw::forward)) { - return fftw::dft(which, array, fftw::forward); } - -template -auto dft_forward(std::array which, A const& in, O&& out) -->decltype(fftw::dft(which, in, std::forward(out), fftw::forward)) { - return fftw::dft(which, in, std::forward(out), fftw::forward); } - -template -[[nodiscard]] // ("when input argument is read only") -auto dft_forward(A const& array) -->decltype(fftw::dft(array, fftw::forward)) { - return fftw::dft(array, fftw::forward); } - -template auto dft_backward(A&&... 
args) -->decltype(dft(std::forward(args)..., fftw::backward)) { - return dft(std::forward(args)..., fftw::backward); } - -template auto dft_inplace(In&& in, sign direction) -> In&& { - fftw::plan{in, in, static_cast(direction)}(); - return std::forward(in); -} - -template -auto copy(In const& in, Out&& out) -->decltype(dft(std::array{}, in, std::forward(out), fftw::forward)) { - return dft(std::array{}, in, std::forward(out), fftw::forward); } - -template -[[nodiscard]] // ("when input argument is const")]] -auto copy(In const& in) -> R -{//->decltype(copy(i, R(extensions(i), get_allocator(i))), R()){ - return copy(in, R(extensions(in), get_allocator(in)));} - -#if 0 -template::decay_type> -auto move(In&& in) { - if(in.is_compact()) { - multi::array_ref Ref( - in.base(), extensions(in) - ); - copy(in, Ref); - return R( - multi::array_ref>(std::make_move_iterator(in.mbase()), ((in.mbase()=0), extensions(Ref))) - ); - } - return copy(std::forward(in)); -} -#endif - -template> -auto copy(multi::basic_array>&& array) -> R { - if(array.is_compact()) { - return - fftw::copy( - array.template static_array_cast(), - multi::array_ref(array.base().base(), array.extensions()) - ).template static_array_cast>() - ; - } - return fftw::copy(array.template static_array_cast()); -} - -template -auto transpose(Array& array) -->decltype(fftw::copy(transposed(array), array.reshape(extensions(layout(array).transpose())))) { - multi::array_ref ref(array.base(), extensions(array)); - return fftw::copy(ref.transposed(), array.reshape(layout(array).transpose().extensions())); -} - -#if 0 -// TODO(correaa) investigate why this doesn't work as expected -template -auto rotate(Array& a) -->decltype(fftw::copy(rotated(a), a.reshape(extensions(layout(a).transpose())))){ - multi::array_ref r(a.base(), extensions(a)); - auto&& ro = r.rotated(); - return fftw::copy(ro, a.reshape(layout(a).rotate().extensions())); -} -#endif - -} // end namespace fftw -} // end namespace boost::multi - -namespace 
boost::multi::fftw { - -template -class fft_iterator { - MDIterator base_; - std::array which_ = {}; - - public: - using iterator_type = MDIterator; - - using difference_type = typename std::iterator_traits::difference_type; - using value_type = typename std::iterator_traits::value_type; - using pointer = void*; - class reference { - typename MDIterator::reference::extensions_type x_; - explicit reference(typename MDIterator::reference const& ref) : x_{ref.extensions()} {} - friend class fft_iterator; - - public: - using extensions_type = typename MDIterator::reference::extensions_type; - auto extensions() const -> extensions_type {return x_;} - }; - - using iterator_category = std::random_access_iterator_tag; // using iterator_category = std::input_iterator_tag; - - explicit fft_iterator(iterator_type base, std::array which) noexcept : base_{std::move(base)}, which_{which} {} - - friend auto operator-(fft_iterator const& self, fft_iterator const& other) -> difference_type { - return self.base_ - other.base_; - } - - template - friend auto copy(fft_iterator first, fft_iterator last, ItOut d_first) { - assert(first.which_ == last.which_); - fftw::dft( - first.which_, - multi::ref(first.base_, last.base_), - multi::ref(d_first, d_first + (last.base_ - first.base_)) - ); - - return d_first + (last.base_ - first.base_); - } - template - friend auto uninitialized_copy(fft_iterator first, fft_iterator last, ItOut d_first) { - return copy(first, last, d_first); - } - - auto operator*() const {return reference{*base_};} -}; - -template -class fft_range { - Origin origin_; - Array ref_; - using which_type = std::array::rank_v>; - which_type which_; - - public: - using iterator_type = typename std::decay_t::const_iterator; - - using size_type = typename std::decay_t::size_type; - using iterator = fft_iterator; - - using decay_type = typename std::decay_t::decay_type; - - explicit fft_range(Origin&& origin, Array&& in, which_type which) - : origin_{std::forward(origin)}, 
ref_{std::forward(in)}, which_{which} {} - - operator decay_type() && { // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) - if constexpr(std::is_same_v) { - decay_type the_ret{std::forward(origin_)}; - the_ret.reshape(this->extensions()); - - fftw::dft( - which_, - ref_, - the_ret - ); - - return the_ret; - } else { - return decay_type{this->begin(), this->end()}; - } - } - - auto operator+() const& {return static_cast( *this );} - auto operator+() && {return static_cast(std::move(*this));} - - auto begin() const {return iterator{ref_.begin(), which_};} - auto end() const {return iterator{ref_.end() , which_};} - - auto size() const {return ref_.size();} - auto extensions() const {return ref_.extensions();} - auto num_elements() const {return ref_.num_elements();} - - auto base() const {return ref_.base();} - - auto rotated() const { - auto new_which = which_; - std::rotate(new_which.begin(), new_which.begin() + 1, new_which.end()); - return fft_range>{origin_, ref_.rotated(), new_which}; - } - auto unrotated() const { - auto new_which = which_; - std::rotate(new_which.rbegin(), new_which.rbegin() + 1, new_which.rend()); - return fft_range>{origin_, ref_.unrotated(), new_which}; - } - auto transposed() const { - auto new_which = which_; - std::swap(std::get<0>(new_which), std::get<1>(new_which)); - return fft_range>{std::forward(origin_), ref_.transposed(), new_which}; - } - - template - auto operator()(FBNs... 
fbns) const { - static_assert( sizeof...(fbns) <= std::decay_t::rank_v , "too many arguments"); - auto new_which = which_; - std::array fbna{fbns...}; - std::transform(fbna.begin(), fbna.end(), new_which.begin(), new_which.begin(), - [](auto fbn, auto nw) { - if(fbn == fftw::none) {return nw;} - assert(nw == fftw::none); - return fbn; - } - ); - return fft_range>{std::forward(origin_), ref_(), new_which}; - } -}; - -template -auto ref(Array&& in) { - return fft_range { - std::forward(in), - std::forward(in), {} - }; -} - -template auto move(Array& in) {return fftw::ref(std::move(in));} - -template -auto fft(Array&& in) { - std::array::rank_v> which{}; - std::fill_n(which.begin(), ND, fftw::forward); - return fft_range{std::forward(in), std::forward(in), which}; -} - -template -auto ifft(Array&& in) { - std::array which{}; - std::fill_n(which.begin(), ND, fftw::backward); - return fft_range{in, which}; -} - -} // end namespace boost::multi::fftw - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/memory.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/fftw/memory.hpp deleted file mode 100644 index fcbec40cb5..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/memory.hpp +++ /dev/null @@ -1,174 +0,0 @@ -#if COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXXX $CXXFLAGS $0 -o $0x -lfftw3 -lfftw3_mpi&&$0x&&rm $0x;exit -#endif -// © Alfredo A. 
Correa 2020 -// apt-get install libfftw3-mpi-dev -// compile with: mpicc simple_mpi_example.c -Wl,-rpath=/usr/local/lib -lfftw3_mpi -lfftw3 -o simple_mpi_example */ - -#ifndef MULTI_ADAPTOR_FFTW_MEMORY_HPP -#define MULTI_ADAPTOR_FFTW_MEMORY_HPP - -#include - -#include "../../config/NODISCARD.hpp" - -#include -#include -#include -#include -#include -#include - -namespace boost{ -namespace multi{ -namespace fftw{ - -template -class allocator{ -public: - using value_type = T; - -#if 1 - using pointer = value_type*; - using const_pointer = typename std::pointer_traits::template - rebind; - using void_pointer = typename std::pointer_traits::template - rebind; - using const_void_pointer = typename std::pointer_traits::template - rebind; - using difference_type = typename std::pointer_traits::difference_type; - using size_type = std::make_unsigned_t; - - template struct rebind {typedef allocator other;}; -#endif - - allocator() noexcept {} // not required, unless used - template allocator(allocator const&) noexcept {} - - NODISCARD("to avoid memory leak") - value_type* allocate(std::size_t n) const{return static_cast(fftw_malloc(sizeof(T)*n));} - -// value_type* // Use pointer if pointer is not a value_type* -// allocate(std::size_t n){return static_cast(::operator new (n*sizeof(value_type)));} - - void deallocate(value_type* p, std::size_t){fftw_free(p);} -// void deallocate(value_type* p, std::size_t) noexcept // Use pointer if pointer is not a value_type* -// {::operator delete(p);} - - static int alignment_of(value_type* p){return fftw_alignment_of((double*)p);} - -#if 1 - value_type* allocate(std::size_t n, const_void_pointer){return allocate(n);} - - template - void construct(U* p, Args&& ...args){::new(p) U(std::forward(args)...);} - - template void destroy(U* p) noexcept{p->~U();} - - std::size_t max_size() const noexcept{return std::numeric_limits::max();} - - allocator select_on_container_copy_construction() const{return *this;} - - using 
propagate_on_container_copy_assignment = std::false_type; - using propagate_on_container_move_assignment = std::false_type; - using propagate_on_container_swap = std::false_type; - using is_always_equal = std::is_empty; -#endif -}; - -template -bool operator==(allocator const&, allocator const&) noexcept{return true;} - -template -bool operator!=(allocator const& x, allocator const& y) noexcept{ - return !(x == y); -} - -#if 0 -template -struct allocator{ - using value_type = T; - using pointer = value_type*; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using propagate_on_container_move_assignment = std::true_type; -// NODISCARD("to avoid memory leak") - pointer allocate(size_type n) const{return static_cast(fftw_malloc(sizeof(T)*n));} - void deallocate(pointer data, size_type){fftw_free(data);} -}; -#endif - -//template<> allocator>::pointer allocator>::allocate(size_type n){return reinterpret_cast*>(fftw_alloc_complex(n));} -//template<> allocator< double >::pointer allocator< double >::allocate(size_type n){return fftw_alloc_real(n) ;} - -#if 0 -template<> -struct allocator>{ - using value_type = std::complex; - using pointer = value_type*; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using propagate_on_container_move_assignment = std::true_type; - NODISCARD("to avoid memory leak") - pointer allocate(size_type n){return reinterpret_cast*>(fftw_alloc_complex(n));} - void deallocate(pointer data, size_type){fftw_free(data);} -}; - -template<> -struct allocator{ - using value_type = double; - using pointer = value_type*; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using propagate_on_container_move_assignment = std::true_type; - NODISCARD("to avoid memory leak") - pointer allocate(size_type n){return fftw_alloc_real(n);} - void deallocate(pointer data, size_type){fftw_free(data);} -}; -#endif - -}}} - - -#if 0//__NVCC__ -namespace std{ - -template struct 
allocator_traits> : std::allocator_traits>{ - using base = std::allocator_traits>; - template using rebind_alloc = boost::multi::fftw::allocator; - template - static auto allocate(A& a, typename base::size_type n){return a.allocate(n);} -}; - -} -#endif - -#if 0 //def __NVCC__ -namespace std{ -template struct allocator_traits> : std::allocator_traits>{ - template using rebind_alloc = boost::multi::fftw::allocator; -}; -} -#endif - -#if not __INCLUDE_LEVEL__ - -#include "../../array.hpp" - -#include - -namespace multi = boost::multi; - -int main(){ - { - std::vector> v(100); - multi::array arr({10, 20}); - } - { - std::vector, multi::fftw::allocator>> v(100); - multi::array, 2> arr({10, 20}); - } -} -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/combinations.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/combinations.cpp deleted file mode 100644 index 9564d8e59e..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/combinations.cpp +++ /dev/null @@ -1,207 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. 
Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW adaptor" -#include - -#include "../../fftw.hpp" - -#include -#include -#include -#include - -namespace multi = boost::multi; - -namespace utf = boost::unit_test::framework; - -using fftw_fixture = multi::fftw::environment; -BOOST_TEST_GLOBAL_FIXTURE( fftw_fixture ); - -class watch : private std::chrono::high_resolution_clock { - std::string label; - time_point start = now(); - - public: - explicit watch(std::string label) : label{std::move(label)} {} - watch(watch const&) = delete; - watch(watch&&) = default; - auto operator=(watch const&) = delete; - auto operator=(watch&&) -> watch& = default; - auto elapsed_sec() const {return std::chrono::duration(now() - start).count();} - ~watch() { - std::cerr - << label <<": " - << elapsed_sec() <<" sec" - <; - - auto const in = [] { - multi::array ret({10 , 11 , 12 , 13 }); - std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), - [eng = std::default_random_engine {std::random_device {}()}, - uniform_01 = std::uniform_real_distribution<>{}]() mutable{ - return complex{uniform_01(eng), uniform_01(eng)}; - }); - return ret; - }(); - - std::vector> which_cases = { - {false, true , true , true }, - {false, true , true , false}, - {true , false, false, false}, - {true , true , false, false}, - {false, false, true , false}, - {false, false, false, false}, - }; - - using std::cout; - for(auto which : which_cases) { - cout<<"case "; - copy(begin(which), end(which), std::ostream_iterator{cout, ", "}); - cout<<"\n"; - - multi::array out = in; - { - watch unnamed{"cpu_oplac %ws wall, CPU (%p%)\n"}; - multi::fftw::dft_forward(which, in, out); - } - { - multi::fftw::plan pln{which, in, out, multi::fftw::forward}; - watch unnamed{"cpu_oplac planned %ws wall, CPU (%p%)\n"}; - pln(); - } - { - auto in_rw = in; - watch unnamed{"cpu_iplac %ws wall, CPU (%p%)\n"}; - multi::fftw::dft_forward(which, in_rw); - } - { - auto in_rw = in; - multi::fftw::plan 
pln{which, in_rw, in_rw, multi::fftw::forward}; - watch unnamed{"cpu_iplac planned %ws wall, CPU (%p%)\n"}; - pln(); - } - { - auto in_rw = in; - multi::fftw::plan pln{which, in_rw, in_rw, multi::fftw::forward}; - watch unnamed{"cpu_iplac planned measured %ws wall, CPU (%p%)\n"}; - pln(); - } - { - watch unnamed{"cpu_alloc %ws wall, CPU (%p%)\n"}; - auto out_cpy = multi::fftw::dft_forward(which, in); - BOOST_TEST(abs(out_cpy[5][4][3][1] - out[5][4][3][1]) == 0.); - } - { - auto in_rw = in; - watch unnamed{"cpu_move %ws wall, CPU (%p%)\n"}; - auto out_cpy = multi::fftw::dft_forward(which, std::move(in_rw)); - BOOST_TEST(abs(out_cpy[5][4][3][1] - out[5][4][3][1]) == 0.); - } - } -} - -BOOST_AUTO_TEST_CASE(fftw_4D_power_benchmark, *boost::unit_test::enabled() ) { - using complex = std::complex; - namespace fftw = multi::fftw; - - auto exts = multi::array::extensions_type({6, 12, 12, 12}); - multi::array in(exts); - std::iota(in.data_elements(), in.data_elements() + in.num_elements(), 1.2); - - BOOST_REQUIRE(in[0][0][0][0] == 1.2); - std::array which = {false, true, true, true}; - [&, unnamed = watch{utf::current_test_case().full_name()+" inplace FTTT"}] { - fftw::dft(which, in, fftw::forward); - }(); - [&, unnamed = watch{utf::current_test_case().full_name()+" inplace FTTT"}] { - fftw::dft(which, in, fftw::forward); - }(); - auto in0000 = in[0][0][0][0]; - BOOST_REQUIRE(in0000 != 1.2); - - multi::array out(exts); - [&, unnamed = watch{utf::current_test_case().full_name()+" outofplace FTTT"}] { - fftw::dft(which, in, out, fftw::forward); - }(); - [&, unnamed = watch{utf::current_test_case().full_name()+" outofplace FTTT"}] { - fftw::dft(which, in, out, fftw::forward); - }(); - [&, unnamed = watch{utf::current_test_case().full_name()+" outofplace FTTT"}] { - fftw::dft(which, in, out, fftw::forward); - }(); - [&, unnamed = watch{utf::current_test_case().full_name()+" outofplace+alloc FTTT"}] { - multi::array out2(exts); - fftw::dft(which, in, out2, fftw::forward); - }(); 
- [&, unnamed = watch{utf::current_test_case().full_name()+" outofplace+alloc FTTT"}] { - multi::array out2(exts); - fftw::dft(which, in, out2, fftw::forward); - }(); - BOOST_REQUIRE(in0000 == in[0][0][0][0]); -} - - -BOOST_AUTO_TEST_CASE(fftw_4D_power_benchmark_syntax) { - std::vector> which_cases = { - {false, true , true , true }, - {false, true , true , false}, - {true , false, false, false}, - {true , true , false, false}, - {false, false, true , false}, - {false, false, false, false}, - }; - using complex = std::complex; - - auto const in = [] { - multi::array ret({6, 12, 12, 12}); - std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), - [eng = std::default_random_engine {std::random_device {}()}, - uniform_01 = std::uniform_real_distribution<>{}]() mutable{ - return complex{uniform_01(eng), uniform_01(eng)}; - }); - return ret; - }(); - - auto io = in; (void)io; - BOOST_REQUIRE( io.extensions() == in.extensions() ); - - namespace fftw = multi::fftw; - using clock = std::chrono::high_resolution_clock; - { - auto const tick = clock::now(); - multi::array out({6, 12, 12, 12}); - out = multi::fftw::ref(in)(fftw::none, fftw::forward, fftw::forward, fftw::forward); - BOOST_REQUIRE( out.extensions() == in.extensions() ); - auto time = std::chrono::duration(clock::now() - tick); - std::cout<<"allocate and copy assign (out-of-place fft) : "<< time.count() <(clock::now() - tick); - std::cout<<"copy construct (out-of-place fft) : "<< time.count() <(clock::now() - tick); - std::cout<<"self copy assign (in-place fft) : "<< time.count() < out = multi::fftw::move(io)(fftw::none, fftw::forward, fftw::forward, fftw::forward); - BOOST_REQUIRE( io.is_empty() ); - auto time = std::chrono::duration(clock::now() - tick); - std::cout<<"move construct (in-place fft) : "<< time.count() < - -#include "../../../adaptors/fftw.hpp" -#include "../../../array.hpp" - -namespace multi = boost::multi; -using complex = std::complex; [[maybe_unused]] complex const I{0, 
1}; // NOLINT(readability-identifier-length) imag unit - -template auto power(M const& array) { - return std::transform_reduce(array.elements().begin(), array.elements().end(), 0., std::plus<>{}, [](auto zee) {return std::norm(zee);}); -// return accumulate(array.elements().begin(), array.elements().end(), 0., [](auto const& acc, auto const& elem) {return acc + std::norm(elem);}); -} - -template -void what(T&&) = delete; - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_fft_move) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} - }; - - { - auto const in_copy = in; - // auto* const in_base = in.base(); - - multi::array in2(in.extensions()); - - in2 = multi::fftw::fft(std::move(in)); - - BOOST_REQUIRE( power(in2)/num_elements(in2) - power(in_copy) < 1e-8 ); -// BOOST_REQUIRE( in2.base() == in_base ); -// BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing - } -} - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_move) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} - }; - BOOST_REQUIRE( in[1][1] == 7. - 4.*I ); - - { - auto const in_copy = in; - auto* const in_base = in.base(); - BOOST_REQUIRE( in_base == in.base() ); - - in = multi::fftw::ref(in); - - BOOST_REQUIRE( in == in_copy ); - BOOST_REQUIRE( in_base == in.base() ); // prove no allocation - } -} - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} - }; - BOOST_REQUIRE( in[1][1] == 7. 
- 4.*I ); - - { - auto const in_copy = in; - auto* const in_base = in.base(); - BOOST_REQUIRE( in_base == in.base() ); - BOOST_REQUIRE( in.size() == 5 ); - - in = multi::fftw::ref(in).transposed(); - - BOOST_REQUIRE( in.size() == 3 ); - BOOST_REQUIRE( in == in_copy.transposed() ); // prove correctness - BOOST_REQUIRE( in_base == in.base() ); // prove no allocation - } -} - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_naive) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} - }; - BOOST_REQUIRE( in[1][1] == 7. - 4.*I ); - - { - auto const in_copy = in; - auto* const in_base = in.base(); - BOOST_REQUIRE( in_base == in.base() ); - BOOST_REQUIRE( in.size() == 5 ); - - in = in.transposed(); // this is UB - - BOOST_REQUIRE( in.size() == 3 ); - // BOOST_REQUIRE( in != in_copy.transposed() ); // prove it is incorrect - BOOST_REQUIRE( in_base == in.base() ); // prove no allocation - } -} - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_naive_copy) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} - }; - BOOST_REQUIRE( in[1][1] == 7. - 4.*I ); - - { - auto const in_copy = in; - auto* const in_base = in.base(); - BOOST_REQUIRE( in_base == in.base() ); - BOOST_REQUIRE( in.size() == 5 ); - - in = + in.transposed(); - - BOOST_REQUIRE( in.size() == 3 ); - BOOST_REQUIRE( in == in_copy.transposed() ); // prove correctness - BOOST_REQUIRE( in_base != in.base() ); // prove no allocation - } -} - - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_fft_copy) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. 
- 1.*I, 18. + 7.*I, 2. + 10.*I} - }; - - { - auto const in_copy = in; - auto* const in_base = in.base(); - - multi::array in2 = multi::fftw::fft(in); - - BOOST_REQUIRE( power(in2)/num_elements(in2) - power(in_copy) < 1e-8 ); - BOOST_REQUIRE( in2.base() != in_base ); - BOOST_REQUIRE( not in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing - } -} - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_copyconstruct) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} - }; - - { - auto const in_copy = in; - auto* const in_base = in.base(); - - multi::array in2 = multi::fftw::ref(in).transposed(); - - BOOST_REQUIRE( in2 == in_copy.transposed() ); - BOOST_REQUIRE( in2.base() != in_base ); - BOOST_REQUIRE( in .base() == in_base ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing - } -} - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_moveconstruct) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} - }; - - { - auto const in_copy = in; - auto* const in_base = in.base(); - - multi::array in2 = multi::fftw::ref(std::move(in)).transposed(); - - BOOST_REQUIRE( in2 == in_copy.transposed() ); - BOOST_REQUIRE( in2.base() == in_base ); - BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing - } -} - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_moveconstruct_implicit) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. 
+ 10.*I} - }; - - { - auto const in_copy = in; - auto* const in_base = in.base(); - - auto in2 = +multi::fftw::ref(std::move(in)).transposed(); - - BOOST_REQUIRE( in2 == in_copy.transposed() ); - BOOST_REQUIRE( in2.base() == in_base ); - BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing - } -} - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_moveassign_from_temp) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} - }; - - { - auto const in_copy = in; - auto* const in_base = in.base(); - - multi::array in2; - in2 = static_cast>(multi::fftw::ref(std::move(in)).transposed()); - - BOOST_REQUIRE( in2 == in_copy.transposed() ); - BOOST_REQUIRE( in2.base() == in_base ); - BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing - } -} - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_moveassign) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. + 10.*I} - }; - - { - auto const in_copy = in; - auto* const in_base = in.base(); - - multi::array in2; - in2 = multi::fftw::ref(std::move(in)).transposed(); - - BOOST_REQUIRE( in2 == in_copy.transposed() ); - BOOST_REQUIRE( in2.base() == in_base ); - BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing - } -} - -BOOST_AUTO_TEST_CASE(fftw_2D_const_range_transposed_fftwmove) { - multi::array in = { - { 100. + 2.*I, 9. - 1.*I, 2. + 4.*I}, - { 3. + 3.*I, 7. - 4.*I, 1. + 9.*I}, - { 4. + 1.*I, 5. + 3.*I, 2. + 4.*I}, - { 3. - 1.*I, 8. + 7.*I, 2. + 1.*I}, - { 31. - 1.*I, 18. + 7.*I, 2. 
+ 10.*I} - }; - - { - auto const in_copy = in; - auto* const in_base = in.base(); - - multi::array in2; - in2 = multi::fftw::move(in).transposed(); - - BOOST_REQUIRE( in2 == in_copy.transposed() ); - BOOST_REQUIRE( in2.base() == in_base ); - BOOST_REQUIRE( in.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing - } -} - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/shift.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/shift.cpp deleted file mode 100644 index 32342a5060..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/shift.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW shift" -#include - -#include "../../fftw.hpp" - -#include // NOLINT(build/c++11) -#include - -template -class n_random_complex { // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - std::size_t n_ = 0; - mutable std::mt19937 gen_{std::random_device{}()}; - mutable std::uniform_real_distribution<> dist_{-1., 1.}; - - public: - n_random_complex(n_random_complex const&) = delete; - explicit n_random_complex(std::size_t n) : n_{n} {} - - class iterator : public boost::multi::random_access_iterator, std::complex, void> { - n_random_complex const* ptr_; - std::size_t n_; - - public: - iterator(n_random_complex const* ptr, std::size_t n) : ptr_{ptr}, n_{n} {} - - auto operator*() const {return std::complex{ptr_->dist_(ptr_->gen_), ptr_->dist_(ptr_->gen_)};} - auto operator++() -> iterator& {++n_; return *this;} - - friend auto operator==(iterator const& self, iterator const& other) {return self.n_ == other.n_;} - friend auto operator!=(iterator const& self, iterator const& other) {return self.n_ != other.n_;} - - auto operator-(iterator const& other) const {return n_ - other.n_;} - }; - auto 
begin() const {return iterator{this, 0 };} - auto end () const {return iterator{this, n_};} - - auto size() const {return n_;} -}; - -namespace multi = boost::multi; -namespace fftw = multi::fftw; - -BOOST_AUTO_TEST_CASE(fftw_shift) { - class watch : std::chrono::steady_clock { - time_point start_ = now(); - - public: - auto elapsed_sec() const {return std::chrono::duration(now() - start_).count();} - }; - - multi::array, 1> const arr = n_random_complex(19586); BOOST_REQUIRE(arr.size() == 19586); - multi::array, 1> res(arr.extensions()); BOOST_REQUIRE(res.size() == 19586); - - fftw::plan fdft{arr, res, multi::fftw::forward}; - - auto const repeat = 40; - [&, unnamed = watch{}] { - for(int i = 0; i != repeat; ++i) { - fdft(arr.base(), res.base()); - std::rotate(res.begin(), res.begin() + res.size()/2, res.end()); - } - - BOOST_TEST_MESSAGE( "FFTW shift "<< unnamed.elapsed_sec()/repeat <<" sec" ); // prints 0.000882224 sec - }(); -} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/transpose.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/transpose.cpp deleted file mode 100644 index f0d5daa69d..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/transpose.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. 
Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW transpose" -#include - -#include "../../fftw.hpp" - -#include -#include -#include - -namespace multi = boost::multi; - -class watch : private std::chrono::high_resolution_clock { - std::string label; - time_point start = now(); - - public: - explicit watch(std::string label) : label{std::move(label)} {} - watch(watch const&) = delete; - watch(watch&&) = default; - auto operator=(watch const&) = delete; - auto operator=(watch&&) -> watch& = default; - auto elapsed_sec() const {return std::chrono::duration(now() - start).count();} - ~watch() {std::cerr<< label <<": "<< elapsed_sec() <<" sec"<; - - { - auto const in = [] { - // multi::array ret({10137, 9973}); - // multi::array ret({1013, 997}); - multi::array ret({101, 99}); - std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), - [eng = std::default_random_engine{std::random_device{}()}, uniform_01 = std::uniform_real_distribution<>{}]() mutable{ - return complex{uniform_01(eng), uniform_01(eng)}; - } - ); - // std::cout<<"memory size "<< ret.num_elements()*sizeof(complex)/1e6 <<" MB\n"; - return ret; - }(); - - { - multi::array out = in; - { - watch unnamed{"transposition with aux %ws wall, CPU (%p%)\n"}; - multi::array aux = ~out; - out = std::move(aux); - BOOST_REQUIRE( out[35][79] == in[79][35] ); - } - } - { - multi::array out = in; - auto* out_data = out.data_elements(); - { - watch unnamed{"fftw transpose fun thread %ws wall, CPU (%p%)\n"}; - multi::fftw::transpose( out ); - BOOST_REQUIRE( out.data_elements() == out_data ); - BOOST_REQUIRE( out[35][79] == in[79][35] ); - } - BOOST_REQUIRE( out == ~in ); - } - } -} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/transpose_square.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/transpose_square.cpp deleted file mode 100644 index 2067ad7b6d..0000000000 --- 
a/external_codes/boost_multi/multi/include/multi/adaptors/fftw/test/transpose_square.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi FFTW transpose" -#include -#include - -#include "../../fftw.hpp" - -#include -#include -#include - -namespace multi = boost::multi; - -using fftw_fixture = multi::fftw::environment; -BOOST_TEST_GLOBAL_FIXTURE( fftw_fixture ); - -using complex = std::complex; - -class watch : private std::chrono::high_resolution_clock { - std::string label; - time_point start = now(); - public: - explicit watch(std::string label) : label{std::move(label)} {} - watch(watch const&) = delete; - watch(watch&&) = default; - auto operator=(watch const&) = delete; - auto operator=(watch&&) -> watch& = default; - auto elapsed_sec() const {return std::chrono::duration(now() - start).count();} - ~watch() { - std::cerr - << label <<": " - << elapsed_sec() <<" sec" - < ret({819, 819}); - multi::array ret({81, 81}); - std::generate(ret.data_elements(), ret.data_elements() + ret.num_elements(), - [eng = std::default_random_engine{std::random_device{}()}, uniform_01 = std::uniform_real_distribution<>{}]() mutable{ - return complex{uniform_01(eng), uniform_01(eng)}; - } - ); - // std::cout<<"memory size "<< ret.num_elements()*sizeof(complex)/1e6 <<" MB\n"; - return ret; - }(); - // multi::fftw::plan::with_nthreads(1); - { - multi::array out = in; - auto* data = out.data_elements(); - { - watch unnamed{"fftw trans mve 1 thread %ws wall, CPU (%p%)\n"}; - multi::fftw::transpose( out ); - BOOST_REQUIRE( out.data_elements() == data ); - BOOST_REQUIRE( out[35][79] == in[79][35] ); - } - BOOST_REQUIRE( out == ~in ); - } -// { -// multi::array out = in; -// auto p = out.data_elements(); -// { -// boost::timer::auto_cpu_timer t{"fftw trans mve 1 thread %ws wall, CPU (%p%)\n"}; -// out = multi::fftw::copy( transposed( 
move(out) ) ); -// BOOST_REQUIRE( out.data_elements() == p ); -// BOOST_REQUIRE( out[35][79] == in[79][35] ); -// } -// BOOST_REQUIRE( out == ~in ); -// } - // multi::fftw::plan::with_nthreads(2); -// { -// multi::array out = in; -// auto p = out.data_elements(); -// { -// boost::timer::auto_cpu_timer t{"fftw trans mve 2 thread %ws wall, CPU (%p%)\n"}; -// out = multi::fftw::copy( ~move(out) ); -// BOOST_REQUIRE( out.data_elements() == p ); -// BOOST_REQUIRE( out[35][79] == in[79][35] ); -// } -// BOOST_REQUIRE( out == ~in ); -// } - // multi::fftw::plan::with_nthreads(4); -// { -// multi::array out = in; -// auto p = out.data_elements(); -// { -// boost::timer::auto_cpu_timer t{"fftw trans mve 4 thread %ws wall, CPU (%p%)\n"}; -// out = multi::fftw::copy( ~move(out) ); -// BOOST_REQUIRE( out.data_elements() == p ); -// BOOST_REQUIRE( out[35][79] == in[79][35] ); -// } -// BOOST_REQUIRE( out == ~in ); -// } - { - multi::array out = in; - multi::array aux(extensions(out)); - { - watch unnamed{"auxiliary copy %ws wall, CPU (%p%)\n"}; - aux = ~out; - out = std::move(aux); - BOOST_REQUIRE( out[35][79] == in[79][35] ); - } - BOOST_REQUIRE( out == ~in ); - } - { - multi::array out = in; - { - watch unnamed{"transposition with loop %ws wall, CPU (%p%)\n"}; - for(auto ii : extension(out)) { - for(auto j = 0; j != ii; ++j) { - std::swap(out[ii][j], out[j][ii]); - } - } - BOOST_REQUIRE( out[35][79] == in[79][35] ); - } - BOOST_REQUIRE( out == ~in ); - } - { - multi::array out = in; - { - watch unnamed{"transposition with loop 2 %ws wall, CPU (%p%)\n"}; - for(auto i = 0; i != out.size(); ++i) { - for(auto j = i + 1; j != out.size(); ++j) { - std::swap(out[i][j], out[j][i]); - } - } - BOOST_REQUIRE( out[35][79] == in[79][35] ); - } - BOOST_REQUIRE( out == ~in ); - } - } -} - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/lapack.hpp deleted file mode 100644 index 0811de557a..0000000000 
--- a/external_codes/boost_multi/multi/include/multi/adaptors/lapack.hpp +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -#include "lapack/getrf.hpp" - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/geqrf.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/lapack/geqrf.hpp deleted file mode 100644 index 1386d673a8..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/geqrf.hpp +++ /dev/null @@ -1,276 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXXX $CXXFLAGS $0 -o $0x$OXX `pkg-config --libs blas lapack` -lboost_unit_test_framework&&$0x$OXX -x 0&&rm $0x$OXX;exit -#endif -// © Alfredo A. Correa 2020 - -#ifndef MULTI_ADAPTORS_LAPACK_GEQRF_HPP -#define MULTI_ADAPTORS_LAPACK_GEQRF_HPP - -#include "../lapack/core.hpp" -#include "../blas/filling.hpp" - -#include "../../config/NODISCARD.hpp" - -#include - -namespace boost{namespace multi{namespace lapack{ - -using blas::filling; - -template -A&& geqrf(Context&& ctxt, A&& a, TAU&& tau, WORK&& work){ -// assert( stride(~a) == 1); - assert( size(tau) == std::min(size(~a), size(a)) ); - int info = -1; - geqrf_(std::forward(ctxt), size(~a), size(a), a.base(), stride(a), tau.base(), work.data(), work.size(), info); - assert(info == 0); - return std::forward(a); -} - -//using ::core::syev; -//using ::core::geqrf; - -#if 0 -template -auto syev(blas::filling uplo, Array2D&& a, Array1D&& w, Array1DW&& work) -->decltype(syev('V', uplo==blas::filling::upper?'L':'U', size(a), base(a), stride(a), base(w), base(work), size(work), std::declval()), a({0l, 1l}, {0l, 1l})) -{ - assert( size(work) >= std::max(1l, 3*size(a)-1l) ); - assert( size(a) == size(w) ); - assert( stride(w)==1 ); - assert( stride(work)==1 ); - if(size(a)==0) return std::forward(a)(); - int info = -1; - if(stride(rotated(a))==1) syev('V', uplo==blas::filling::upper?'L':'U', size(a), base(a), stride( a ), base(w), base(work), size(work), info); - else if(stride( a 
)==1) syev('V', uplo==blas::filling::upper?'U':'L', size(a), base(a), stride(rotated(a)), base(w), base(work), size(work), info); - else assert(0); // case not contemplated by lapack - if(info < 0) assert(0); // bad argument - return std::forward(a)({0, size(a)-info}, {0, size(a)-info}); -} - -template::decay_type> -auto syev(blas::filling uplo, Array2D&& a, Array1D&& w) -->decltype(syev(uplo, std::forward(a), std::forward(w), Array1DW(std::max(1l, 3*size(a)-1l), get_allocator(w)))){ - return syev(uplo, std::forward(a), std::forward(w), Array1DW(std::max(1l, 3*size(a)-1l), get_allocator(w)));}// TODO obtain automatic size from lapack info routine - -template -NODISCARD("because input array is const, output gives eigenvectors") -typename Array2D::decay_type syev(blas::filling uplo, Array2D const& a, Array1D&& w){ - auto ret = a.decay(); - if(syev(uplo, ret, std::forward(w)).size() != a.size()) assert(0); // failed - return ret; -} - -template -NODISCARD("because input array is const, output gives eigenvalues") -auto syev(blas::filling uplo, Array2D&& a){ - multi::array::element_type, 1, decltype(get_allocator(a))> eigenvalues(size(a), get_allocator(a)); - syev(uplo, std::forward(a), eigenvalues); - return eigenvalues; -} - -template -NODISCARD("because input array is const, output gives a structured binding of eigenvectors and eigenvactor") -auto syev(blas::filling uplo, Array2D const& a){ - struct{ - typename Array2D::decay_type eigenvectors; - typename Array2D::value_type eigenvalues; - } ret{a, {size(a), get_allocator(a)}}; - auto&& l = syev(uplo, ret.eigenvectors, ret.eigenvalues); - assert( size(l) == size(a) ); - return ret; -} -#endif - -}}} - -#if not __INCLUDE_LEVEL__ - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi lapack adaptor geqrf" -#if not __INTEL_COMPILER -#define BOOST_TEST_DYN_LINK -#endif -#include - -#include "../../array.hpp" - -#include // std::isnan -#include -#include // std::max - -namespace multi = boost::multi; -namespace lapack = 
multi::lapack; - -template decltype(auto) print(M const& C){ - using std::cout; - using multi::size; - for(int i = 0; i != size(C); ++i){ - for(int j = 0; j != size(C[i]); ++j) cout << C[i][j] << ' '; - cout << std::endl; - } - return cout << std::endl; -} - -template decltype(auto) print_1d(M const& C){ - using std::cout; - using multi::size; - for(int i = 0; i != size(C); ++i) cout<< C[i] <<' '; - return cout << std::endl; -} - -BOOST_AUTO_TEST_CASE(lapack_geqrf){ - - multi::array A = - { - {1., 2., 3.}, - {4., 5., 6.}, - {7., 8., 9.} - } - ; - multi::lapack::context ctxt; - - multi::array TAU(std::min(size(A), size(~A))); - multi::array WORK(std::max(1l, 3*size(A)-1)); - - multi::lapack::geqrf(ctxt, A, TAU, WORK); - - print(A); - print(TAU); - -} - -#if 0 -BOOST_AUTO_TEST_CASE(lapack_syev, *boost::unit_test::tolerance(0.00001) ){ -{ - multi::array A = { - {167.413, 126.804, 125.114}, - {NAN , 167.381, 126.746}, - {NAN , NAN , 167.231} - }; - multi::array W(size(A)); - multi::array WORK(std::max(1l, 3*size(A)-1)); - multi::lapack::syev(multi::blas::filling::upper, A, W, WORK); - BOOST_TEST( A[2][1] == -0.579092 ); - BOOST_TEST( W[1] == 42.2081 ); -} -{ - multi::array A = { - {167.413, 126.804, 125.114}, - {NAN , 167.381, 126.746}, - {NAN , NAN , 167.231} - }; - multi::array W(size(A)); - multi::lapack::syev(multi::blas::filling::upper, A, W); - BOOST_TEST( A[2][1] == -0.579092 ); - BOOST_TEST( W[1] == 42.2081 ); -} -{ - multi::array A = { - {167.413, 126.804, 125.114}, - {NAN , 167.381, 126.746}, - {NAN , NAN , 167.231} - }; - multi::array W(size(A)); - multi::lapack::syev(multi::blas::filling::lower, rotated(A), W); - BOOST_TEST( A[2][1] == -0.579092 ); - BOOST_TEST( W[1] == 42.2081 ); -} -{ - namespace lapack = multi::lapack; - multi::array A = { - {167.413, 126.804, 125.114}, - {NAN , 167.381, 126.746}, - {NAN , NAN , 167.231} - }; - auto W = lapack::syev(multi::blas::filling::upper, A); - BOOST_TEST( A[2][1] == -0.579092 ); - BOOST_TEST( W[1] == 42.2081 ); -} 
-{ - multi::array const A = { - {167.413, 126.804, 125.114}, - {NAN , 167.381, 126.746}, - {NAN , NAN , 167.231} - }; - multi::array W(size(A)); - namespace lapack = multi::lapack; - auto A_copy = lapack::syev(lapack::filling::upper, A, W); - BOOST_TEST( A[1][2] == 126.746 ); - BOOST_TEST( A_copy[2][1] == -0.579092 ); - BOOST_TEST( W[1] == 42.2081 ); -} -{ - multi::array A = { - {167.413, 126.804, 0.}, - {NAN , 167.381, 0.}, - {NAN , NAN , 0.} - }; - multi::array W(size(A)); - namespace lapack = multi::lapack; - auto&& A_ref = lapack::syev(lapack::filling::upper, A, W); - BOOST_TEST( size(A_ref)==3 ); - BOOST_TEST( W[0]==0. ); -} -{ - multi::array A = { - {1. , 1., 1.}, - {NAN, 2 , 1.}, - {NAN, NAN, 1.} - }; - multi::array W(size(A)); - namespace lapack = multi::lapack; - auto&& A_ref = lapack::syev(lapack::filling::upper, A, W); - print(A_ref); - BOOST_TEST( size(A_ref)==3 ); - BOOST_TEST( W[0]==0. ); -} -{ - multi::array A = {{5.}}; - multi::array W(size(A)); - namespace lapack = multi::lapack; - lapack::syev(lapack::filling::upper, A, W); - BOOST_TEST( A[0][0] == 1. ); - BOOST_TEST( W[0]==5. 
); -} -{ - namespace lapack = multi::lapack; - multi::array A; - multi::array W(size(A)); - lapack::syev(lapack::filling::upper, A, W); -} -{ - multi::array const A = { - {167.413, 126.804, 125.114}, - {NAN , 167.381, 126.746}, - {NAN , NAN , 167.231} - }; - multi::array W(size(A)); - namespace lapack = multi::lapack; - auto sys = lapack::syev(lapack::filling::upper, A); - BOOST_TEST( A[1][2] == 126.746 ); - BOOST_TEST( sys.eigenvectors[2][1] == -0.579092 ); - BOOST_TEST( sys.eigenvalues[1] == 42.2081 ); -} -#if __cpp_structured_bindings -{ - multi::array const A = { - {167.413, 126.804, 125.114}, - {NAN , 167.381, 126.746}, - {NAN , NAN , 167.231} - }; - multi::array W(size(A)); - namespace lapack = multi::lapack; - auto [eigenvecs, eigenvals] = lapack::syev(lapack::filling::upper, A); - BOOST_TEST( A[1][2] == 126.746 ); - BOOST_TEST( eigenvecs[2][1] == -0.579092 ); - BOOST_TEST( eigenvals[1] == 42.2081 ); -} -#endif - -} -#endif -#endif - -#endif - - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/potrf.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/lapack/potrf.hpp deleted file mode 100644 index beee951b07..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/potrf.hpp +++ /dev/null @@ -1,220 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS -(echo '#include"'$0'"'>$0.cpp)&&$CXX -D_TEST_MULTI_ADAPTORS_LAPACK_POTRF $0.cpp -o$0x `pkg-config --libs blas lapack` -lboost_unit_test_framework&&valgrind $0x&&rm $0x $0.cpp;exit -#endif -// © Alfredo A. 
Correa 2019-2020 - -#ifndef MULTI_ADAPTORS_LAPACK_POTRF_HPP -#define MULTI_ADAPTORS_LAPACK_POTRF_HPP - -#include "../../array.hpp" -#include "../../config/NODISCARD.hpp" - -#include "../lapack/core.hpp" -#include "../blas/numeric.hpp" - -#include "../blas/filling.hpp" - -#include - -namespace boost{namespace multi{namespace lapack{ - -using blas::filling; - -namespace{ - -using ::core::potrf; - -template -auto potrf(filling t, Iterator first, Iterator last) -->decltype(potrf(static_cast(t), typename std::iterator_traits::difference_type{}, base(first), stride(first), std::declval()), Iterator{}) -{ - assert( stride(first) == stride(last) ); - assert( first->stride() == 1 ); - auto n = std::distance(first, last); -// auto lda = stride(first); - int info; - potrf(static_cast(t), n, base(first), stride(first), info); - assert( info >= 0 ); - return info==0?last:first + info; -} -} - -template -auto potrf(filling t, A2D&& A) -->decltype(potrf(t, begin(A), end(A)), A({0, 1}, {0, 1})) -{ - using blas::flip; - if(stride(A)==1){ - auto last = potrf(flip(t), begin(rotated(A)), end(rotated(A))); - using std::distance; - return A({0, distance(begin(rotated(A)), last)}, {0, distance(begin(rotated(A)), last)}); - } - auto last = potrf(t, begin(A), end(A)); - using std::distance; - return A({0, distance(begin(A), last)}, {0, distance(begin(A), last)}); -} - -template -struct hermitic_t : private A{ - using underlying_type = A; - underlying_type const& underlying()const &{return *this;} - underlying_type& underlying()&{return *this;} - underlying_type&& underlying()&&{return std::move(*this);} - blas::filling side; - hermitic_t(A const& a, blas::filling side) : A{a}, side{side}{} - using A::size; -}; - -template hermitic_t()())>> hermitic(blas::filling side, A&& a){ - return {a(), side}; -} - -template -NODISCARD("result is returned because third argument is const") -auto potrf(filling t, A2D const& A) -->decltype(potrf(t, decay(A)), decay(A)){ - auto ret = decay(A); - auto last 
= potrf(t, ret); assert( size(last) == size(ret) ); - return ret; -} - -template -NODISCARD("result is returned because third argument is const") -decltype(auto) potrf(HA&& ha){ - return hermitic(ha.side, potrf(ha.side, std::forward(ha).underlying()));//static_cast(ha))); -} - -// orthonormalize rows -template auto onrm(A&& a, filling f = filling::upper) -->decltype(trsm(flip(f), hermitized(potrf(f, herk(f, a))), std::forward(a))){assert(size(a) <= size(rotated(a))); - return trsm(flip(f), hermitized(potrf(f, herk(f, a))), std::forward(a)); -} - -template auto onrm(A&& a, B&& buffer, filling f = filling::upper) -->decltype(trsm(flip(f), hermitized(potrf(f, herk(f, a, buffer))), std::forward(a))){assert(size(a) <= size(rotated(a))); - return trsm(flip(f), hermitized(potrf(f, herk(f, a, buffer))), std::forward(a)); -} - -//template -//decltype(auto) potrf(A2D&& A){return potrf(blas::detect_triangular(A), A);} - -}}} - -#if _TEST_MULTI_ADAPTORS_LAPACK_POTRF - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi lapack adaptor potrf" -#define BOOST_TEST_DYN_LINK -#include - -#include // std::isnan - -namespace multi = boost::multi; -namespace lapack = multi::lapack; - -template decltype(auto) print(M const& C){ - using std::cout; - using multi::size; - for(int i = 0; i != size(C); ++i){ - for(int j = 0; j != size(C[i]); ++j) cout << C[i][j] << ' '; - cout << std::endl; - } - return cout << std::endl; -} - -BOOST_AUTO_TEST_CASE(lapack_potrf, *boost::unit_test::tolerance(0.00001) ){ - using complex = std::complex; complex const I{0, 1}; -{ - multi::array A = { - {167.413, 126.804 - 0.00143505*I, 125.114 - 0.1485590*I}, - {NAN , 167.381 , 126.746 + 0.0327519*I}, - {NAN , NAN , 167.231 } - }; - using lapack::filling; - using lapack::potrf; - potrf(filling::upper, A); // A is hermitic in upper triangular (implicit below) - BOOST_TEST( real(A[1][2]) == 3.78646 ); - BOOST_TEST( imag(A[1][2]) == 0.0170734 ); -// BOOST_TEST( std::isnan(norm(A[2][1])) ); -} -{ - multi::array A = 
- {{167.413, 126.804 - 0.00143505*I, 125.114 - 0.1485590*I}, - {NAN, 167.381, 126.746 + 0.0327519*I}, - {NAN, NAN , 167.231}} - ; - multi::array At = rotated(A); - auto&& Att = rotated(At); - using lapack::filling; - using lapack::potrf; - potrf(filling::upper, Att); // A is hermitic in the upper triangular (implicit hermitic below) - BOOST_TEST( real(Att[1][2]) == 3.78646 ); - BOOST_TEST( imag(Att[1][2]) == 0.0170734 ); -// BOOST_TEST( std::isnan(norm(Att[2][1])) ); -} -{ - multi::array A = - {{167.413, 126.804 - 0.00143505*I, 125.114 - 0.1485590*I}, - {NAN, 167.381, 126.746 + 0.0327519*I}, - {NAN, NAN , 167.231}} - ; - using lapack::potrf; - using lapack::filling; - potrf(filling::upper, A); // A is hermitic in the upper triangular (implicit hermitic below) - BOOST_TEST( real(A[1][2]) == 3.78646 ); - BOOST_TEST( imag(A[1][2]) == 0.0170734 ); -// BOOST_TEST( std::isnan(A[2][1]) ); -} -{ - multi::array A = - {{190., 126., 125.}, - {NAN , 1110., 122.}, - {NAN , NAN , 1350.}} - ; - using lapack::filling; - using lapack::potrf; - potrf(filling::upper, A); // A is the upper triangle (implicit hermitic/symmetric below), A becomes upper triangular with implicit zeros - BOOST_TEST( real(A[1][2]) == 1.22058 ); -// BOOST_TEST( std::isnan(norm(A[2][1])) ); -} -{ - multi::array A = - {{190., 126., 125.}, - {NAN , 1110., 122.}, - {NAN , NAN , 1350.}} - ; - using lapack::filling; - using lapack::potrf; - potrf(filling::upper, A); // A is the upper triangle (implicit hermitic/symmetric below), A becomes upper triangular with implicit zeros - BOOST_TEST( A[1][2] == 1.22058 ); -// BOOST_TEST( std::isnan(norm(A[2][1])) ); -} -{ - multi::array A = - {{190., 126., 125.}, - {NAN , 1110., 122.}, - {NAN , NAN , 1350.}} - ; - using lapack::filling; - using lapack::potrf; - potrf(filling::lower, rotated(A)); // A is the upper triangle (implicit hermitic/symmetric below), A becomes upper triangular with implicit symmetry - print(A); - BOOST_TEST( A[1][2] == 1.22058 ); -// BOOST_TEST( 
std::isnan(norm(A[2][1])) ); -} -{ - multi::array const A = - {{190., 126., 125.}, - {NAN , 1110., 122.}, - {NAN , NAN , 1350.}} - ; - using lapack::filling; - using lapack::potrf; - auto B = potrf(filling::upper, A); - print(B); - BOOST_TEST( real(B[1][2]) == 1.22058 ); -} -} - -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/CMakeLists.txt b/external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/CMakeLists.txt deleted file mode 100644 index c827cf634a..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/CMakeLists.txt +++ /dev/null @@ -1,69 +0,0 @@ -# -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;autowrap:nil;-*- -#[=[Multi Test suite can be run like this: - mkdir -p build - cd build - cmake .. [-DENABLE_CUDA=1] - make -j - ctest -j --output-on-error [-T memcheck] - exit -#]=] -cmake_minimum_required(VERSION 3.11) - -set(CMAKE_VERBOSE_MAKEFILE ON) - -project( - boost-multi-adaptors-lapack-test - VERSION 0.1 - LANGUAGES CXX -) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - -find_package(Boost REQUIRED COMPONENTS unit_test_framework) -add_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS}) -include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) -link_libraries(${Boost_LIBRARIES}) -link_directories(${Boost_LIBRARY_DIRS}) - -# find_package(BLAS REQUIRED) - -set(BLA_VENDOR OpenBLAS) -# set(BLA_VENDOR Intel10_64lp) find_package(BLAS) if(BLAS_FOUND) # in some systems with MKL, regular BLAS headers need to be found for it to work message("Multi/BLAS: MKL environment detected") add_definitions(-DRETURN_BY_STACK) else() -# message("Multi/BLAS: MKL environment not detected, looking for other BLAS") unset(BLA_VENDOR) -find_package(BLAS REQUIRED) -# endif() - -find_path( - BLAS_INCLUDE_DIRS - cblas.h - /usr/include - /usr/local/include - $ENV{BLAS_HOME}/include -) - -include_directories(../../../..) 
- -link_libraries(${BLAS_LIBRARIES}) -link_libraries(-llapacke) - -include_directories(${TEST_EXE} PRIVATE ${BLAS_INCLUDE_DIRS}) - -add_compile_options( - -Werror - -Wall - -Wextra - -fno-common - -Wfatal-errors - # $<$: -Wpedantic -Wformat-truncation -fstack-usage >#-Wconversion - # $<$,$>: - # -Wpedantic -Wmove > $<$: -wd161 -diag-disable=remark -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wparentheses - # -Wpointer-sign -Wreorder -Wno-return-type -Wsign-compare -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wunused-variable -Wwrite-strings -Werror -diag-error:3846 > $<$: /W4 > -) - -enable_testing() -include(CTest) - -add_executable(getrf.cpp.x getrf.cpp) -add_test(NAME getrf.cpp.x COMMAND ./getrf.cpp.x) diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/potrf.cpp b/external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/potrf.cpp deleted file mode 100644 index 5522b536b4..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/lapack/tests/potrf.cpp +++ /dev/null @@ -1,140 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS -(echo '#include"'$0'"'>$0.cpp)&&nvcc -x cu --expt-relaxed-constexpr`#$CXX` $0 -o $0x -Wno-deprecated-declarations -lcudart -lcublas -lcusolver `pkg-config --libs blas lapack` -DBOOST_TEST_DYN_LINK -lboost_unit_test_framework -DBOOST_LOG_DYN_LINK -lboost_log -lpthread -lboost_system &&$0x&&rm $0x $0.cpp; exit -#endif -// © Alfredo A. 
Correa 2019-2020 -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi cuSolver potrf" -#include - -#include "../../../adaptors/cuda.hpp" -#include "../../lapack/potrf.hpp" -#include "../../blas/herk.hpp" -#include "../../blas/trsm.hpp" - -#include "../../../adaptors/lapack/cuda.hpp" -#include "../../../adaptors/blas/cuda.hpp" - -#include -#include - -namespace multi = boost::multi; -namespace lapack = multi::lapack; -namespace blas = multi::blas; - -using complex = std::complex; - -std::ostream& operator<<(std::ostream& os, std::complex const& c){ - return os<< real(c) <<" + I*"<< imag(c); -} - -template decltype(auto) print(M const& C){ - using std::cout; - using multi::size; - cout<<'{'; - for(int i = 0; i != size(C); ++i){ - cout<<'{'; - for(int j = 0; j != size(C[i]); ++j){ - cout<< C[i][j]; - if(j + 1 != size(C[i])) cout<<", "; - } - cout<<'}'< -M&& randomize(M&& A){ - std::mt19937 eng{123}; - auto gen = [&](){return std::complex{std::uniform_real_distribution<>{-1, 1}(eng), std::uniform_real_distribution<>{-1, 1}(eng)};}; - std::for_each(begin(A), end(A), [&](auto&& r){std::generate(begin(r), end(r), gen);}); - return std::forward(A); -} - -/* -BOOST_AUTO_TEST_CASE(orthogonalization_over_rows, *boost::unit_test::tolerance(0.00001)){ - auto A = randomize(multi::array({3, 10})); - lapack::onrm(A); - - using blas::herk; - using blas::hermitized; - using blas::filling; - auto id = herk(filling::upper, A); - BOOST_TEST( real(id[1][1]) == 1. ); BOOST_TEST( imag(id[1][1]) == 0. ); - BOOST_TEST( real(id[1][2]) == 0. ); BOOST_TEST( imag(id[1][2]) == 0. ); -} -*/ - -BOOST_AUTO_TEST_CASE(orthogonalization_over_rows_cuda, *boost::unit_test::tolerance(0.00001)){ - auto Acpu = randomize(multi::array({3, 10})); - - multi::cuda::array A = Acpu; - - using namespace blas; - using namespace lapack; - - trsm(filling::lower, hermitized(potrf(filling::upper, herk(filling::upper, A))), A); - - Acpu = A; - auto id = herk(filling::upper, Acpu); - BOOST_TEST( real(id[1][1]) == 1. 
); BOOST_TEST( imag(id[1][1]) == 0. ); - BOOST_TEST( real(id[1][2]) == 0. ); BOOST_TEST( imag(id[1][2]) == 0. ); -} - -/* -BOOST_AUTO_TEST_CASE(orthogonalization_over_columns, *boost::unit_test::tolerance(0.00001)){ - - auto A = randomize( multi::array({10, 3}) ); - using blas::hermitized; - lapack::onrm(hermitized(A)); - - using blas::filling; - auto id = herk(filling::upper, hermitized(A)); - BOOST_TEST( real(id[1][1]) == 1. ); BOOST_TEST( imag(id[1][1]) == 0. ); - BOOST_TEST( real(id[1][2]) == 0. ); BOOST_TEST( imag(id[1][2]) == 0. ); -}*/ - -BOOST_AUTO_TEST_CASE(lapack_potrf, *boost::unit_test::tolerance(0.00001) ){ - - complex const I{0, 1}; -{ - multi::array A = { - {167.413, 126.804 - 0.00143505*I, 125.114 - 0.1485590*I}, - {NAN , 167.381 , 126.746 + 0.0327519*I}, - {NAN , NAN , 167.231 } - }; - using lapack::filling; - using lapack::potrf; - potrf(filling::upper, A); // A is hermitic in upper triangular (implicit below) - BOOST_TEST( real(A[1][2]) == 3.78646 ); - BOOST_TEST( imag(A[1][2]) == 0.0170734 ); -// BOOST_TEST( A[2][1] != A[2][1] ); - print(A); -} -{ - multi::cuda::managed::array A = { - {167.413, 126.804 - 0.00143505*I, 125.114 - 0.1485590*I}, - {NAN , 167.381 , 126.746 + 0.0327519*I}, - {NAN , NAN , 167.231 } - }; - using lapack::filling; - using lapack::potrf; - potrf(filling::upper, A); // A is hermitic in upper triangular (implicit below) - BOOST_TEST( real(A[1][2]) == 3.78646 ); - BOOST_TEST( imag(A[1][2]) == 0.0170734 ); -// BOOST_TEST( A[2][1] != A[2][1] ); -} -{ - multi::cuda::array A = { - {167.413, 126.804 - 0.00143505*I, 125.114 - 0.1485590*I}, - {NAN , 167.381 , 126.746 + 0.0327519*I}, - {NAN , NAN , 167.231 } - }; - using lapack::filling; - using lapack::potrf; - potrf(filling::upper, A); // A is hermitic in upper triangular (implicit below) - multi::array A_copy = A; - print(A_copy); -} - -} - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/serialization/xml_archive.hpp 
b/external_codes/boost_multi/multi/include/multi/adaptors/serialization/xml_archive.hpp deleted file mode 100644 index 669aaeb0a7..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/serialization/xml_archive.hpp +++ /dev/null @@ -1,43 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXX $0 -o $0x -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2019-2021 -#include "../../utility.hpp" - -#include -#include - -namespace boost{ -namespace multi{ - -template<> -struct archive_traits{ - template static auto make_nvp(char const* name, T& value) -> decltype(auto){ - return boost::serialization::make_nvp(name, value); - } -}; -template<> -struct archive_traits{ - template static auto make_nvp(char const* name, T& value) -> decltype(auto){ - return boost::serialization::make_nvp(name, value); - } -}; - -} // end namespace multi -} // end namespace boost - -#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi adaptor serialization xml_archive" -#define BOOST_TEST_DYN_LINK -#include - -#include "../../array.hpp" - -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(multi_adaptor_serialization_xml_archive){ - BOOST_REQUIRE(true); -} -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/thrust.hpp deleted file mode 100644 index 7cbc0532ad..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust.hpp +++ /dev/null @@ -1,308 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2021-2022 Alfredo A. 
Correa - -#pragma once - -#include "../array.hpp" - -#include "./thrust/cuda/managed.hpp" - -#include - -#include -#include // ::thrust::cuda::allocator - -//#include -//#include - -#include - -#include // std::copy - -// begin of nvcc trhust 11.5 workaround : https://github.com/NVIDIA/thrust/issues/1629 -namespace thrust { - -template class pointer; -template struct pointer_traits; - -} // end namespace thrust - -namespace std { - -template struct pointer_traits> -: thrust::detail::pointer_traits> { - template - using rebind = typename thrust::detail::pointer_traits>::template rebind::other; -}; - -} // end namespace std -// end of nvcc trhust 11.5 workaround - -namespace boost::multi { - -template -struct pointer_traits<::thrust::pointer> : std::pointer_traits<::thrust::pointer> { - using default_allocator_type = ::thrust::universal_allocator>; -}; - -} // end namespace boost::multi - -namespace boost::multi { - -template -struct allocator_traits<::thrust::mr::stateless_resource_allocator> -: std::allocator_traits<::thrust::mr::stateless_resource_allocator> { - private: - using Alloc = ::thrust::mr::stateless_resource_allocator; - using base = std::allocator_traits; - - public: - using typename base::pointer; - using typename base::size_type; - using typename base::const_void_pointer; - - using base::allocate; - [[nodiscard]] static constexpr auto allocate(Alloc& a, size_type n, const_void_pointer hint) -> pointer { - auto ret = allocator_traits::allocate(a, n); - if(not hint) { - prefetch_to_device(ret, n*sizeof(TT), get_current_device()); - return ret; - } - prefetch_to_device(ret, n*sizeof(TT), get_device(hint)); - return ret; - } - - private: - using device_index = int; - static auto get_current_device() -> device_index { - int device; - switch(cudaGetDevice(&device)) { - case cudaSuccess : break; - case cudaErrorInvalidValue: assert(0); - } - return device; - } - static void prefetch_to_device(const_void_pointer p, size_type byte_count, device_index d) { - 
switch(cudaMemPrefetchAsync(raw_pointer_cast(p), byte_count, d)) { - case cudaSuccess : break; - case cudaErrorInvalidValue : assert(0); break; - case cudaErrorInvalidDevice: assert(0); break; - } - } - - static auto get_device(const_void_pointer p) -> device_index { - cudaPointerAttributes attr{}; - switch(cudaPointerGetAttributes(&attr, raw_pointer_cast(p))) { - case cudaSuccess: break; - case cudaErrorInvalidDevice: assert(0); break; - case cudaErrorInvalidValue: assert(0); break; - } - assert(attr.type == cudaMemoryTypeManaged); - return attr.device; - } -}; - -} - -// this is important for algorithms to dispatch to the right thrust executor -namespace thrust { - -template struct iterator_system; - -template -struct iterator_system>{ - using type = typename thrust::iterator_system::element_ptr>::type; -}; - -template -struct iterator_system> { - using type = typename thrust::iterator_system::pointer>::type; -}; - -} - -namespace boost::multi { -namespace thrust { - -// defines multi::thrust::device_array -// defines multi::thrust::host_array - -template using device_array = multi::array>; -template using universal_array = multi::array>; -template using host_array = multi::array; - -// defines multi::thrust::device::array -// defines multi::thrust::host ::array -namespace device {template using array = device_array ;} // end namespace device -namespace universal {template using array = universal_array;} // end namespace universal -namespace host {template using array = host_array ;} // end namespace host - -// defines multi::thrust::cuda::array -// defines multi::thrust::cuda::managed::array -namespace cuda { - template using array = multi::array>; - - namespace managed { - template using array = multi::array>; - } // end namespace managed -} // end namespace cuda - -} // end namespace thrust -} // end namespace boost::multi - -namespace boost::multi { - -template -constexpr auto default_allocator_of(::thrust::pointer /*unused*/) { - return 
::thrust::cuda::universal_allocator>::value_type>{}; -} - -// copy_n -#if 1 -template -auto copy_n( - boost::multi::elements_iterator_t< Q1* , L1> first, Size count, - boost::multi::elements_iterator_t<::thrust::pointer, L2> d_first -)-> boost::multi::elements_iterator_t<::thrust::pointer, L2> { - if constexpr(std::is_trivially_assignable{}) { - if constexpr(L1::dimensionality == 1 and L2::dimensionality == 1) { - if(first.layout().stride() == 1 and d_first.layout().stride() == 1) { - auto s = cudaMemcpy (raw_pointer_cast(d_first.current()), first.current(), sizeof(Q2)* static_cast(count), cudaMemcpyHostToDevice); assert( s == cudaSuccess ); - } else { - auto s = cudaMemcpy2D(raw_pointer_cast(d_first.current()), static_cast(d_first.layout().stride())*sizeof(Q2), first.current(), static_cast(first.layout().stride())*sizeof(Q2), sizeof(Q2), static_cast(count), cudaMemcpyHostToDevice); assert( s == cudaSuccess ); - } - return d_first + count; - } else if constexpr(L1::dimensionality == 2 and L1::dimensionality == 2) { - if(std::get<1>(first.layout().strides()) == 1 and std::get<1>(d_first.layout().strides()) == 1 and count%std::get<1>(first.layout().sizes()) == 0) { - auto s = cudaMemcpy2D(raw_pointer_cast(d_first.current()), static_cast(d_first.layout().stride())*sizeof(Q2), first.current(), static_cast(first.layout().stride())*sizeof(Q2), static_cast(std::get<1>(first.layout().sizes()))*sizeof(Q2), static_cast(count/std::get<1>(first.layout().sizes())), cudaMemcpyHostToDevice); assert( s == cudaSuccess ); - return d_first + count; - } // else fallthrough - } - cudaHostRegister( - const_cast(static_cast(first.base())), - static_cast (first.layout().hull_size()*sizeof(Q1)), - cudaHostRegisterPortable - ); - auto ret = ::thrust::copy_n( - ::thrust::cuda::par, - first, count, d_first - ); - cudaHostUnregister( - const_cast(static_cast(first.base())) - ); - return ret; - } else { - return ::thrust::copy_n(first, count, d_first); - } - return d_first + count; -} - 
-template -auto copy_n( - boost::multi::elements_iterator_t<::thrust::pointer, L1> first, Size count, - boost::multi::elements_iterator_t< Q2* , L2> d_first -)-> boost::multi::elements_iterator_t< Q2* , L2> { - if constexpr(std::is_trivially_assignable{}) { - if constexpr(L1::dimensionality == 1 and L2::dimensionality == 1) { - if(first.layout().stride() == 1 and d_first.layout().stride() == 1) { - auto s = cudaMemcpy ( d_first.current() , raw_pointer_cast(first.current()), sizeof(Q2)* static_cast(count), cudaMemcpyDeviceToHost); assert( s == cudaSuccess ); - } else { - auto s = cudaMemcpy2D( d_first.current() , static_cast(d_first.layout().stride())*sizeof(Q2), raw_pointer_cast(first.current()), static_cast(first.layout().stride())*sizeof(Q2), sizeof(Q2), static_cast(count), cudaMemcpyDeviceToHost); assert( s == cudaSuccess ); - } - return d_first + count; - } else if constexpr(L1::dimensionality == 2 and L1::dimensionality == 2) { - if(std::get<1>(first.layout().strides()) == 1 and std::get<1>(d_first.layout().strides()) == 1 and count%std::get<1>(first.layout().sizes()) == 0) { - auto s = cudaMemcpy2D( d_first.current() , static_cast(d_first.layout().stride())*sizeof(Q2), raw_pointer_cast(first.current()), static_cast(first.layout().stride())*sizeof(Q2), static_cast(std::get<1>(first.layout().sizes()))*sizeof(Q2), static_cast(count/std::get<1>(first.layout().sizes())), cudaMemcpyDeviceToHost); assert( s == cudaSuccess ); - return d_first + count; - } - } - cudaHostRegister( - const_cast(static_cast(d_first.base())), - static_cast (d_first.layout().hull_size()*sizeof(Q1)), - cudaHostRegisterPortable - ); - auto ret = ::thrust::copy_n( - ::thrust::cuda::par, - first, count, d_first - ); - cudaHostUnregister( - const_cast(static_cast(d_first.base())) - ); - return ret; - } else { - return ::thrust::copy_n(first, count, d_first); - } - return d_first + count; -} - -template -auto uninitialized_copy_n( - boost::multi::elements_iterator_t< Q1* , L1> first, Size count, 
- boost::multi::elements_iterator_t<::thrust::pointer, L2> d_first -)-> boost::multi::elements_iterator_t<::thrust::pointer, L2> { - if constexpr(std::is_trivially_assignable{}) { - return boost::multi::copy_n(first, count, d_first); - } else { - return ::thrust::uninitialized_copy_n(first, count, d_first); - } -} - -template -auto uninitialized_copy_n( - boost::multi::elements_iterator_t<::thrust::pointer, L1> first, Size count, - boost::multi::elements_iterator_t< Q2* , L2> d_first -)-> boost::multi::elements_iterator_t< Q2* , L2> { - if constexpr(std::is_trivially_assignable{}) { - return boost::multi::copy_n(first, count, d_first); - } else { - return ::thrust::uninitialized_copy_n(first, count, d_first); - } -} - -template -auto copy( - boost::multi::elements_iterator_t< Q1* , L1> first, - boost::multi::elements_iterator_t< Q1* , L1> last , - boost::multi::elements_iterator_t<::thrust::pointer, L2> d_first -)-> boost::multi::elements_iterator_t<::thrust::pointer, L2> { - return boost::multi::copy_n(first, last - first, d_first); -} - -template -auto copy( - boost::multi::elements_iterator_t<::thrust::pointer, L1> first, - boost::multi::elements_iterator_t<::thrust::pointer, L1> last , - boost::multi::elements_iterator_t< Q2* , L2> d_first -)-> boost::multi::elements_iterator_t< Q2* , L2> { - return boost::multi::copy_n(first, last - first, d_first); -} - -template -auto uninitialized_copy( - boost::multi::elements_iterator_t< Q1* , L1> first, - boost::multi::elements_iterator_t< Q1* , L1> last , - boost::multi::elements_iterator_t<::thrust::pointer, L2> d_first -)-> boost::multi::elements_iterator_t<::thrust::pointer, L2> { - if constexpr(std::is_trivially_assignable{}) { - return boost::multi::copy(first, last, d_first); - } else { - return ::thrust::uninitialized_copy(first, last, d_first); - } -} - -template -auto uninitialized_copy( - boost::multi::elements_iterator_t<::thrust::pointer, L1> first, - boost::multi::elements_iterator_t<::thrust::pointer, L1> 
last , - boost::multi::elements_iterator_t< Q2* , L2> d_first -)-> boost::multi::elements_iterator_t< Q2* , L2> { - if constexpr(std::is_trivially_assignable{}) { - return boost::multi::copy(first, last, d_first); - } else { - return ::thrust::uninitialized_copy(first, last, d_first); - } -} - -#endif - -} // end namespace boost::multi diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/CMakeLists.txt b/external_codes/boost_multi/multi/include/multi/adaptors/thrust/CMakeLists.txt deleted file mode 100644 index 7b63bd3227..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -cmake_minimum_required(VERSION 3.11) -project( - boost-multi-adaptor-cuda - VERSION 0.1 - LANGUAGES CXX -) - -set(CMAKE_VERBOSE_MAKEFILE ON) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - -find_package(Boost REQUIRED COMPONENTS unit_test_framework) - -if(ENABLE_CUDA OR DEFINED CXXCUDA) - enable_language(CUDA) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++17 --extended-lambda --expt-relaxed-constexpr -Xcudafe \"--display_error_number\"") - - enable_testing() - list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") # needs cmake 3.1 - - add_subdirectory(test) -endif() diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/cuda/test/managed.cu b/external_codes/boost_multi/multi/include/multi/adaptors/thrust/cuda/test/managed.cu deleted file mode 100644 index 755bbb4cbb..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/cuda/test/managed.cu +++ /dev/null @@ -1,60 +0,0 @@ -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust" -#define BOOST_TEST_DYN_LINK -#include - -#include "../../cuda/managed.hpp" - -namespace multi = boost::multi; - -void set_one(double* p){ - *p = 1.; -} - -void set_two_gpu(thrust::cuda::pointer p){ - *p = 2.; -} - -void set_three_ref(double& p){ - p = 3.; -} - 
-template::value_type, class = std::enable_if_t{} and std::is_convertible>{}> > -void some_fun(Pointer p){} - -template::value_type, class = std::enable_if_t{} and std::is_convertible{}> > -void some_other_fun(Pointer p){} - -template class prio : std::conditional_t, std::false_type>{}; - -template::value_type, std::enable_if_t{} and std::is_convertible>{}, int> =0> -int overload_aux(Pointer p, prio<0>){return 0;} - -template::value_type, std::enable_if_t{} and std::is_convertible{}, int> =0> -int overload_aux(Pointer p, prio<1>){return 1;} - -template int overload(Pointer p){return overload_aux(p, prio<1>{});} - -BOOST_AUTO_TEST_CASE(vector){ - - multi::thrust::cuda::managed::allocator alloc; - multi::thrust::cuda::managed::pointer p = alloc.allocate(100); - - p[17] = 3.; - BOOST_TEST_REQUIRE( p[17] == 3. ); - - set_one(p); - BOOST_TEST_REQUIRE( p[0] == 1. ); - - set_two_gpu(p); - BOOST_TEST_REQUIRE( p[0] == 2. ); - - set_three_ref( p[1] ); - BOOST_TEST_REQUIRE( p[1] == 3. ); - - some_fun(p); - - BOOST_TEST_REQUIRE(overload(p) == 1); - - alloc.deallocate(p, 100); - -} diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/CMakeLists.txt b/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/CMakeLists.txt deleted file mode 100644 index 4711351142..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/CMakeLists.txt +++ /dev/null @@ -1,81 +0,0 @@ -# -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;autowrap:nil;-*- -cmake_minimum_required(VERSION 3.11) -project( - boost-multi-adaptor-cuda-thrust-test - VERSION 0.1 - LANGUAGES CXX CUDA -) - -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda --Werror=cross-execution-space-call -Xcudafe \"--display_error_number\"") - -find_package(Boost REQUIRED COMPONENTS unit_test_framework) - -enable_testing() - -find_program(MEMORYCHECK_COMMAND valgrind) -set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full 
--error-exitcode=1") -include(CTest) - -# TODO(correaa) this will be simpler in Cmake 3.24 https://stackoverflow.com/a/72332793/225186 -if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - # temporary workaround to detect CUDA arch, for between the deprecation of FindCUDA in CMake 3.10 and the availability of FindCudaToolkit in CMake 3.17 # https://stackoverflow.com/a/68223399/225186 - include(FindCUDA/select_compute_arch) - cuda_detect_installed_gpus(INSTALLED_GPU_CCS_1) - string(STRIP "${INSTALLED_GPU_CCS_1}" INSTALLED_GPU_CCS_2) - string( - REPLACE " " - ";" - INSTALLED_GPU_CCS_3 - "${INSTALLED_GPU_CCS_2}" - ) - string( - REPLACE "." - "" - CUDA_ARCH_LIST - "${INSTALLED_GPU_CCS_3}" - ) - set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST}) - set_property(GLOBAL PROPERTY CUDA_ARCHITECTURES "${CUDA_ARCH_LIST}") -endif() - -if("30;35;50;52;60;61;70;70+PTX;75;75+PTX" STREQUAL "${CUDA_ARCH_LIST}" OR "" STREQUAL "${CUDA_ARCH_LIST}") - message(STATUS "gpu not detected ${CUDA_ARCH_LIST}") - set(TEST_SRCS) -else() - message(STATUS "gpu detected: ${CUDA_ARCH_LIST}") -endif() - -find_package(Boost REQUIRED COMPONENTS unit_test_framework timer) - -set(TEST_SRCS - array.cu - speed.cu - speed_algo.cu - universal.cu -) - -foreach(TEST_FILE ${TEST_SRCS}) - set(TEST_EXE "${TEST_FILE}.x") - add_executable(${TEST_EXE} ${TEST_FILE}) - if(ENABLE_CUDA OR DEFINED CXXCUDA) - set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) - endif() - - target_include_directories(${TEST_EXE} PUBLIC ../../../../../include) - - target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_PP_VARIADICS") - target_compile_definitions(${TEST_EXE} PRIVATE ${Boost_DEFINITIONS}) - target_include_directories(${TEST_EXE} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) - target_link_libraries(${TEST_EXE} PRIVATE ${Boost_LIBRARIES}) - target_link_directories(${TEST_EXE} PRIVATE ${Boost_LIBRARY_DIRS}) - if("30;35;50;52;60;61;70;70+PTX;75;75+PTX" STREQUAL "${CUDA_ARCH_LIST}" OR "" STREQUAL "${CUDA_ARCH_LIST}" OR NOT ENABLE_GPU) - 
message(STATUS "gpu not detected ${CUDA_ARCH_LIST}") - else() - message(STATUS "gpu detected: ${CUDA_ARCH_LIST}") - add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) - endif() - if(ENABLE_GPU) - message(STATUS "gpu run forced") - add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) - endif() -endforeach() diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/speed.cu b/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/speed.cu deleted file mode 100644 index 25d4e1bae4..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/thrust/test/speed.cu +++ /dev/null @@ -1,82 +0,0 @@ -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA thrust universal copy and assignment" -#include - -#include -#include - -#include - -#include - - -namespace multi = boost::multi; -using complex = thrust::complex; - -typedef boost::mpl::list test_types; - -BOOST_AUTO_TEST_CASE_TEMPLATE(thrust_universal_speed, T, test_types) { - - auto const n = 8000; - - multi::array> src({n, n}); - multi::array> dst(extensions(src)); - - auto const threshold = 0.2; - - auto const size = src.num_elements()*sizeof(T)/1e9; - - auto const dummy = std::invoke([&]{ - auto start_time = std::chrono::high_resolution_clock::now(); - cudaMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements()*sizeof(T), cudaMemcpyDeviceToDevice); - std::chrono::duration time = std::chrono::high_resolution_clock::now() - start_time; - auto rate = size/time.count(); - std::cout<<"memcpy rate = "<< rate <<" GB/s (ratio = 1)\n"; - return rate; - }); - - auto const memcpy_rate = std::invoke([&]{ - auto start_time = std::chrono::high_resolution_clock::now(); - cudaMemcpy(raw_pointer_cast(dst.data_elements()), raw_pointer_cast(src.data_elements()), src.num_elements()*sizeof(T), cudaMemcpyDeviceToDevice); - std::chrono::duration time = std::chrono::high_resolution_clock::now() - start_time; - auto rate = size/time.count(); - 
std::cout<<"memcpy rate = "<< rate <<" GB/s (ratio = 1)\n"; - return rate; - }); - - { //cctor - auto tick = std::chrono::high_resolution_clock::now(); - - auto dst2 = src; - - std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; - double rate = size/time.count(); - double ratio = rate/memcpy_rate; - - std::cout<<"cctor rate = "<< rate <<" GB/s (ratio = "<< ratio <<")\n"; - BOOST_TEST(ratio >= threshold); - } - { //assign - auto tick = std::chrono::high_resolution_clock::now(); - - dst = src; - - std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; - double rate = size/time.count(); - double ratio = rate/memcpy_rate; - - std::cout << "assign rate = "<< rate <<" GB/s (ratio = "<< ratio <<")\n"; - BOOST_TEST(ratio >= threshold); - } - { //subarray assign - auto tick = std::chrono::high_resolution_clock::now(); - - dst({0, n - 2}, {0, n - 2}) = src({2, n}, {2, n}); - - std::chrono::duration time = std::chrono::high_resolution_clock::now() - tick; - double rate = size/time.count(); - double ratio = rate/memcpy_rate; - std::cout << "subasssign rate = "<< rate <<" GB/s (ratio = "<< ratio << ")\n"; - BOOST_TEST(ratio >= threshold); - } -} - diff --git a/external_codes/boost_multi/multi/include/multi/adaptors/totalview.hpp b/external_codes/boost_multi/multi/include/multi/adaptors/totalview.hpp deleted file mode 100644 index 938b1b2710..0000000000 --- a/external_codes/boost_multi/multi/include/multi/adaptors/totalview.hpp +++ /dev/null @@ -1,150 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXXX $CXXFLAGS $0 -o $0x -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. 
Correa 2020-2021 - -#ifndef MULTI_ADAPTORS_TOTALVIEW_HPP -#define MULTI_ADAPTORS_TOTALVIEW_HPP - -#include // TODO remove -#include - -#include -#include -//#include - -#include "../adaptors/../array.hpp" - -#include "tv_data_display.h" // you have to find the directory with the totalview include files -#include "../src/tv_data_display.c" // you have to find the directory with the totalview include files -// ^^^^^^^^^^^ this can produce problemas later with linking -// https://docs.roguewave.com/totalview/2018.1/html/index.html#page/Reference_Guide%2FCompilingAndLinkingTV_data_display.html%23ww1738654 - -template constexpr char const* pretty_name = "unknown"; - -template<> constexpr char const* pretty_name = "double"; -template<> constexpr char const* pretty_name = "float"; - -template<> constexpr char const* pretty_name> = "std::complex"; -template<> constexpr char const* pretty_name > = "std::complex"; - -template<> constexpr char const* pretty_name = "long"; -template<> constexpr char const* pretty_name = "int"; - -template -#ifdef __GCC__ -__attribute__((used)) -#endif -int TV_ttf_display_type(boost::multi::array const* mad1P) { - if(not mad1P->is_empty()) { - std::array tname; // char tname[128]; - snprintf(tname.data(), tname.size(), "%s[%ld]", pretty_name, (long)mad1P->size());//, (long)mad1P->stride()); - int result = TV_ttf_add_row("elements", tname.data(), mad1P->origin()); - if (result != 0){ - fprintf(stderr, "TV_ttf_add_row returned error %d\n", result); - return TV_ttf_format_failed; - } - } - return TV_ttf_format_ok_elide; -} - -template -#ifdef __GCC__ -__attribute__((used)) -#endif -int TV_ttf_display_type(boost::multi::array const* mad2P) { - if(not mad2P->is_empty()) { - std::arra tname; // char tname[128]; - using std::get; - snprintf(tname.data(), tname.size(), "%s[%ld][%ld]", pretty_name, (long)get<0>(mad2P->sizes()), (long)get<1>(mad2P->sizes()));//, (long)mad1P->stride()); - int result = TV_ttf_add_row("elements", tname.data(), 
mad2P->origin()); - - if(result != 0) { - fprintf(stderr, "TV_ttf_add_row returned error %d\n", result); - return TV_ttf_format_failed; - } - } - return TV_ttf_format_ok_elide; -} - -template -#ifdef __GCC__ -__attribute__((used)) -#endif -int TV_ttf_display_type(boost::multi::basic_array const* mad2P) { - boost::multi::array const value = *mad2P; - return TV_ttf_display_type(std::addressof(value)); -} - -template -#ifdef __GCC__ -__attribute__((used)) -#endif -int TV_ttf_display_type(boost::multi::basic_array const* mad2P) { - boost::multi::array const value = *mad2P; - return TV_ttf_display_type(std::addressof(value)); -} - -template int TV_ttf_display_type(boost::multi::array const*); -template int TV_ttf_display_type(boost::multi::array const*); -template int TV_ttf_display_type>(boost::multi::array, 1> const*); -template int TV_ttf_display_type >(boost::multi::array , 1> const*); -template int TV_ttf_display_type(boost::multi::array const*); -template int TV_ttf_display_type(boost::multi::array const*); - -template int TV_ttf_display_type(boost::multi::array const*); -template int TV_ttf_display_type(boost::multi::array const*); -template int TV_ttf_display_type>(boost::multi::array, 2> const*); -template int TV_ttf_display_type >(boost::multi::array , 2> const*); -template int TV_ttf_display_type(boost::multi::array const*); -template int TV_ttf_display_type(boost::multi::array const*); - -template int TV_ttf_display_type(boost::multi::basic_array const*); -template int TV_ttf_display_type(boost::multi::basic_array const*); -template int TV_ttf_display_type>(boost::multi::basic_array, 1> const*); -template int TV_ttf_display_type >(boost::multi::basic_array , 1> const*); -template int TV_ttf_display_type(boost::multi::basic_array const*); -template int TV_ttf_display_type(boost::multi::basic_array const*); - -template int TV_ttf_display_type(boost::multi::basic_array const*); -template int TV_ttf_display_type(boost::multi::basic_array const*); -template int 
TV_ttf_display_type>(boost::multi::basic_array, 2> const*); -template int TV_ttf_display_type >(boost::multi::basic_array , 2> const*); -template int TV_ttf_display_type(boost::multi::basic_array const*); -template int TV_ttf_display_type(boost::multi::basic_array const*); - -#if defined(__INCLUDE_LEVEL__) and (not __INCLUDE_LEVEL__) - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi TotalView adaptor" -#define BOOST_TEST_DYN_LINK -#include - -#include "../array.hpp" -#include "../utility.hpp" - -#include -#include -#include // iota -#include // transform - -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(multi_1d) { - - std::vector V = {10, 20, 30}; - - multi::array const A = {1, 2, 3, 4, 5}; - auto&& Apart = A({1, 3}); - - multi::array const B = {{1, 2, 3}, {4, 5, 6}}; - - double sum = 0; - for(auto i : A.extension()) sum += A[i]; - - BOOST_REQUIRE( sum == 15. ); - BOOST_REQUIRE( B[1][0] == 4. ); -} - -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/array.hpp b/external_codes/boost_multi/multi/include/multi/array.hpp deleted file mode 100644 index 82542fe2d5..0000000000 --- a/external_codes/boost_multi/multi/include/multi/array.hpp +++ /dev/null @@ -1,1080 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. 
Correa - -#ifndef MULTI_ARRAY_HPP -#define MULTI_ARRAY_HPP - -#include "./array_ref.hpp" -#include "./config/NO_UNIQUE_ADDRESS.hpp" - -#include "./detail/adl.hpp" -#include "./detail/memory.hpp" -#include "./detail/type_traits.hpp" - -// #include "./memory/allocator.hpp" - -#include // for copy -#include // for allocator_traits -#include // needed by a deprecated function -#include // for move - -namespace boost::multi { - -template -struct array_allocator { - using allocator_type = Allocator; - - private: - MULTI_NO_UNIQUE_ADDRESS allocator_type alloc_; - - using allocator_traits = typename multi::allocator_traits; - using size_type_ = typename allocator_traits::size_type; - using pointer_ = typename allocator_traits::pointer; - - protected: - auto alloc() & -> allocator_type & {return alloc_;} - auto alloc() const& -> allocator_type const& {return alloc_;} - - array_allocator() = default; - explicit array_allocator(allocator_type const& alloc) : alloc_{alloc} {} - - auto allocate(size_type_ n) -> pointer_ { - return n?allocator_traits::allocate(alloc_, n):pointer_{nullptr}; - } - auto allocate(size_type_ n, typename allocator_traits::const_void_pointer hint) -> pointer_ { - return n?allocator_traits::allocate(alloc_, n, hint):pointer_{nullptr}; - } - - auto uninitialized_fill_n(pointer_ first, size_type_ count, typename allocator_traits::value_type value) { - return adl_alloc_uninitialized_fill_n(alloc_, first, count, value); - } - template - auto uninitialized_copy_n(It first, size_type count, pointer_ d_first) { - if constexpr(std::is_trivial_v::value_type>) { - return adl_copy_n( first, count, d_first); - } else { - return adl_alloc_uninitialized_copy_n(alloc_, first, count, d_first); - } - } - template - auto destroy_n(It first, size_type n) {return adl_alloc_destroy_n(this->alloc(), first, n);} - - public: - constexpr auto get_allocator() const -> allocator_type {return alloc_;} -}; - -template> -struct static_array // NOLINT(fuchsia-multiple-inheritance) : 
multiple inheritance used for composition -: protected array_allocator -, public array_ref::pointer> -, boost::multi::random_iterable> { - protected: - using array_alloc = array_allocator; - - public: - static_assert( std::is_same::value_type, typename static_array::element>{}, - "allocator value type must match array value type"); - - using array_alloc::get_allocator; - using allocator_type = typename array_allocator::allocator_type; - using decay_type = array; - using layout_type = typename array_ref::pointer>::layout_type; - - using ref = array_ref::template rebind_alloc>::pointer>; - - protected: - using alloc_traits = typename multi::allocator_traits; - - auto uninitialized_value_construct() { - return adl_alloc_uninitialized_value_construct_n(static_array::alloc(), this->base_, this->num_elements()); - } - - auto uninitialized_default_construct_if(std::true_type /*true*/ ) {} - auto uninitialized_default_construct_if(std::false_type/*false*/) { - return adl_alloc_uninitialized_default_construct_n(static_array::alloc(), this->base_, this->num_elements()); - } - - auto uninitialized_default_construct() { - return uninitialized_default_construct_if(multi::is_trivially_default_constructible{}); - } - - template auto uninitialized_copy_elements(It first) { - return array_alloc::uninitialized_copy_n(first, this->num_elements(), this->data_elements()); - } - - void destroy() { - if constexpr(not std::is_trivially_destructible_v) { - array_alloc::destroy_n(this->data_elements(), this->num_elements()); - } - } - - void allocate() {this->base_ = array_alloc::allocate(static_cast::size_type>(static_array::num_elements()));} - - public: - using value_type = typename std::conditional< - (D > 1), // this parenthesis is needed - array, - typename static_array::element - >::type; - - using typename ref::size_type; - using typename ref::difference_type; - explicit static_array(allocator_type const& alloc) : array_alloc{alloc} {} - - using ref::operator(); -// HD constexpr 
auto operator()() & -> decltype(auto) {return ref::operator()();} - HD constexpr auto operator()() && -> decltype(auto) {return ref::element_moved();} -// HD constexpr auto operator()() const& -> decltype(auto) {return ref::operator()();} - - using ref::take; - constexpr auto take(difference_type n) && -> decltype(auto) {return ref::take(n).element_moved();} - - using ref::drop; - constexpr auto drop(difference_type n) && -> decltype(auto) {return ref::drop(n).element_moved();} - - protected: - static_array(static_array&& other, allocator_type const& alloc) noexcept // 6b TODO(correaa) move from array only - : array_alloc{alloc} // TODO(correaa) : handle allocation propagation here - , ref{other.base_, other.extensions()} { - other.layout_mutable() = {}; - // other.ref::layout_t::operator=({}); - other.base_ = nullptr; - } - - static_array(static_array&& other) noexcept - : static_array(std::move(other), allocator_type{}) {} // 6b - - public: - template>::difference_type> // decltype(std::distance(std::declval(), std::declval()), *std::declval())> - // analogous to std::vector::vector (5) https://en.cppreference.com/w/cpp/container/vector/vector - static_array(It first, It last, allocator_type const& alloc) - : array_alloc{alloc} - , ref { - array_alloc::allocate(static_cast::size_type>(layout_type {index_extension {adl_distance(first, last)}*multi::extensions(*first)}.num_elements())), - index_extension {adl_distance(first, last)}*multi::extensions(*first) - } { - adl_alloc_uninitialized_copy(static_array::alloc(), first, last, ref::begin()); - } - - template>::difference_type> // decltype(std::distance(std::declval(), std::declval()), *std::declval())> - // analogous to std::vector::vector (5) https://en.cppreference.com/w/cpp/container/vector/vector - static_array(It first, It last) : static_array(first, last, allocator_type{}) {} - - template< - class Range, class = std::enable_if_t>{}>, - class = decltype(/*static_array*/(std::declval().begin() - 
std::declval().end())), // instantiation of static_array here gives a compiler error in 11.0, partially defined type? - class = std::enable_if_t{}> - > - // cppcheck-suppress noExplicitConstructor ; because I want to use equal for lazy assigments form range-expressions // NOLINTNEXTLINE(runtime/explicit) - static_array(Range const& rng) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax - : static_array{rng.begin(), rng.end()} {} - - template - auto uninitialized_fill_elements(TT const& value) { - return array_alloc::uninitialized_fill_n(this->data_elements(), this->num_elements(), value); - } - - // vvv TODO(correaa) : check if really necessary - template - static_array(array_ref const& other, allocator_type const& alloc) - : array_alloc{alloc} - , ref{ - array_alloc::allocate(static_cast::size_type>(other.num_elements())), - other.extensions() - } { - if constexpr(std::is_trivial_v) { - adl_copy_n( other.data_elements(), other.num_elements(), this->data_elements()); - } else { - adl_alloc_uninitialized_copy_n(static_array::alloc(), other.data_elements(), other.num_elements(), this->data_elements()); - } - } - - template - // cppcheck-suppress noExplicitConstructor ; because argument can be well-represented // NOLINTNEXTLINE(runtime/explicit) - static_array(array_ref const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax - : static_array(other, allocator_type{}) {} - // ^^^ TODO(correaa) : check if really necessary - - static_array(typename static_array::extensions_type extensions, typename static_array::element const& elem, allocator_type const& alloc) // 2 - : array_alloc{alloc} - , ref{array_alloc::allocate(static_cast::size_type>(typename static_array::layout_t{extensions}.num_elements()), nullptr), extensions} { - array_alloc::uninitialized_fill_n(this->data_elements(), static_cast::size_type>(this->num_elements()), elem); - } - - template{} and (D == 0), int> = 0> - 
explicit static_array(Element const& elem, allocator_type const& alloc) - : static_array(typename static_array::extensions_type{}, elem, alloc) {} - - static_array(typename static_array::extensions_type extensions, typename static_array::element const& elem) // 2 - : array_alloc{} - , ref{array_alloc::allocate(static_cast::size_type>(typename static_array::layout_t{extensions}.num_elements()), nullptr), extensions} { - array_alloc::uninitialized_fill_n(this->base(), static_cast::size_type>(this->num_elements()), elem); - } - - explicit static_array(typename static_array::extensions_type extensions, typename allocator_traits::const_void_pointer hint) - : array_alloc{} - , ref{array_alloc::allocate(static_cast::size_type>(typename static_array::layout_t{extensions}.num_elements()), hint), extensions} {} - - template{}>> - explicit static_array(typename static_array::index_extension const& extension, ValueType const& value, allocator_type const& alloc) // 3 - = delete; - - template{}>> - explicit static_array(typename static_array::index_extension const& extension, ValueType const& value) // 3 - = delete; - -// analgous to std::vector::vector ((4)) https://en.cppreference.com/w/cpp/container/vector/vector - explicit static_array(typename static_array::extensions_type extensions, allocator_type const& alloc) - : array_alloc{alloc} - , ref{array_alloc::allocate(static_cast::size_type>(typename static_array::layout_t{extensions}.num_elements())), extensions} { - uninitialized_default_construct(); - } - - explicit static_array(typename static_array::extensions_type extensions) - : static_array(extensions, allocator_type{}) {} - - template::element>{}>, - class = decltype(adl_copy(std::declval const&>().begin(), std::declval const&>().end(), std::declval())) - > - static_array(multi::basic_array const& other, allocator_type const& alloc) - : static_array(other.extensions(), alloc) { - adl_uninitialized_copy(other.begin(), other.end(), this->begin()); // TODO(correaa): call 
this conditionally on T properties - } - - template::element>{}>, - class = decltype(adl_copy(std::declval const&>().begin(), std::declval const&>().end(), std::declval())) - > - // cppcheck-suppress noExplicitConstructor ; because argument can be well-represented // NOLINTNEXTLINE(runtime/explicit) - static_array(multi::basic_array const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax - : static_array(other, allocator_type{}) {} - - template - explicit static_array(array_ref&& other) - : array_alloc{} - , ref{array_alloc::allocate(other.num_elements()), other.extensions()} { - static_array::uninitialized_copy_elements(std::move(other).data_elements()); - } - - static_array(static_array const& other) // 5b - : array_alloc{allocator_traits::select_on_container_copy_construction(other.alloc())} - , ref{array_alloc::allocate(static_cast::size_type>(other.num_elements()), other.data_elements()), extensions(other)} { - uninitialized_copy_elements(other.data_elements()); - } - - // cppcheck-suppress noExplicitConstructor ; to allow assignment-like construction of nested arrays - static_array(std::initializer_list::value_type> values) - : static_array{static_array(values.begin(), values.end())} {} // construct all with default constructor and copy to special memory at the end - - static_array( - std::initializer_list::value_type> values, - allocator_type const& alloc - ) - : static_array{static_array(values.begin(), values.end()), alloc} {} - - template - constexpr explicit static_array(TT(&array)[N]) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backward compatibility - : static_array(std::begin(array), std::end(array)) {} - - // template static auto distance(It a, It b) { - // using std::distance; - // return distance(a, b); - // } - - constexpr auto begin() const& -> typename static_array::const_iterator {return ref:: begin();} - constexpr auto end () const& -> typename 
static_array::const_iterator {return ref:: end ();} - - constexpr auto begin() && -> typename static_array:: move_iterator {return ref::mbegin();} - constexpr auto end () && -> typename static_array:: move_iterator {return ref::mend ();} - - constexpr auto begin() & -> typename static_array:: iterator {return ref:: begin();} - constexpr auto end () & -> typename static_array:: iterator {return ref:: end ();} - - constexpr auto operator[](index idx) const& -> typename static_array::const_reference {return ref::operator[](idx);} - constexpr auto operator[](index idx) && -> decltype(auto) { - if constexpr(D == 1) {return std::move(ref::operator[](idx) );} - else {return ref::operator[](idx).moved();} // NOLINT(readability/braces) - } - constexpr auto operator[](index idx) & -> typename static_array:: reference {return ref::operator[](idx);} - - protected: - void deallocate() { - if(this->num_elements()) { - alloc_traits::deallocate(this->alloc(), this->base_, static_cast(this->num_elements())); - } - } - void clear() noexcept { - this->destroy(); - deallocate(); - this->layout_mutable() = {}; - } - template - constexpr auto reindex(Indices... idxs) & -> static_array& { - static_array::layout_t::reindex(idxs...); - return *this; - } - template - constexpr auto reindex(Indices... idxs) && -> static_array&& { - reindex(idxs...); - return std::move(*this); - } - - public: - static_array() = default; - ~static_array() noexcept {destroy(); deallocate();} - - using element_const_ptr = typename std::pointer_traits::template rebind; - using element_move_ptr = multi::move_ptr; - - using reference = typename std::conditional< - (D > 1), - basic_array, - typename std::conditional< - D == 1, - typename std::iterator_traits::reference, - void - >::type - >::type; - using const_reference = typename std::conditional< - (D > 1), - basic_array, // TODO(correaa) should be const_reference, but doesn't work witn rangev3? 
- typename std::conditional< - D == 1, - decltype(*std::declval()), - void - >::type - >::type; - - using iterator = multi::array_iterator; - using const_iterator = multi::array_iterator; - - friend - #if not defined(__NVCC__) and not defined(__INTEL_COMPILER) - constexpr - #endif - auto get_allocator(static_array const& self) -> allocator_type {return self.get_allocator();} - - HD constexpr auto data_elements() const& -> element_const_ptr {return this->base_;} - HD constexpr auto data_elements() & -> typename static_array::element_ptr {return this->base_;} - HD constexpr auto data_elements() && -> typename static_array::element_move_ptr {return std::make_move_iterator(this->base_);} - friend constexpr auto data_elements(static_array const& self) {return self .data_elements();} - friend constexpr auto data_elements(static_array & self) {return self .data_elements();} - friend constexpr auto data_elements(static_array && self) {return std::move(self).data_elements();} - - constexpr auto base() & -> typename static_array::element_ptr {return ref::base();} - constexpr auto base() const& -> typename static_array::element_const_ptr {return typename static_array::element_const_ptr{ref::base()};} - friend constexpr auto base(static_array & self) -> typename static_array::element_ptr {return self.base();} - friend constexpr auto base(static_array const& self) -> typename static_array::element_const_ptr {return self.base();} - - constexpr auto origin() & -> typename static_array::element_ptr {return ref::origin();} - constexpr auto origin() const& -> typename static_array::element_const_ptr {return ref::origin();} - friend constexpr auto origin(static_array & self) -> typename static_array::element_ptr {return self.origin();} - friend constexpr auto origin(static_array const& self) -> typename static_array::element_const_ptr {return self.origin();} - - private: - constexpr auto rotated_aux() const { - typename static_array::layout_t new_layout = this->layout(); - 
new_layout.rotate(); - return basic_array{new_layout, this->base_}; - } - - public: - constexpr auto rotated() const& {return std::move(*this).rotated_aux();} - constexpr auto rotated() & {return std::move(*this).rotated_aux();} - constexpr auto rotated() && {return std::move(*this).rotated_aux();} - - friend constexpr auto rotated(static_array& self) -> decltype(auto) {return self.rotated();} - friend /*constexpr*/ auto rotated(static_array const& self) -> decltype(auto) {return self.rotated();} - - constexpr auto unrotated() const& { - typename static_array::layout_t new_layout = this->layout(); - new_layout.unrotate(); - return basic_array{new_layout, this->base_}; - } - constexpr auto unrotated() & { - typename static_array::layout_t new_layout = this->layout(); - new_layout.unrotate(); - return basic_array{new_layout, this->base_}; - } - -// constexpr auto unrotated() const& {return unrotated(1);} -// constexpr auto unrotated() & {return unrotated(1);} - - friend constexpr auto unrotated(static_array & self) -> decltype(auto) {return self.unrotated();} - friend constexpr auto unrotated(static_array const& self) -> decltype(auto) {return self.unrotated();} - - template - auto operator=(multi::basic_array const& other) -> static_array& { - ref::operator=(other); // TODO(correaa) : protect for self assigment - return *this; - } - auto operator=(static_array const& other) & -> static_array& { - if(std::addressof(other) == this) {return *this;} // cert-oop54-cpp - assert( extensions(other) == static_array::extensions() ); - if(&other == this) {return *this;} // lints (cert-oop54-cpp) : handle self-assignment properly - adl_copy_n(other.data_elements(), other.num_elements(), this->data_elements()); - return *this; - } - constexpr auto operator=(static_array&& other) noexcept -> static_array& { // lints (cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - assert( extensions(other) == static_array::extensions() ); // 
NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : allow a constexpr-friendly assert - adl_move(other.data_elements(), other.data_elements() + other.num_elements(), this->data_elements()); // there is no std::move_n algorithm - return *this; - } - template - auto operator=(static_array const& other) & -> static_array& { - assert( extensions(other) == static_array::extensions() ); - adl_copy_n(other.data_elements(), other.num_elements(), this->data_elements()); - return *this; - } - constexpr explicit operator basic_array()& { - return this->template static_array_cast(*this); - } - - template - void serialize(Archive& arxiv, const unsigned int version) { - ref::serialize(arxiv, version); - } -}; - -template -struct static_array // NOLINT(fuchsia-multiple-inheritance) : design -: protected array_allocator -, public array_ref::allocator_type>::pointer> { - static_assert( std::is_same::value_type, typename static_array::element>{}, - "allocator value type must match array value type"); - - private: - using array_alloc = array_allocator; - - public: - // NOLINTNEXTLINE(runtime/operator) - auto operator&() && -> static_array * = delete; // NOLINT(google-runtime-operator) : delete to avoid taking address of temporary - // NOLINTNEXTLINE(runtime/operator) - auto operator&() & -> static_array * {return this;} // NOLINT(google-runtime-operator) : override from base - // NOLINTNEXTLINE(runtime/operator) - auto operator&() const& -> static_array const* {return this;} // NOLINT(google-runtime-operator) : override from base - - using array_alloc::get_allocator; - using allocator_type = typename static_array::allocator_type; - - protected: - using alloc_traits = typename multi::allocator_traits; - using ref = array_ref::template rebind_alloc>::pointer>; - - auto uninitialized_value_construct_if_not(std::true_type /*true */) {} - auto uninitialized_value_construct_if_not(std::false_type/*false*/) { - return 
adl_alloc_uninitialized_value_construct_n(static_array::alloc(), this->base_, this->num_elements()); - } - auto uninitialized_value_construct() { - uninitialized_value_construct_if_not(std::is_trivially_default_constructible{}); - } - - template auto uninitialized_copy(It first) {return adl_alloc_uninitialized_copy_n(this->alloc(), first, this->num_elements(), this->data_elements());} - template - auto uninitialized_move(It first) { - return adl_alloc_uninitialized_move_n(this->alloc(), first, this->num_elements(), this->data_elements()); - } - void destroy() {array_alloc::destroy_n(this->data_elements(), this->num_elements());} - - public: - using typename ref::value_type; - using typename ref::size_type; - using typename ref::difference_type; - constexpr explicit static_array(allocator_type const& alloc) : array_alloc{alloc} {} - - protected: - constexpr static_array(static_array&& other, allocator_type const& alloc) // 6b - : array_alloc{alloc} - , ref{other.base_, other.extensions()} { - other.ref::layout_t::operator=({}); - } - - public: - using ref::operator==; - using ref::operator!=; - - static_array(typename static_array::extensions_type extensions, typename static_array::element const& elem, allocator_type const& alloc) // 2 - : array_alloc{alloc} - , ref(static_array::allocate(typename static_array::layout_t{extensions}.num_elements()), extensions) { - uninitialized_fill(elem); - } - - static_array(typename static_array::element_type const& elem, allocator_type const& alloc) - : static_array(typename static_array::extensions_type{}, elem, alloc) {} - - auto uninitialized_fill(typename static_array::element const& elem) { - array_alloc::uninitialized_fill_n( - this->base_, - static_cast::size_type>(this->num_elements()), - elem - ); - } - - static_array( - typename static_array::extensions_type const& extensions, - typename static_array::element const& elem - ) // 2 - : array_alloc{} - , ref(static_array::allocate(static_cast::size_type>(typename 
static_array::layout_t{extensions}.num_elements()), nullptr), extensions) { - uninitialized_fill(elem); - } - - static_array() : static_array(multi::iextensions<0>{}) {} - - explicit static_array(typename static_array::element const& elem) // 2 - : static_array(multi::iextensions<0>{}, elem) {} - - template{}>> - explicit static_array(typename static_array::index_extension const& extension, ValueType const& value, allocator_type const& alloc) // 3 - : static_array(extension*extensions(value), alloc) { - using std::fill; fill(this->begin(), this->end(), value); - } - template{}>> - explicit static_array(typename static_array::index_extension const& extension, ValueType const& value) // 3 // TODO(correaa) : call other constructor (above) - : static_array(extension*extensions(value)) { - using std::fill; fill(this->begin(), this->end(), value); - } - - explicit static_array(typename static_array::extensions_type const& extensions, allocator_type const& alloc) // 3 - : array_alloc{alloc} - , ref{static_array::allocate(typename static_array::layout_t{extensions}.num_elements()), extensions} { - uninitialized_value_construct(); - } - explicit static_array(typename static_array::extensions_type const& extensions) // 3 - : static_array(extensions, allocator_type{}) {} - - template - explicit static_array(multi::basic_array const& other, allocator_type const& alloc) - : array_alloc{alloc} - , ref(static_array::allocate(other.num_elements()), extensions(other)) { - using std::copy; copy(other.begin(), other.end(), this->begin()); - } - template - explicit static_array(multi::basic_array const& other) // TODO(correaa) : call other constructor (above) - : array_alloc{}, ref(static_array::allocate(other.num_elements()) - , extensions(other)) { - using std::copy; copy(other.begin(), other.end(), this->begin()); - } - - template - explicit static_array(array_ref const& other) - : array_alloc{}, ref{static_array::allocate(other.num_elements()), extensions(other)} { - 
uninitialized_copy_(other.data_elements()); - } - - static_array(static_array const& other, allocator_type const& alloc) // 5b - : array_alloc{alloc} - , ref{static_array::allocate(other.num_elements()), extensions(other)} { - uninitialized_copy_(other.data_elements()); - } - - static_array(static_array const& other) // 5b - : array_alloc{other.get_allocator()} - , ref{static_array::allocate(other.num_elements(), other.data_elements()), {}} { - uninitialized_copy(other.data_elements()); - } - - static_array(static_array&& other) noexcept // it is private because it is a valid operation for derived classes //5b - : array_alloc{other.get_allocator()} - , ref{static_array::allocate(static_cast::size_type>(other.num_elements()), other.data_elements()), other.extensions()} { - uninitialized_move(other.data_elements()); - } -// template static auto distance(It a, It b) {using std::distance; return distance(a, b);} - - protected: - void deallocate() { // TODO(correaa) : move this to array_allocator - if(this->num_elements()) { - alloc_traits::deallocate(this->alloc(), this->base_, static_cast(this->num_elements())); - } - } - void clear() noexcept { - this->destroy(); - deallocate(); - layout_t<0>::operator=({}); - } - - public: - ~static_array() noexcept { - this->destroy(); - deallocate(); - } - using element_const_ptr = typename std::pointer_traits::template rebind; - - friend - #if not defined(__NVCC__) and not defined(__INTEL_COMPILER) - constexpr - #endif - auto get_allocator(static_array const& self) -> allocator_type {return self.get_allocator();} - - constexpr auto base() & -> typename static_array::element_ptr {return ref::base();} - constexpr auto base() const& -> typename static_array::element_const_ptr {return ref::base();} - friend constexpr auto base(static_array & self) -> typename static_array::element_ptr {return self.base();} - friend constexpr auto base(static_array const& self) -> typename static_array::element_const_ptr {return self.base();} - - 
constexpr auto origin() & -> typename static_array::element_ptr {return ref::origin();} - constexpr auto origin() const& -> typename static_array::element_const_ptr {return ref::origin();} - friend constexpr auto origin(static_array & self) -> typename static_array::element_ptr {return self.origin();} - friend constexpr auto origin(static_array const& self) -> typename static_array::element_const_ptr {return self.origin();} - - constexpr explicit operator typename std::iterator_traits::reference() const& { - return *(this->base_); - } - constexpr explicit operator typename std::add_rvalue_reference::reference>::type()&& { - return *(this->base_); - } - constexpr explicit operator typename std::iterator_traits::reference()& { - return *(this->base_); - } - - constexpr auto rotated() const& { - typename static_array::layout_t new_layout = *this; - new_layout.rotate(); - return basic_array{new_layout, this->base_}; - } - - constexpr auto rotated() & { - typename static_array::layout_t new_layout = *this; - new_layout.rotate(); - return basic_array{new_layout, this->base_}; - } - - constexpr auto rotated() && { - typename static_array::layout_t new_layout = *this; - new_layout.rotate(); - return basic_array{new_layout, this->base_}; - } - - friend constexpr auto rotated(static_array& self) -> decltype(auto) {return self.rotated();} - friend constexpr auto rotated(static_array const& self) -> decltype(auto) {return self.rotated();} - - private: - constexpr auto unrotated_aux() { - typename static_array::layout_t new_layout = *this; - new_layout.unrotate(); - return basic_array{new_layout, this->base_}; - } - - public: - constexpr auto unrotated() & {return unrotated_aux();} - constexpr auto unrotated() const& {return unrotated_aux().as_const();} - - friend constexpr auto unrotated(static_array & self) -> decltype(auto) {return self.unrotated();} - friend constexpr auto unrotated(static_array const& self) -> decltype(auto) {return self.unrotated();} - -// constexpr auto 
operator<<(dimensionality_type d) -> decltype(auto) {return rotated(d);} -// constexpr auto operator>>(dimensionality_type d) -> decltype(auto) {return unrotated(d);} - -// constexpr auto operator<<(dimensionality_type d) const -> decltype(auto) {return rotated(d);} -// constexpr auto operator>>(dimensionality_type d) const -> decltype(auto) {return unrotated(d);} - - constexpr auto operator=(static_array const& other) -> static_array& { - assert( extensions(other) == static_array::extensions() ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : allow a constexpr-friendly assert - if(this == &other) {return *this;} // lints (cert-oop54-cpp) : handle self-assignment properly - adl_copy_n(other.data_elements(), other.num_elements(), this->data_elements()); - return *this; - } - - private: - constexpr auto equal_extensions_if(std::true_type /*true */, static_array const& other ) {return this->extensions() == extensions(other);} - constexpr auto equal_extensions_if(std::false_type /*false*/, static_array const&/*other*/) {return true;} - - public: - constexpr auto operator=(static_array&& other) noexcept -> static_array& { - assert( equal_extensions_if(std::integral_constant{}, other) ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : allow a constexpr-friendly assert - adl_move(other.data_elements(), other.data_elements() + other.num_elements(), this->data_elements()); // there is no std::move_n algorithm - return *this; - } - - template{}> > - auto operator=(static_array const& other)& -> static_array& { - assert( extensions(other) == static_array::extensions() ); - adl_copy_n(other.data_elements(), other.num_elements(), this->data_elements()); - return *this; - } - - constexpr explicit operator basic_array()& { - return this->template static_array_cast(); - // return static_array_cast(*this); - } - - template - void serialize(Archive& arxiv, const unsigned int version) { - 
ref::serialize(arxiv, version); - } -}; - -template -struct array : static_array{ - using static_ = static_array; - using static_::static_; - - auto reextent(typename array::extensions_type const& /*empty_extensions*/) -> array& { - return *this; - } - - // NOLINTNEXTLINE(runtime/operator) - auto operator&() && -> array * = delete; // NOLINT(google-runtime-operator) : delete operator&& defined in base class to avoid taking address of temporary - // auto operator&() & -> array *{return this;} - // auto operator&() const& -> array const*{return this;} -}; - -template -struct array : static_array { - using static_ = static_array; - static_assert( - std::is_same{} - or std::is_same{}, "!" - ); - - public: - // NOLINTNEXTLINE(runtime/operator) - auto operator&() && -> array * = delete; // NOLINT(google-runtime-operator) : delete operator&& defined in base class to avoid taking address of temporary - // NOLINTNEXTLINE(runtime/operator) - auto operator&() & -> array * {return this;} // NOLINT(google-runtime-operator) : delete operator&& defined in base class to avoid taking address of temporary - // NOLINTNEXTLINE(runtime/operator) - auto operator&() const& -> array const* {return this;} // NOLINT(google-runtime-operator) : delete operator&& defined in base class to avoid taking address of temporary - - friend auto sizes(array const& self) -> typename array::sizes_type {return self.sizes();} - - template - void serialize(Archive& arxiv, const unsigned int version) { - using AT = multi::archive_traits; - auto extensions_ = this->extensions(); - arxiv & AT::make_nvp("extensions", extensions_); - // arxiv & boost::serialization::make_nvp("extensions", extensions ); - // arxiv & cereal ::make_nvp("extensions", extensions ); - // arxiv & BOOST_SERIALIZATION_NVP( extensions ); - // arxiv & CEREAL_NVP( extensions ); - // arxiv & extensions ; - if(this->extensions() != extensions_) {clear(); this->reextent(extensions_);} - static_::serialize(arxiv, version); - } - - using 
static_::static_; - using typename static_::value_type; - - // cppcheck-suppress noExplicitConstructor ; to allow assignment-like construction of nested arrays - array(std::initializer_list::value_type> ilv) - : static_{static_array(ilv.begin(), ilv.end())} {} // construct all with default constructor and copy to special memory at the end - - array() = default; - array(array const&) = default; - - auto reshape(typename array::extensions_type extensions) & -> array& { - typename array::layout_t new_layout{extensions}; // TODO(correaa) implement move-reextent in terms of reshape - assert( new_layout.num_elements() == this->num_elements() ); - this->layout_mutable() = new_layout; - return *this; - } - - auto clear() noexcept -> array& { - static_::clear(); - return *this; - } - friend auto clear(array& self) noexcept -> array& {return self.clear();} - - friend auto data_elements(array const& self) {return self.data_elements();} - friend auto data_elements(array & self) {return self.data_elements();} - friend auto data_elements(array && self) {return std::move(self).data_elements();} - - auto move() & -> basic_array> { - basic_array> - ret = multi::static_array_cast>(*this); - layout_t::operator=({}); - return ret; - } - friend auto move(array& self) -> basic_array >{ - return self.move(); - } - - array(array&& other, typename array::allocator_type const& alloc) noexcept : static_{std::move(other), alloc} {} - array(array&& other) noexcept : array{std::move(other), other.get_allocator()} {} - - friend - #if not defined(__NVCC__) and not defined(__NVCOMPILER) and not defined(__INTEL_COMPILER) - constexpr - #endif - auto get_allocator(array const& self) -> typename array::allocator_type {return self.get_allocator();} - - void swap(array& other) noexcept { - using std::swap; - if constexpr(allocator_traits::propagate_on_container_swap::value) { - swap(this->alloc(), other.alloc()); - } - swap(this->base_, other.base_); - swap( - this->layout_mutable(), - 
other.layout_mutable() - ); - } - -#ifndef NOEXCEPT_ASSIGNMENT - auto operator=(array&& other) noexcept -> array& { - clear(); - this->base_ = other.base_; - if constexpr(allocator_traits::propagate_on_container_move_assignment::value) { - this->alloc() = std::move(other.alloc()); - } - this->layout_mutable() = std::exchange(other.layout_mutable(), {}); - return *this; - } - - auto operator=(array const& other) -> array& { - if(array::extensions() == other.extensions()) { - if(this == &other) {return *this;} // required by cert-oop54-cpp - if constexpr(allocator_traits::propagate_on_container_copy_assignment::value) { - this->alloc() = other.alloc(); - } - static_::operator=(other); - } else { - clear(); - if constexpr(allocator_traits::propagate_on_container_copy_assignment::value) { - this->alloc() = other.alloc(); - } - this->layout_mutable() = other.layout(); - array::allocate(); - array::uninitialized_copy_elements(other.data_elements()); - } - return *this; - } -#else - auto operator=(array o) noexcept -> array& {return swap(o), *this;} -#endif - - template< - class Range, - class = decltype(std::declval().operator=(std::declval())), - std::enable_if_t>{}, int> = 0 - > - auto operator=(Range&& other) ->array& { // TODO(correaa) : check that LHS is not read-only? 
- if(array::extensions() == other.extensions()) { - static_::operator=(other); - } else if(this->num_elements() == other.extensions().num_elements()) { - reshape(other.extensions()); - static_::operator=(other); - } else { - operator=(static_cast(std::forward(other))); - } - return *this; - } - - template - auto operator=(multi::basic_array const& other) -> array& { - if(array::extensions() == other.extensions()) { - static_::operator=(other); // TODO(correaa) : protect for self assigment - } else { - operator=(array{other}); - } - return *this; - } - - friend void swap(array& self, array& other) {self.swap(other);} - - void assign(typename array::extensions_type extensions, typename array::element const& elem) { - if(array::extensions() == extensions) { - adl_fill_n(this->base_, this->num_elements(), elem); - } else { - this->clear(); - (*this).array::layout_t::operator=(layout_t{extensions}); - this->base_ = this->static_::array_alloc::allocate(this->num_elements(), nullptr); - adl_alloc_uninitialized_fill_n(this->alloc(), this->base_, this->num_elements(), elem); // recursive_uninitialized_fill(alloc(), begin(), end(), e); - } - } - - template - auto assign(It first, It last) -> array& { - using std::next; using std::all_of; - if(adl_distance(first, last) == array::size()) { // and multi::extensions(*first) == multi::extensions(*array::begin())){ - static_::ref::assign(first); - } else { - this->operator=(array(first, last)); - } - return *this; - } - void assign(std::initializer_list values) {assign(values.begin(), values.end());} - - template auto assign(Range&& other) & - ->decltype(assign(adl_begin(other), adl_end(other))) { // TODO(correaa) use forward - return assign(adl_begin(other), adl_end(other)); } - - auto operator=(std::initializer_list values) -> array& { - assign(values.begin(), values.end()); - return *this; - } - - template - [[deprecated]] auto reextent(std::tuple const& other) -> array& { - return reextent( - std::apply([](auto const&... 
extensions) {return typename array::extensions_type(extensions...);}, other) - ); // paren is important here ext_type(...) for allow narrowing casts ^^^ - } - - auto reextent(typename array::extensions_type const& extensions) && -> array& { - if(extensions == this->extensions()) {return *this;} - this->destroy(); - this->deallocate(); - this->layout_mutable() = typename array::layout_t{extensions}; - this->base_ = this->static_::array_alloc::allocate( - static_cast::size_type>( - typename array::layout_t{extensions}.num_elements() - ), - this->data_elements() // used as hint - ); - if constexpr(not std::is_trivially_default_constructible{}) { // TODO(correaa) convert into constexpr if - adl_alloc_uninitialized_value_construct_n(this->alloc(), this->base_, this->num_elements()); - } - - return *this; - } - - auto reextent(typename array::extensions_type const& extensions) & -> array& { - if(extensions == this->extensions()) {return *this;} -#if 0 - array tmp(x, this->get_allocator()); // TODO(correaa) opportunity missed to use hint allocation - auto const is = intersection(this->extensions(), x); - tmp.apply(is) = this->apply(is); - swap(tmp); -#else - auto&& tmp = typename array::ref{ - this->static_::array_alloc::allocate( - static_cast::size_type>( - typename array::layout_t{extensions}.num_elements() - ), - this->data_elements() // used as hint - ), - extensions - }; - if constexpr(not std::is_trivially_default_constructible{}) { // TODO(correaa) convert into constexpr if - adl_alloc_uninitialized_value_construct_n(this->alloc(), tmp.data_elements(), tmp.num_elements()); - } - auto const is = intersection(this->extensions(), extensions); - tmp.apply(is) = this->apply(is); // TODO(correaa) : use (and implement) `.move();` - this->destroy(); - this->deallocate(); - this->base_ = tmp.base(); - this->layout_mutable() = tmp.layout(); -#endif - return *this; - } - - auto reextent(typename array::extensions_type const& extensions, typename array::element const& elem) 
&& -> array& { - if(extensions == this->extensions()) {return *this;} - this->destroy(); - this->deallocate(); - this->layout_mutable() = typename array::layout_t{extensions}; - this->base_ = this->static_::array_alloc::allocate( - static_cast::size_type>( - typename array::layout_t{extensions}.num_elements() - ), - this->data_elements() // used as hint - ); - this->uninitialized_fill_n(this->base_, static_cast::size_type>(this->num_elements()), elem); - - return *this; - } - - auto reextent(typename array::extensions_type const& exs, typename array::element const& elem) & -> array& { - if(exs == this->extensions()) { - return *this; - } -#if 0 - array tmp(x, e, this->get_allocator()); // TODO(correaa) opportunity missed to use hint allocation - auto const is = intersection(this->extensions(), x); - tmp.apply(is) = this->apply(is); - swap(tmp); -#else // implementation with hint - auto&& tmp = typename array::ref{this->static_::array_alloc::allocate( - static_cast::size_type>(typename array::layout_t{exs}.num_elements()), - this->data_elements() // use as hint - ), exs}; - this->uninitialized_fill_n(tmp.data_elements(), static_cast::size_type>(tmp.num_elements()), elem); - auto const is = intersection(this->extensions(), exs); - tmp.apply(is) = this->apply(is); - this->destroy(); - this->deallocate(); - this->base_ = tmp.base(); // TODO(correaa) : use (and implement) `.move();` - this->layout_mutable() = tmp.layout(); - // (*this).array::layout_t::operator=(tmp.layout()); -#endif - return *this; - } - template constexpr auto reindex(Indices... idxs)&& -> array&& {this->layout_mutable().reindex(idxs...); return std::move(*this);} - template constexpr auto reindex(Indices... 
idxs) & -> array & {this->layout_mutable().reindex(idxs...); return *this ;} - - ~array() = default; -}; - -#if defined(__cpp_deduction_guides) - -#define IL std::initializer_list // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing TODO(correaa) remove - -template static_array(IL ) -> static_array; -template static_array(IL> ) -> static_array; -template static_array(IL>> ) -> static_array; -template static_array(IL>>> ) -> static_array; -template static_array(IL>>>>) -> static_array; - -template array(IL ) -> array; -template array(IL> ) -> array; -template array(IL>> ) -> array; -template array(IL>>> ) -> array; -template array(IL>>>>) -> array; - -#undef IL - -template array(T[] ) -> array; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) - -// vvv these are necessary to catch {n, m, ...} notation (or single integer notation) -template>> array(iextensions<0>, T) -> array; -template>> array(iextensions<1>, T) -> array; -template>> array(iextensions<2>, T) -> array; -template>> array(iextensions<3>, T) -> array; -template>> array(iextensions<4>, T) -> array; -template>> array(iextensions<5>, T) -> array; - -// generalization, will not work with naked {n, m, ...} notation (or single integer notation) -template> > -array(iextensions, T) -> array; - -template -array(MatrixRef)->array; - -template array(basic_array)->array; - -#endif // ends defined(__cpp_deduction_guides) - -template -auto decay(const T(&arr)[N]) noexcept -> multi::array::type, std::rank_v> { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backwards compatibility - return multi::array_cref::type, std::rank_v>(data_elements(arr), extensions(arr)); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backwards compatibility -} - -template -struct array_traits { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for 
backwards compatibility - using reference = T&; - using element = std::remove_all_extents_t; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : for backwards compatibility - using decay_type = multi::array; -}; - -} // end namespace boost::multi - -#if defined(__cpp_lib_memory_resource) && (__cpp_lib_memory_resource >= 201603) -namespace boost::multi::pmr { - -template -using array = boost::multi::array>; - -} // end namespace boost::multi::pmr -#endif - -namespace boost::serialization { - -template -struct version< boost::multi::array > { - using type = std::integral_constant; // typedef mpl::int_<1> type; - enum { value = type::value }; -}; - -} // end namespace boost::serialization - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/array_ref.hpp b/external_codes/boost_multi/multi/include/multi/array_ref.hpp deleted file mode 100644 index eb87e25ee8..0000000000 --- a/external_codes/boost_multi/multi/include/multi/array_ref.hpp +++ /dev/null @@ -1,2746 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. 
Correa - -#ifndef MULTI_ARRAY_REF_HPP -#define MULTI_ARRAY_REF_HPP - -#include "./memory/pointer_traits.hpp" -#include "utility.hpp" - -#include "./config/ASSERT.hpp" -#include "./config/DELETE.hpp" -#include "./config/MARK.hpp" - -#include "./detail/adl.hpp" -#include "./detail/layout.hpp" -#include "./detail/memory.hpp" // for pointer_traits -#include "./detail/operators.hpp" // for random_iterable -#include "./detail/serialization.hpp" -#include "./detail/types.hpp" // for dimensionality_type - -#if defined(__NVCC__) -#define HD __host__ __device__ -#else -#define HD -#endif - -#include // fpr copy_n -#include // for memset in reinterpret_cast -#include // for invoke -#include // for next -#include // for pointer_traits -#include // for forward - -namespace std { - -template -struct pointer_traits> : std::pointer_traits { - template using rebind = - std::conditional_t< - std::is_const::value, - U*, - std::pointer_traits> - >; -}; - -} // end namespace std - -namespace boost::multi { - -template> -struct basic_array; - -} // end namespace boost::multi - -namespace boost::multi { - -template -constexpr auto home(Array&& array) -->decltype(std::forward(array).home()) { - return std::forward(array).home(); } - -template auto modify(T const& value) -> T& {return const_cast(value);} // NOLINT(cppcoreguidelines-pro-type-const-cast) : TODO(correaa) see what is this used for - -template> struct array; - -template> -struct array_types : private Layout { // cppcheck-suppress syntaxError ; false positive in cppcheck - using element = T; - using element_type = element; // this follows more closely https://en.cppreference.com/w/cpp/memory/pointer_traits - - using element_ptr = ElementPtr; - using element_const_ptr = typename std::pointer_traits::template rebind; - using element_move_ptr = multi::move_ptr; - - using element_ref = typename std::iterator_traits::reference; - - using layout_t = Layout; - - using rank = typename layout_t::rank ; - - using layout_t::rank_v; - using 
layout_t::dimensionality; - - using typename layout_t::stride_type; - using layout_t::stride ; - - using layout_t::num_elements; - using layout_t::offset; - - using layout_t::offsets; - - using typename layout_t::index; - using typename layout_t::index_range; - using typename layout_t::index_extension; - - using typename layout_t::strides_type; - using layout_t::strides ; - - using typename layout_t::difference_type; - - using typename layout_t::size_type; - using layout_t::size ; - - using layout_t::nelems; - - using typename layout_t::extension_type; - using layout_t::extension; - - using typename layout_t::extensions_type; - using layout_t::extensions; - - constexpr auto extensions() const -> extensions_type {return static_cast(*this).extensions();} - - using layout_t::is_empty; - using layout_t:: empty; - - using layout_t::sub; - - using typename layout_t::sizes_type; - using layout_t::sizes; - - using layout_t::is_compact; - - friend constexpr auto size (array_types const& self) noexcept -> size_type {return self.size ();} - friend constexpr auto extension (array_types const& self) noexcept -> extension_type {return self.extension ();} - friend constexpr auto is_empty (array_types const& self) noexcept -> bool {return self.is_empty ();} - friend constexpr auto num_elements(array_types const& self) noexcept -> size_type {return self.num_elements();} - - friend constexpr auto extensions (array_types const& self) noexcept -> extensions_type {return self.extensions ();} - friend constexpr auto sizes (array_types const& self) noexcept -> sizes_type {return self.sizes ();} - - // TODO(correaa) [[deprecated("use member syntax for non-salient properties")]] - friend - constexpr auto stride (array_types const& self) noexcept -> stride_type {return self.stride ();} - - // TODO(correaa) [[deprecated("use member syntax for non-salient properties")]] - friend - constexpr auto strides (array_types const& self) noexcept -> strides_type {return self.strides ();} - - 
protected: - constexpr auto layout_mutable() -> layout_t& {return static_cast(*this);} - - public: - using value_type = typename std::conditional< - (D > 1), - array::default_allocator_type>, - element - >::type; - - using reference = typename std::conditional< - (D > 1), - basic_array, - typename std::iterator_traits::reference - >::type; - - using const_reference = typename std::conditional< - (D > 1), - basic_array, - typename std::iterator_traits::reference - >::type; - - HD constexpr auto base() const -> element_ptr {return base_;} - HD constexpr auto cbase() const -> element_const_ptr {return base_;} - HD constexpr auto mbase() const& -> element_ptr& {return base_;} - - friend /*constexpr*/ auto base(array_types const& self) -> element_ptr {return self.base();} - - HD constexpr auto layout() const -> layout_t const& {return *this;} - friend constexpr auto layout(array_types const& self) -> layout_t const& {return self.layout();} - - constexpr auto origin() const& -> decltype(auto) {return base_ + Layout::origin();} - friend constexpr auto origin(array_types const& self) -> decltype(auto) {return self.origin();} - - protected: - using derived = basic_array; - element_ptr base_; // NOLINT(cppcoreguidelines-non-private-member-variables-in-classes,misc-non-private-member-variables-in-classes) : TODO(correaa) try to make it private, [static_]array needs mutation - HD constexpr explicit array_types(std::nullptr_t nil) : Layout{}, base_{nil} {} - - public: - array_types() = default; - - HD constexpr array_types(layout_t const& lyt, element_ptr const& data) - : Layout{lyt}, base_{data} {} - - protected: // TODO(correaa) : find why this needs to be public and not protected or friend - template>{}> - , decltype(multi::explicit_cast(std::declval().base_))* = nullptr - > - HD constexpr explicit array_types(ArrayTypes const& other) : Layout{other.layout()}, base_{other.base_} {} - - template< - class ArrayTypes, - typename = std::enable_if_t>{}>, - 
decltype(multi::implicit_cast(std::declval().base_))* = nullptr - > - // cppcheck-suppress noExplicitConstructor ; because underlying pointers are implicitly convertible - HD constexpr /*implt*/ array_types(ArrayTypes const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : inherit behavior of underlying pointer - : Layout{other.layout()}, base_{other.base_} {} - // ^^^ TODO(correaa) : call explicit from implicit, careful with infinite recursion - - template< - typename ElementPtr2, - typename = decltype(Layout{std::declval const&>().layout()}), - typename = decltype(element_ptr{std::declval const&>().base_}) - > - HD constexpr explicit array_types(array_types const& other) - : Layout{other.layout()}, base_{other.base_} {} - - template friend struct array_types; -}; - -template -struct basic_array_ptr // NOLINT(fuchsia-multiple-inheritance) : to allow mixin CRTP -: private Ref // TODO(correaa) : remove inheritance from Ref?? -, boost::multi::iterator_facade< - basic_array_ptr, void, std::random_access_iterator_tag, - Ref const&, typename Layout::difference_type -> { //, boost::multi::totally_ordered2, void> - ~basic_array_ptr() = default; // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - - HD constexpr auto operator=(basic_array_ptr&& other) noexcept // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) // lints(hicpp-noexcept-move,performance-noexcept-move-constructor) - -> basic_array_ptr& { - if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) - this->base_ = other.base_; - // static_cast(*this) - this->layout_mutable() = other.layout(); - return *this; - } - - using pointer = Ref const*; - using element_type = typename Ref::decay_type; - using difference_type = typename Layout::difference_type; - - using value_type = element_type; - using reference = Ref; - using iterator_category = std::random_access_iterator_tag; - - HD constexpr explicit 
basic_array_ptr(std::nullptr_t nil) : Ref{nil} {} - HD constexpr basic_array_ptr() : basic_array_ptr{nullptr} {} - - template friend struct basic_array_ptr; - - HD constexpr basic_array_ptr(typename Ref::element_ptr base, layout_t lyt) : Ref{lyt, base} {} - HD constexpr basic_array_ptr(typename Ref::element_ptr base, index_extensions exts) : Ref{base, exts} {} - template - // cppcheck-suppress noExplicitConstructor ; no information loss, allows comparisons - HD constexpr basic_array_ptr(Array* other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) - : basic_array_ptr{other->data_elements(), other->layout()} {} - - basic_array_ptr(basic_array_ptr &&) noexcept = default; - basic_array_ptr(basic_array_ptr const& ) = default; - - HD constexpr auto operator=(basic_array_ptr const& other) noexcept -> basic_array_ptr& { - if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) - this->base_ = other.base_; - // static_cast(*this) - this->layout_mutable() = other.layout(); - return *this; - } - HD constexpr explicit operator bool() const {return this->base_;} - - HD constexpr auto dereference() const -> Ref {return Ref{this->layout(), this->base_};} - - HD constexpr auto operator* () const -> Ref{return Ref{*this};} - - HD constexpr auto operator->() const -> Ref* {return const_cast(this);} // NOLINT(cppcoreguidelines-pro-type-const-cast) : TODO(correaa) find a better way without const_cast - HD constexpr auto operator->() -> Ref* {return this;} - - HD constexpr auto operator[](difference_type n) const -> Ref {return *(*this + n);} - - HD constexpr auto operator<(basic_array_ptr const& other) const -> bool {return distance_to(other) > 0;} - - HD constexpr basic_array_ptr(typename Ref::element_ptr base, Layout const& lyt) : Ref{lyt, base} {} - - template - friend struct basic_array; - - HD constexpr auto base() const -> typename Ref::element_ptr {return this->base_;} - - friend HD constexpr auto base(basic_array_ptr const& self) {return 
self.base();} - - using Ref::base_; - using Ref::layout; - - constexpr auto operator==(basic_array_ptr const& other) const -> bool { - auto b1 = this->base_; - auto b2 = other.base_; - bool eq = (b1 == b2); - return eq and this->layout() == other.layout(); - } - - template - friend HD constexpr auto operator==(Array* other, basic_array_ptr const& self) -> bool { - return other->base() == self.base_ and other->layout() == self.layout(); - } - -// friend HD /*constexpr*/ auto operator==(basic_array_ptr const& self, basic_array_ptr const& other) -> bool { -// auto b1 = self.base_; -// auto b2 = other.base_; -// bool eq = (b1 == b2); -// return eq and self.layout() == other.layout(); -// } - - template >{}, int> =0> friend HD constexpr auto operator==(basic_array_ptr const& self, basic_array_ptr const& other) -> bool {return self.base() == other->base() and self.layout() == other->layout();} - template >{}, int> =0> friend HD constexpr auto operator!=(basic_array_ptr const& self, basic_array_ptr const& other) -> bool {return self.base() == other->base() and self.layout() == other->layout();} - - protected: - HD constexpr void increment() {base_ += Ref::nelems();} - HD constexpr void decrement() {base_ -= Ref::nelems();} - - HD constexpr void advance(difference_type n) {base_ += Ref::nelems()*n;} - HD constexpr auto distance_to(basic_array_ptr const& other) const -> difference_type { - assert( Ref::nelems() == other.Ref::nelems() and Ref::nelems() != 0 ); - assert( (other.base_ - base_)%Ref::nelems() == 0); - assert( layout() == other.layout() ); - return (other.base_ - base_)/Ref::nelems(); - } - - public: - HD constexpr auto operator+=(difference_type n) -> basic_array_ptr& {advance(n); return *this;} -}; - -template -struct array_iterator; - -template -struct array_iterator // NOLINT(fuchsia-multiple-inheritance) -: boost::multi::iterator_facade< - array_iterator, void, std::random_access_iterator_tag, - basic_array const&, typename layout_t::difference_type -> -, 
multi::decrementable> -, multi::incrementable> -, multi::affine, multi::difference_type> -, multi::totally_ordered2, void> { - ~array_iterator() = default; // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - - constexpr auto operator=(array_iterator&&) // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - noexcept // lints(hicpp-noexcept-move,performance-noexcept-move-constructor) - -> array_iterator& = default; - - array_iterator(array_iterator&&) noexcept // lints(hicpp-noexcept-move,performance-noexcept-move-constructor) - = default; // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - - using difference_type = typename layout_t::difference_type; - using element = Element; - using element_ptr = ElementPtr; - using value_type = typename basic_array::decay_type; - - using pointer = basic_array*; - using reference = basic_array; - - using iterator_category = std::random_access_iterator_tag; - - constexpr static dimensionality_type rank_v = D; - using rank = std::integral_constant; - - using ptr_type = basic_array_ptr, layout_t>; - using stride_type = index; - using layout_type = typename reference::layout_type; - - HD constexpr explicit array_iterator(std::nullptr_t nil) : ptr_{nil} {} //, stride_{1} - HD constexpr array_iterator() : array_iterator{nullptr} {} - - template friend struct array_iterator; - - template< - class EElement, typename PPtr, - decltype(multi::explicit_cast(std::declval>().ptr_.base()))* = nullptr - > - HD constexpr explicit array_iterator(array_iterator const& other) - : ptr_{element_ptr{other.ptr_.base_}, other.ptr_.layout()}, stride_{other.stride_} {} - - template(std::declval>().ptr_.base()))* = nullptr - > - // cppcheck-suppress noExplicitConstructor ; because underlying pointer is implicitly convertible - HD constexpr/*mplct*/ array_iterator(array_iterator const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : 
propagate implicitness of pointer - : ptr_{element_ptr{other.ptr_.base_}, other.ptr_.layout()}, stride_{other.stride_} {} - - array_iterator(array_iterator const&) = default; - auto operator=(array_iterator const&) -> array_iterator& = default; - - HD constexpr explicit operator bool() const {return static_cast(ptr_.base_);} - HD constexpr auto operator*() const -> basic_array {/*assert(*this);*/ return {*ptr_};} - - constexpr auto operator->() const -> decltype(auto) {/*assert(*this);*/ return ptr_;} - - HD constexpr auto operator+ (difference_type n) const -> array_iterator {array_iterator ret{*this}; ret += n; return ret;} - HD constexpr auto operator[](difference_type n) const -> basic_array {return *((*this) + n);} - - constexpr auto operator==(array_iterator const& other) const -> bool {return ptr_ == other.ptr_ and stride_== other.stride_ and ptr_.layout() == other.ptr_.layout();} - /*[[gnu::pure]]*/ constexpr auto operator< (array_iterator const& other) const -> bool {return distance_to(other) > 0;} - - HD constexpr explicit array_iterator(typename basic_array::element_ptr base, layout_t lyt, index stride) - : ptr_{base, lyt}, stride_{stride} {} - - template friend struct basic_array; - - template - HD constexpr auto operator()(index idx, As... 
args) const -> decltype(auto) {return this->operator[](idx)(args...); } - HD constexpr auto operator()(index idx) const -> decltype(auto) {return this->operator[](idx) ; } - - private: - template - static HD constexpr auto apply_impl(Self&& self, Tuple const& tuple, std::index_sequence/*012*/) -> decltype(auto) { - return std::forward(self)(std::get(tuple)...); - } - - public: - template HD constexpr auto apply(Tuple const& t) const& -> decltype(auto) {return apply_impl( *this , t, std::make_index_sequence::value>());} // NOLINT(readability-identifier-length) std naming - template HD constexpr auto apply(Tuple const& t) && -> decltype(auto) {return apply_impl(std::move(*this), t, std::make_index_sequence::value>());} // NOLINT(readability-identifier-length) std naming - template HD constexpr auto apply(Tuple const& t) & -> decltype(auto) {return apply_impl( *this , t, std::make_index_sequence::value>());} // NOLINT(readability-identifier-length) std naming - - private: - ptr_type ptr_; - stride_type stride_ = {1}; // nice non-zero default // TODO(correaa) use INT_MAX? 
- - constexpr auto equal(array_iterator const& other) const -> bool {return ptr_ == other.ptr_ and stride_ == other.stride_;} - constexpr void decrement() {ptr_.base_ -= stride_;} - constexpr void advance(difference_type n) {ptr_.base_ += stride_*n;} - constexpr auto distance_to(array_iterator const& other) const -> difference_type { - assert( stride_ == other.stride_); assert( stride_ != 0 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) normal in a constexpr function - return (other.ptr_.base_ - ptr_.base_)/stride_; - } - - public: - HD constexpr auto base() const& -> element_ptr {return ptr_.base_;} - friend /*constexpr*/ auto base(array_iterator const& self) -> element_ptr {return self.base();} - - HD constexpr auto stride() const& -> stride_type {return stride_;} - friend constexpr auto stride(array_iterator const& self) -> stride_type {return self.stride_;} - - constexpr auto operator++() -> array_iterator& {ptr_.base_ += stride_; return *this;} - constexpr auto operator--() -> array_iterator& {decrement(); return *this;} - - friend constexpr auto operator-(array_iterator const& self, array_iterator const& other) -> difference_type { - assert(self.stride_ == other.stride_); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) normal in a constexpr function - assert(self.stride_ != 0); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) normal in a constexpr function - return (self.ptr_.base_ - other.ptr_.base_)/self.stride_; - } - - constexpr auto operator+=(difference_type n) -> array_iterator& {advance(+n); return *this;} - constexpr auto operator-=(difference_type n) -> array_iterator& {advance(-n); return *this;} -}; - -template -struct cursor_t { - using difference_type = typename std::iterator_traits::difference_type; - using strides_type = StridesType; - using element_ptr = ElementPtr; - using element_ref = typename std::iterator_traits::reference; - 
using pointer = element_ptr; - using reference = element_ref; - using indices_type = typename extensions_t::indices_type; - - private: - strides_type strides_; - element_ptr base_; - - template friend struct basic_array; - template friend struct cursor_t; - - constexpr cursor_t(element_ptr base, strides_type const& strides) : strides_{strides}, base_{base} {} - - public: - HD constexpr auto operator[](difference_type n) const -> decltype(auto) { - if constexpr(D != 1) { - return cursor_t>{base_ + std::get<0>(strides_)*n, strides_.tail()}; - } else { - return base_[std::get<0>(strides_)*n]; - } - } - constexpr auto operator()(difference_type n) const -> decltype(auto) { - return operator[](n); - } - template - constexpr auto operator()(difference_type n, Ns... rest) const -> decltype(auto) { - return operator[](n)(rest...); - } - private: - template - constexpr auto apply_impl(Tuple const& tup, std::index_sequence /*012*/) const -> decltype(auto) { - return ((std::get(tup)*std::get(strides_)) + ...); -} - public: - template - constexpr auto operator+=(Tuple const& tup) -> cursor_t& { - base_ += apply_impl(tup, std::make_index_sequence::value>{}); - return *this; - } - constexpr auto operator* () const -> reference {return *base_;} - constexpr auto operator->() const -> pointer {return base_;} -}; - -template -struct elements_iterator_t // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) -: boost::multi::random_accessable, typename std::iterator_traits::difference_type, typename std::iterator_traits::reference> -{ - using difference_type = typename std::iterator_traits::difference_type; - using value_type = typename std::iterator_traits::value_type; - using pointer = Pointer; - using reference = typename std::iterator_traits::reference; - using iterator_category = std::random_access_iterator_tag; - - using const_pointer = typename std::pointer_traits::template rebind; - - using layout_type = LayoutType; - - private: - pointer base_; - 
layout_type l_; - difference_type n_ = 0; - extensions_t xs_; - - using indices_type = typename extensions_t::indices_type; - indices_type ns_ = {}; - - template friend struct elements_iterator_t; - template friend struct elements_range_t; - - constexpr elements_iterator_t(pointer base, layout_type lyt, difference_type n) - : base_{base}, l_{lyt}, n_{n}, xs_{l_.extensions()}, ns_{lyt.is_empty()?indices_type{}:xs_.from_linear(n)} {} - - public: - constexpr auto base() -> pointer {return base_;} - constexpr auto base() const -> const_pointer {return base_;} - HD constexpr auto layout() const -> layout_type {return l_;} - - template(std::declval().base_))* = nullptr> - // cppcheck-suppress noExplicitConstructor - HD constexpr /*impl*/ elements_iterator_t(Other const& other) : elements_iterator_t{other.base_, other.l_, other.n_} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) - template - HD constexpr explicit elements_iterator_t(Other const& other) : elements_iterator_t{other.base_, other.l_, other.n_} {} - - elements_iterator_t(elements_iterator_t const&) = default; - - HD constexpr auto operator++() -> elements_iterator_t& { - std::apply( [&xs = this->xs_](auto&... idxs){return xs.next_canonical(idxs...);}, ns_ ); - ++n_; - return *this; - } - HD constexpr auto operator--() -> elements_iterator_t& { - std::apply( [&xs = this->xs_](auto&... 
idxs) {return xs.prev_canonical(idxs...); }, ns_ ); - --n_; - return *this; - } - - HD constexpr auto operator+=(difference_type n) -> elements_iterator_t& { - auto const nn = std::apply(xs_, ns_); - ns_ = xs_.from_linear(nn + n); - n_ += n; - return *this; - } - HD constexpr auto operator-=(difference_type n) -> elements_iterator_t& { - auto const nn = std::apply(xs_, ns_); - ns_ = xs_.from_linear(nn - n); - n_ -= n; - return *this; - } - - HD /*[[gnu::pure]]*/ constexpr auto operator-(elements_iterator_t const& other) const -> difference_type { - assert(base_ == other.base_ and l_ == other.l_); - return n_ - other.n_; - } - HD constexpr auto operator<(elements_iterator_t const& other) const -> difference_type { - assert(base_ == other.base_ and l_ == other.l_); - return n_ < other.n_; - } - HD constexpr auto operator+(difference_type n) const -> elements_iterator_t {auto ret{*this}; ret += n; return ret;} // explicitly necessary for nvcc/thrust - HD constexpr auto operator-(difference_type n) const -> elements_iterator_t {auto ret{*this}; ret -= n; return ret;} // explicitly necessary for nvcc/thrust - - constexpr auto current() const -> pointer {return base_ + std::apply(l_, ns_);} - HD constexpr auto operator->() const -> pointer {return base_ + std::apply(l_, ns_) ;} - HD constexpr auto operator*() const -> reference {return base_ [std::apply(l_, ns_)];} - HD constexpr auto operator[](difference_type const& n) const -> reference { - auto const nn = std::apply(xs_, ns_); - return base_[std::apply(l_, xs_.from_linear(nn + n))]; - } // explicit here is necessary for nvcc/thrust - - HD /*[[gnu::pure]]*/ constexpr auto operator==(elements_iterator_t const& other) const -> bool { - assert(base_ == other.base_ and l_ == other.l_); - return n_ == other.n_;// and base_ == other.base_ and l_ == other.l_; - } - HD /*[[gnu::pure]]*/ constexpr auto operator!=(elements_iterator_t const& other) const -> bool { - assert(base_ == other.base_ and l_ == other.l_); - return n_ != 
other.n_; - } -}; - -template -struct elements_range_t { - using pointer = Pointer; - using layout_type = LayoutType; - - using value_type = typename std::iterator_traits::value_type; - using const_pointer = typename std::pointer_traits::template rebind; - - using reference = typename std::iterator_traits< pointer>::reference; - using const_reference = typename std::iterator_traits::reference; - - using size_type = typename std::iterator_traits::difference_type; - using difference_type = typename std::iterator_traits::difference_type; - - using iterator = elements_iterator_t; - using const_iterator = elements_iterator_t; - - private: - pointer base_; - layout_type l_; - - public: - template(std::declval().base_))* = nullptr> - // cppcheck-suppress noExplicitConstructor ; because underlying pointer is implicitly convertible - constexpr /*impl*/ elements_range_t(OtherRange const& other) : base_{other.base}, l_{other.l_} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to reproduce the implicitness of the argument - template(std::declval().base_))* = nullptr> - constexpr explicit elements_range_t(OtherRange const& other) : elements_range_t{other} {} - - constexpr elements_range_t(pointer base, layout_type lyt) : base_{base}, l_{lyt} {} - - private: - constexpr auto at_aux(difference_type n) const -> reference { - assert( not is_empty() ); - return base_[std::apply(l_, l_.extensions().from_linear(n))]; - } - - public: - HD constexpr auto operator[](difference_type n) const& -> const_reference {return at_aux(n);} - HD constexpr auto operator[](difference_type n) && -> reference {return at_aux(n);} - HD constexpr auto operator[](difference_type n) & -> reference {return at_aux(n);} - - constexpr auto size() const -> size_type {return l_.num_elements();} - - [[nodiscard]] - constexpr auto empty() const -> bool {return l_. 
empty();} - constexpr auto is_empty() const -> bool {return l_.is_empty();} - - elements_range_t(elements_range_t const&) = delete; - elements_range_t(elements_range_t &&) = delete; - - template auto operator==(elements_range_t const& other) const -> bool { - if( is_empty() and other.is_empty()) {return true;} - return size() == other.size() and adl_equal(other.begin(), other.end(), begin()); - } - template auto operator!=(elements_range_t const& other) const -> bool { - if(is_empty() and other.is_empty()) {return false;} - return size() != other.size() or not adl_equal(other.begin(), other.end(), begin()); - } - - template void swap(elements_range_t& other) & {assert(size() == other.size()); adl_swap_ranges(begin(), end(), other.begin());} - template void swap(elements_range_t& other) && {assert(size() == other.size()); adl_swap_ranges(begin(), end(), other.begin());} - template void swap(elements_range_t&& other) & {assert(size() == other.size()); adl_swap_ranges(begin(), end(), other.begin());} - template void swap(elements_range_t&& other) && {assert(size() == other.size()); adl_swap_ranges(begin(), end(), other.begin());} - - ~elements_range_t() = default; - - private: - /*[[gnu::pure]]*/ constexpr auto begin_aux() const {return iterator{base_, l_, 0 };} - /*[[gnu::pure]]*/ constexpr auto end_aux () const {return iterator{base_, l_, l_.num_elements()};} - - public: - /*[[gnu::pure]]*/ constexpr auto begin() const& -> const_iterator {return begin_aux();} - /*[[gnu::pure]]*/ constexpr auto end () const& -> const_iterator {return end_aux ();} - - constexpr auto begin() && -> iterator {return begin_aux();} - constexpr auto end () && -> iterator {return end_aux() ;} - - constexpr auto begin() & -> iterator {return begin_aux();} - constexpr auto end () & -> iterator {return end_aux() ;} - - constexpr auto front() const& -> const_reference {return *begin();} - constexpr auto back () const& -> const_reference {return *std::prev(end());} - - constexpr auto front() && 
-> reference {return *begin();} - constexpr auto back () && -> reference {return *std::prev(end());} - - constexpr auto front() & -> reference {return *begin();} - constexpr auto back () & -> reference {return *std::prev(end());} - - auto operator=(elements_range_t const&) -> elements_range_t& = delete; - auto operator=(elements_range_t &&) -> elements_range_t& = delete; - - template().begin(), std::declval().end(), std::declval()))> - auto operator=(OtherElementRange&& other) & -> elements_range_t& {assert(size() == other.size()); - if(not is_empty()) {adl_copy(other.begin(), other.end(), begin());} - return *this; - } - - template().begin(), std::declval().end(), std::declval()))> - auto operator=(OtherElementRange&& other) && -> elements_range_t& {assert(size() == other.size()); - if(not is_empty()) {adl_copy(other.begin(), other.end(), begin());} - return *this; - } - - auto operator=(std::initializer_list values) && -> elements_range_t& {operator=(values); return *this;} - auto operator=(std::initializer_list values) & -> elements_range_t& { - assert(static_cast(values.size()) == size()); - adl_copy_n(values.begin(), values.size(), begin()); - return *this; - } - -#if 0 - template auto operator= (elements_range_t const& o) & -> elements_range_t& {assert(size() == o.size()); if(not is_empty()) {adl_copy(o.begin(), o.end(), begin());}; return *this;} - template auto operator= (elements_range_t const& o) && -> elements_range_t& {assert(size() == o.size()); if(not is_empty()) {adl_copy(o.begin(), o.end(), begin());}; return *this;} - - template auto operator= (elements_range_t && o) & -> elements_range_t& {assert(size() == o.size()); if(not is_empty()) {adl_copy(o.begin(), o.end(), begin());}; return *this;} - template auto operator= (elements_range_t && o) && -> elements_range_t& {assert(size() == o.size()); if(not is_empty()) {adl_copy(o.begin(), o.end(), begin());}; return *this;} - - template auto operator= (elements_range_t & o) & -> elements_range_t& 
{assert(size() == o.size()); if(not is_empty()) {adl_copy(o.begin(), o.end(), begin());}; return *this;} - template auto operator= (elements_range_t & o) && -> elements_range_t& {assert(size() == o.size()); if(not is_empty()) {adl_copy(o.begin(), o.end(), begin());}; return *this;} -#endif -}; - -template -[[gnu::pure]] constexpr auto ref(It begin, It end) -->multi::basic_array { - return multi::basic_array{begin, end}; -} - -template -struct basic_array -// : multi::partially_ordered2, void> -: array_types { - using types = array_types; - using ref_ = basic_array; - - using array_types::rank_v; - - friend struct basic_array; - friend struct basic_array; - - using types::layout; - using typename types::element_type; - - using layout_type = Layout; - - HD constexpr auto layout() const -> layout_type {return array_types::layout();} - - using basic_const_array = basic_array::template rebind, Layout>; - - basic_array() = default; - - HD constexpr basic_array(layout_type const& layout, ElementPtr const& base) - : array_types{layout, base} {} - - auto operator=(basic_array&& other) noexcept(false) -> basic_array& {operator=(other); return *this;} - - protected: - using types::types; - - template friend struct static_array; - basic_array(basic_array const&) = default; // NOTE: reference type cannot be copied. 
perhaps you want to return by std::move or std::forward if you got the object from a universal reference argument - - template friend struct basic_array_ptr; - - public: - using element = typename types::element; - using element_ptr = typename types::element_ptr; - using element_const_ptr = typename types::element_const_ptr; - using element_move_ptr = multi::move_ptr; - using element_ref = typename types::element_ref; - using element_cref = typename std::iterator_traits::reference; - - using elements_iterator = elements_iterator_t; - using celements_iterator = elements_iterator_t; - - using elements_range = elements_range_t; - using const_elements_range = elements_range_t; - - private: - constexpr auto elements_aux() const {return elements_range{this->base(), this->layout()};} - - public: -// #if defined(__NVCC__) - basic_array(basic_array&&) noexcept = default; // lints(readability-redundant-access-specifiers) -// #else -// basic_array(basic_array&&) noexcept = delete; // lints(readability-redundant-access-specifiers) -// #endif - - constexpr auto elements() & -> elements_range {return elements_aux();} - constexpr auto elements() && -> elements_range {return elements_aux();} - constexpr auto elements() const& -> const_elements_range {return const_elements_range{this->base(), this->layout()};} // TODO(correaa) simplify - constexpr auto const_elements() const -> const_elements_range {return elements_aux();} - - constexpr auto hull() const -> std::pair { - return {this->base(), std::abs(this->hull_size())}; - } - - ~basic_array() = default; // this lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - - // in C++ < 17 this is necessary to return references from functions - friend constexpr auto sizes(basic_array const& self) noexcept -> typename basic_array::sizes_type {return self.sizes();} // needed by nvcc - friend constexpr auto size (basic_array const& self) noexcept -> typename basic_array::size_type {return self.size ();} // needed 
by nvcc - - template friend constexpr auto reinterpret_array_cast(basic_array && self) {return std::move(self).template reinterpret_array_cast::template rebind>();} - template friend constexpr auto reinterpret_array_cast(basic_array const& self) {return self .template reinterpret_array_cast::template rebind>();} - - friend constexpr auto dimensionality(basic_array const& /*self*/) {return D;} - - using typename types::reference; - - using default_allocator_type = typename multi::pointer_traits::default_allocator_type; - - constexpr auto get_allocator() const -> default_allocator_type { - using multi::get_allocator; - return get_allocator(this->base()); - } - - friend - #if not defined(__NVCC__) and not defined(__NVCOMPILER) and not defined(__INTEL_COMPILER) - constexpr - #endif - auto get_allocator(basic_array const& self) -> default_allocator_type {return self.get_allocator();} - - using decay_type = array::default_allocator_type>; - - friend constexpr auto decay(basic_array const& self) -> decay_type {return self.decay();} - constexpr auto decay() const& -> decay_type { - decay_type ret{std::move(modify(*this))}; - return ret; - } - - constexpr auto operator+() const -> decay_type {return decay();} - using typename types::const_reference; - - private: - HD constexpr auto at_aux(index idx) const -> reference { // MULTI_ACCESS_ASSERT(this->extension().contains(i)&&"out of bounds"); - return reference{ - this->layout().sub(), - this->base() + (idx*this->layout().stride() - this->layout().offset()) - }; // cppcheck-suppress syntaxError ; bug in cppcheck 2.5 - } - - public: - HD constexpr auto operator[](index idx) const& -> const_reference {return at_aux(idx);} - HD constexpr auto operator[](index idx) && -> reference {return at_aux(idx);} - HD constexpr auto operator[](index idx) & -> reference {return at_aux(idx);} - - template(D)>, typename = std::enable_if_t<(std::tuple_size::value > 1)> > - HD constexpr auto operator[](Tuple const& tup) const - 
->decltype(operator[](std::get<0>(tup))[detail::tuple_tail(tup)]) { - return operator[](std::get<0>(tup))[detail::tuple_tail(tup)]; } - - template::value == 1)> > - HD constexpr auto operator[](Tuple const& tup) const - ->decltype(operator[](std::get<0>(tup))) { - return operator[](std::get<0>(tup)); } - - constexpr auto front() const& -> const_reference {return *begin();} - constexpr auto back() const& -> const_reference {return *std::prev(end());} - - constexpr auto front() && -> reference {return *begin();} - constexpr auto back() && -> reference {return *std::prev(end());} - - constexpr auto front() & -> reference {return *begin();} - constexpr auto back() & -> reference {return *std::prev(end());} - - using typename types::index; - - constexpr auto reindexed(index first) const& -> basic_const_array { - typename types::layout_t new_layout = this->layout(); - new_layout.reindex(first); - return {new_layout, types::base_}; - } - constexpr auto reindexed(index first)& -> basic_array { - typename types::layout_t new_layout = this->layout(); - new_layout.reindex(first); - return {new_layout, types::base_}; - } - constexpr auto reindexed(index first)&& -> basic_array { - typename types::layout_t new_layout = this->layout(); - new_layout.reindex(first); - return {new_layout, types::base_}; - } - - // TODO(correaa) : implement reindexed_aux - template - constexpr auto reindexed(index first, Indexes... idxs) const& -> basic_const_array { - return ((reindexed(first).rotated()).reindexed(idxs...)).unrotated(); - } - template - constexpr auto reindexed(index first, Indexes... idxs) & -> basic_array { - return ((reindexed(first).rotated()).reindexed(idxs...)).unrotated(); - } - template - constexpr auto reindexed(index first, Indexes... 
idxs)&& -> basic_array { - return ((std::move(*this).reindexed(first).rotated()).reindexed(idxs...)).unrotated(); - } - private: - constexpr auto take_aux(difference_type n) const { - assert( n <= this->size() ); - typename types::layout_t new_layout{ - this->layout().sub(), - this->layout().stride(), - this->layout().offset(), - this->stride()*n - }; - return basic_array{new_layout, this->base()}; - } - - public: - constexpr auto take(difference_type n) const& -> basic_const_array {return take_aux(n);} - constexpr auto take(difference_type n) && -> basic_array {return take_aux(n);} - constexpr auto take(difference_type n) & -> basic_array {return take_aux(n);} - - private: - constexpr auto drop_aux(difference_type n) const { - assert( n <= this->size() ); - typename types::layout_t new_layout{ - this->layout().sub(), - this->layout().stride(), - this->layout().offset(), - this->stride()*(this->size() - n) - }; - return basic_array{new_layout, this->base() + n*this->layout().stride() - this->layout().offset()}; - } - - public: - constexpr auto drop(difference_type n) const& -> basic_const_array {return drop_aux(n);} - constexpr auto drop(difference_type n) && -> basic_array {return drop_aux(n);} - constexpr auto drop(difference_type n) & -> basic_array {return drop_aux(n);} - - private: - HD /*[[gnu::pure]]*/ constexpr auto sliced_aux(index first, index last) const { - MULTI_ACCESS_ASSERT(((first==last) or this->extension().contains(first ))&&"sliced first out of bounds"); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - MULTI_ACCESS_ASSERT(((first==last) or this->extension().contains(last - 1))&&"sliced last out of bounds"); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - typename types::layout_t new_layout = this->layout(); - new_layout.nelems() = this->stride()*(last - first); // TODO(correaa) : reconstruct layout instead 
of mutating it - return basic_array{new_layout, this->base() + (first*this->layout().stride() - this->layout().offset())}; - } - - public: - HD constexpr auto sliced(index first, index last) const& -> basic_const_array {return sliced_aux(first, last);} - HD /*[[gnu::pure]]*/ constexpr auto sliced(index first, index last) & -> basic_array {return sliced_aux(first, last);} - HD /*[[gnu::pure]]*/ constexpr auto sliced(index first, index last) && -> basic_array {return sliced_aux(first, last);} - - constexpr auto blocked(index first, index last) const& -> basic_const_array {return sliced(first, last).reindexed(first);} - /*[[gnu::pure]]*/ constexpr auto blocked(index first, index last) & -> basic_array {return sliced(first, last).reindexed(first);} - - using iextension = typename basic_array::index_extension; - - /*[[gnu::pure]]*/ constexpr auto stenciled(iextension iex) & -> basic_array{return blocked(iex.start(), iex.finish());} - constexpr auto stenciled(iextension iex, iextension iex1) & -> basic_array{return ((stenciled(iex).rotated()).stenciled(iex1)).unrotated();} - constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2) & -> basic_array{return ((stenciled(iex).rotated()).stenciled(iex1, iex2)).unrotated();} - constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3) & -> basic_array{return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3)).unrotated();} - template - constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3, Xs... 
iexs) & -> basic_array{return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3, iexs...)).unrotated();} - - /*[[gnu::pure]]*/ constexpr auto stenciled(iextension iex) && -> basic_array{return blocked(iex.start(), iex.finish());} - constexpr auto stenciled(iextension iex, iextension iex1) && -> basic_array{return ((stenciled(iex).rotated()).stenciled(iex1)).unrotated();} - constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2) && -> basic_array{return ((stenciled(iex).rotated()).stenciled(iex1, iex2)).unrotated();} - constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3) && -> basic_array{return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3)).unrotated();} - template - constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3, Xs... iexs) && -> basic_array{return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3, iexs...)).unrotated();} - - constexpr auto stenciled(iextension iex) const& -> basic_const_array {return blocked(iex.start(), iex.finish());} - constexpr auto stenciled(iextension iex, iextension iex1) const& -> basic_const_array {return ((stenciled(iex).rotated()).stenciled(iex1)).unrotated();} - constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2) const& -> basic_const_array {return ((stenciled(iex).rotated()).stenciled(iex1, iex2)).unrotated();} - constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3) const& -> basic_const_array {return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3)).unrotated();} - - template - constexpr auto stenciled(iextension iex, iextension iex1, iextension iex2, iextension iex3, Xs... 
iexs) const& -> basic_const_array { - return ((stenciled(iex).rotated()).stenciled(iex1, iex2, iex3, iexs...)).unrotated(); - } - - constexpr auto elements_at(size_type idx) const& -> decltype(auto) { - assert(idx < this->num_elements()); - auto const sub_num_elements = this->begin()->num_elements(); - return operator[](idx / sub_num_elements).elements_at(idx % sub_num_elements); - } - constexpr auto elements_at(size_type idx) && -> decltype(auto) { - assert(idx < this->num_elements()); - auto const sub_num_elements = this->begin()->num_elements(); - return operator[](idx / sub_num_elements).elements_at(idx % sub_num_elements); - } - constexpr auto elements_at(size_type idx) & -> decltype(auto) { - assert(idx < this->num_elements()); - auto const sub_num_elements = this->begin()->num_elements(); - return operator[](idx / sub_num_elements).elements_at(idx % sub_num_elements); - } - - private: - constexpr auto strided_aux(difference_type diff) const -> basic_array { - typename types::layout_t new_layout{this->layout().sub(), this->layout().stride()*diff, this->layout().offset(), this->layout().nelems()}; - return {new_layout, types::base_}; - } - - public: - constexpr auto strided(difference_type diff) const& -> basic_const_array {return strided_aux(diff);} - constexpr auto strided(difference_type diff) && -> basic_array {return strided_aux(diff);} - constexpr auto strided(difference_type diff) & -> basic_array {return strided_aux(diff);} - - constexpr auto sliced( - typename types::index first, typename types::index last, typename types::index stride_ - ) const -> basic_array { - return sliced(first, last).strided(stride_); - } - - using index_range = typename basic_array::index_range; - - constexpr auto range(index_range irng) const& -> decltype(auto) {return sliced(irng.front(), irng.front() + irng.size());} - /*[[gnu::pure]]*/ constexpr auto range(index_range irng) && -> decltype(auto) {return std::move(*this).sliced(irng.front(), irng.front() + irng.size());} - 
/*[[gnu::pure]]*/ constexpr auto range(index_range irng) & -> decltype(auto) {return sliced(irng.front(), irng.front() + irng.size());} - - constexpr auto is_flattable() const -> bool{return this->stride() == this->layout().sub().nelems();} - - friend constexpr auto flatted(basic_array const& self) {return self.flatted();} - constexpr auto flatted() const& { - assert(is_flattable() && "flatted doesn't work for all layouts!"); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - multi::layout_t new_layout{this->layout().sub()}; - new_layout.nelems() *= this->size(); // TODO(correaa) : use immutable layout - return basic_array{new_layout, types::base_}; - } - - // TODO(correaa) : define a diagonal_aux - constexpr auto diagonal() && {return this->diagonal();} - - constexpr auto diagonal() & -> basic_array { - using boost::multi::detail::get; - auto square_size = std::min(get<0>(this->sizes()), get<1>(this->sizes())); - multi::layout_t new_layout{(*this)({0, square_size}, {0, square_size}).layout().sub()}; - new_layout.nelems() += (*this)({0, square_size}, {0, square_size}).layout().nelems(); // TODO(correaa) : don't use mutation - new_layout.stride() += (*this)({0, square_size}, {0, square_size}).layout().stride(); // TODO(correaa) : don't use mutation - return {new_layout, types::base_}; - } - - template 1) and sizeof(Dummy*), int> =0> - constexpr auto diagonal() const& -> basic_array { - auto square_size = std::min(std::get<0>(this->sizes()), std::get<1>(this->sizes())); - multi::layout_t new_layout{(*this)({0, square_size}, {0, square_size}).layout().sub_}; - new_layout.nelems_ += (*this)({0, square_size}, {0, square_size}).layout().nelems_; - new_layout.stride_ += (*this)({0, square_size}, {0, square_size}).layout().stride_; // cppcheck-suppress arithOperationsOnVoidPointer ; false positive D == 1 doesn't happen here - return {new_layout, types::base_}; - } - - friend constexpr auto 
diagonal(basic_array const& self) {return self .diagonal();} - friend constexpr auto diagonal(basic_array& self) {return self .diagonal();} - friend constexpr auto diagonal(basic_array&& self) {return std::move(self).diagonal();} - - using partitioned_type = basic_array; - using partitioned_const_type = basic_array; - - private: - /*[[gnu::pure]]*/ constexpr auto partitioned_aux(size_type n) const -> partitioned_type { - assert(n != 0); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - // vvv TODO(correaa) should be size() here? - assert( (this->layout().nelems() % n) == 0); // if you get an assertion here it means that you are partitioning an array with an incommunsurate partition // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : : normal in a constexpr function - multi::layout_t new_layout{this->layout(), this->layout().nelems()/n, 0, this->layout().nelems()}; - new_layout.sub().nelems() /= n; - return {new_layout, types::base_}; - } - - public: - constexpr auto partitioned(size_type n) const& -> partitioned_const_type {return partitioned_aux(n);} - constexpr auto partitioned(size_type n) & -> partitioned_type {return partitioned_aux(n);} - /*[[gnu::pure]]*/ constexpr auto partitioned(size_type n) && -> partitioned_type {return partitioned_aux(n);} - - friend constexpr auto partitioned(basic_array const& self, size_type n) -> partitioned_const_type {return self .partitioned(n);} - friend constexpr auto partitioned(basic_array & self, size_type n) -> partitioned_type {return self .partitioned(n);} - friend constexpr auto partitioned(basic_array && self, size_type n) -> partitioned_type {return std::move(self).partitioned(n);} - - private: - constexpr auto chunked_aux(size_type count) const -> partitioned_type { - assert( this->size() % count == 0 ); - return partitioned_aux(this->size()/count); - } - - public: // in Mathematica this is called Partition 
https://reference.wolfram.com/language/ref/Partition.html in RangesV3 it is called chunk - constexpr auto chunked(size_type count) const& -> partitioned_const_type {return chunked_aux(count);} - constexpr auto chunked(size_type count) & -> partitioned_type {return chunked_aux(count);} - constexpr auto chunked(size_type count) && -> partitioned_type {return chunked_aux(count);} - - private: - constexpr auto reversed_aux() const -> basic_array { - auto new_layout = this->layout(); - new_layout.reverse(); - return {new_layout, types::base_}; - } - - public: - constexpr auto reversed() const& -> basic_const_array {return reversed_aux();} - constexpr auto reversed() & -> basic_array {return reversed_aux();} - constexpr auto reversed() && -> basic_array {return reversed_aux();} - friend constexpr auto reversed(basic_array const& self) -> basic_const_array {return self .reversed();} - friend constexpr auto reversed(basic_array & self) -> basic_array {return self .reversed();} - friend constexpr auto reversed(basic_array && self) -> basic_array {return std::move(self).reversed();} - - constexpr auto transposed() const& -> basic_array { - return {this->layout().transpose(), types::base_}; - } - friend /*constexpr*/ auto transposed(basic_array const& self) -> basic_array {return self.transposed();} - friend -#if not((defined(__INTEL_COMPILER)) or defined(__NVCC__)) - constexpr -#endif - auto operator~ (basic_array const& self) -> basic_array {return self.transposed();} - - HD constexpr auto rotated() & -> basic_array { - typename types::layout_t new_layout = this->layout(); - new_layout.rotate(); - return basic_array{new_layout, types::base_}; - } - HD constexpr auto rotated() && -> basic_array { - typename types::layout_t new_layout = this->layout(); - new_layout.rotate(); - return basic_array{new_layout, types::base_}; - } - HD constexpr auto rotated() const& -> basic_const_array { - typename types::layout_t new_layout = this->layout(); - new_layout.rotate(); - typename 
basic_const_array::element_ptr new_base_{types::base_}; - return basic_const_array{new_layout, new_base_}; - } - - friend constexpr auto rotated(basic_array const& self) -> basic_const_array {return self .rotated();} - friend constexpr auto rotated(basic_array & self) -> basic_array {return self .rotated();} - friend /*constexpr*/ auto rotated(basic_array && self) -> basic_array {return std::move(self).rotated();} - - HD constexpr auto unrotated() & { - typename types::layout_t new_layout = this->layout(); - new_layout.unrotate(); - return basic_array{new_layout, types::base_}; - } - HD constexpr auto unrotated() && { - typename types::layout_t new_layout = this->layout(); - new_layout.unrotate(); - return basic_array{new_layout, types::base_}; - } - HD constexpr auto unrotated() const& { - typename types::layout_t new_layout = this->layout(); - new_layout.unrotate(); - return basic_const_array{new_layout, types::base_}; - } - friend constexpr auto unrotated(basic_array const& self) {return self.unrotated();} - - constexpr auto operator|(typename basic_array::size_type n) & -> decltype(auto) {return partitioned(n);} - constexpr auto operator|(typename basic_array::size_type n) && -> decltype(auto) {return std::move(*this).partitioned(n);} - constexpr auto operator|(typename basic_array::size_type n) const& -> decltype(auto) {return partitioned(n);} - - HD constexpr auto operator()() & -> basic_array {return *this;} - HD constexpr auto operator()() && -> basic_array {return this->operator()();} - HD constexpr auto operator()() const& -> basic_const_array {return {this->layout(), this->base()};} - - private: - template friend struct basic_array; - - HD constexpr auto paren_aux() & -> basic_array {return *this;} - HD constexpr auto paren_aux() && -> basic_array {return this->operator()();} - HD constexpr auto paren_aux() const& -> basic_const_array {return {this->layout(), this->base()};} - - template constexpr auto paren_aux(index_range irng, As... 
args) & { - // return range(a).rotated().paren_aux(as...).unrotated(); // TODO(correaa) compact - // auto&& tmp = range(irng); - // auto&& tmp2 = - // std::move(tmp). - // rotated(); - // auto&& tmp3 = std::move(tmp2).paren_aux(args...); -// auto&& ret = std::move(tmp3).unrotated(); - // return std::move(tmp3).unrotated(); // std::move(ret); - return range(irng).rotated().paren_aux(args...).unrotated(); // std::move(ret); - } - template constexpr auto paren_aux(index_range irng, As... args) && { - // auto&& tmp = std::move(*this).range(irng); - // auto&& tmp2 = std::move(tmp).rotated().paren_aux(args...); - // return std::move(tmp2).unrotated(); - return std::move(*this).range(irng).rotated().paren_aux(args...).unrotated(); - } - template constexpr auto paren_aux(index_range rng, As... args) const& {return range(rng).rotated().paren_aux(args...).unrotated();} - - template constexpr auto paren_aux(intersecting_range inr, As... args) & -> decltype(auto) {return paren_aux(intersection(this->extension(), inr), args...);} - template constexpr auto paren_aux(intersecting_range inr, As... args) && -> decltype(auto) {return paren_aux(intersection(this->extension(), inr), args...);} - template constexpr auto paren_aux(intersecting_range inr, As... args) const& -> decltype(auto) {return paren_aux(intersection(this->extension(), inr), args...);} - - template HD constexpr auto paren_aux(index idx, As... args) & -> decltype(auto) {return operator[](idx).paren_aux(args...);} - template HD constexpr auto paren_aux(index idx, As... args) && -> decltype(auto) {return operator[](idx).paren_aux(args...);} - template HD constexpr auto paren_aux(index idx, As... 
args) const& -> decltype(auto) {return operator[](idx).paren_aux(args...);} - - public: - // vvv DO NOT remove default parameter `= irange` : the default template parameters below help interpret the expression `{first, last}` syntax as index ranges - template constexpr auto operator()(A1 arg1) const& -> decltype(auto) {return paren_aux(arg1);} - template constexpr auto operator()(A1 arg1, A2 arg2) const& -> decltype(auto) {return paren_aux(arg1, arg2);} - template constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3) const& -> decltype(auto) {return paren_aux(arg1, arg2, arg3);} - template constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3, A4 arg4, As... args) const& -> decltype(auto) {return paren_aux(arg1, arg2, arg3, arg4, args...);} - - template constexpr auto operator()(A1 arg1) & -> decltype(auto) {return paren_aux(arg1);} - template constexpr auto operator()(A1 arg1, A2 arg2) & -> decltype(auto) {return paren_aux(arg1, arg2);} - template /*[[gnu::pure]]*/ constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3) & -> decltype(auto) {return paren_aux(arg1, arg2, arg3);} - template constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3, A4 arg4, As... args) & -> decltype(auto) {return paren_aux(arg1, arg2, arg3, arg4, args...);} - - template constexpr auto operator()(A1 arg1) && -> decltype(auto) {return std::move(*this).paren_aux(arg1);} - template HD constexpr auto operator()(A1 arg1, A2 arg2) && -> decltype(auto) {return std::move(*this).paren_aux(arg1, arg2);} - template constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3) && -> decltype(auto) {return std::move(*this).paren_aux(arg1, arg2, arg3);} - template constexpr auto operator()(A1 arg1, A2 arg2, A3 arg3, A4 arg4, As... 
args) && -> decltype(auto) {return std::move(*this).paren_aux(arg1, arg2, arg3, arg4, args...);} - - private: - template constexpr auto apply_impl(Tuple const& tuple, std::index_sequence/*012*/) const& -> decltype(auto) {return this->operator()(std::get(tuple)...);} - template constexpr auto apply_impl(Tuple const& tuple, std::index_sequence/*012*/) & -> decltype(auto) {return this->operator()(std::get(tuple)...);} - template constexpr auto apply_impl(Tuple const& tuple, std::index_sequence/*012*/) && -> decltype(auto) {return std::move(*this).operator()(std::get(tuple)...);} - - public: - template constexpr auto apply(Tuple const& tuple) const& -> decltype(auto) {return apply_impl(tuple, std::make_index_sequence::value>());} - template constexpr auto apply(Tuple const& tuple) && -> decltype(auto) {return apply_impl(tuple, std::make_index_sequence::value>());} - template constexpr auto apply(Tuple const& tuple) & -> decltype(auto) {return apply_impl(tuple, std::make_index_sequence::value>());} - - using iterator = array_iterator; - using const_iterator = array_iterator; - using move_iterator = array_iterator; - - private: - HD constexpr explicit basic_array(iterator begin, iterator end) - : basic_array{ - layout_type{begin->layout(), begin.stride(), 0, begin.stride()*(end - begin)}, - begin.base() - } { - assert(begin.stride() == end.stride() ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - assert(begin->layout() == end->layout()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - } - friend constexpr auto ref(iterator begin, iterator end) -> multi::basic_array; - -// template -// struct basic_reverse_iterator // NOLINT(fuchsia-multiple-inheritance) -// : std::reverse_iterator -// , boost::multi::totally_ordered2, void> { -// template{base(std::declval())})> -// constexpr explicit basic_reverse_iterator(O const& o) : 
std::reverse_iterator{base(o)} {} -// constexpr basic_reverse_iterator() : std::reverse_iterator{} {} -// constexpr explicit basic_reverse_iterator(Iterator it) : std::reverse_iterator(std::prev(it)) {} -// constexpr explicit operator Iterator() const { -// auto ret = this->base(); -// if(ret!=Iterator{}) {return ++ret;} -// return Iterator{}; -// } -// constexpr explicit operator bool() const {return static_cast(this->base());} -// constexpr auto operator==(basic_reverse_iterator const& other) const -> bool {return (this->base() == other.base());} -// constexpr auto operator*() const -> typename Iterator::reference {return this->current;} -// constexpr auto operator->() const -> typename Iterator::pointer {return &this->current;} -// constexpr auto operator[](typename Iterator::difference_type n) const -> typename Iterator::reference {return *(this->current - n);} -// constexpr auto operator<(basic_reverse_iterator const& o) const -> bool {return o.base() < this->base();} -// }; - - public: -// using reverse_iterator = basic_reverse_iterator; - using ptr = basic_array_ptr; - using const_ptr = basic_array_ptr; - - constexpr auto addressof() && {return ptr{this->base_, this->layout()};} - - // NOLINTNEXTLINE(runtime/operator) - constexpr auto operator&() && {return ptr {this->base_, this->layout()};} // NOLINT(google-runtime-operator) // gives compiler crash in g++-7 (Ubuntu 7.5.0-6ubuntu4) 7.5.0 - constexpr auto operator&() & {return ptr {this->base_, this->layout()};} // NOLINT(google-runtime-operator) // gives compiler crash in g++-7 (Ubuntu 7.5.0-6ubuntu4) 7.5.0 - constexpr auto operator&() const& {return const_ptr {this->base_, this->layout()};} // NOLINT(google-runtime-operator) // gives compiler crash in g++-7 (Ubuntu 7.5.0-6ubuntu4) 7.5.0 - - // constexpr auto begin(dimensionality_type d) && -> iterator { - // Layout l = static_cast(*this); l.rotate(d); - // return {types::base_ + l(0 ), l.sub_, l.stride_}; - // } - // constexpr auto end (dimensionality_type 
d) && -> iterator { - // Layout l = static_cast(*this); l.rotate(d); - // return {types::base_ + l(l.size()), l.sub_, l.stride_}; - // } - - private: - HD constexpr auto begin_aux() const {return iterator{types::base_ , this->sub(), this->stride()};} - constexpr auto end_aux () const {return iterator{types::base_ + this->nelems(), this->sub(), this->stride()};} - - public: - HD constexpr auto begin() & {return begin_aux();} - constexpr auto end () & {return end_aux() ;} - friend HD /*constexpr*/ auto begin(basic_array & self) {return self.begin();} - friend constexpr auto end (basic_array & self) {return self.end ();} - - constexpr auto begin() && {return begin();} - constexpr auto end () && {return end() ;} - friend /*constexpr*/ auto begin(basic_array && self) {return std::move(self).begin();} - friend /*constexpr*/ auto end (basic_array && self) {return std::move(self).end() ;} - - constexpr auto begin() const& -> const_iterator {return begin_aux();} - constexpr auto end () const& -> const_iterator {return end_aux() ;} - friend /*constexpr*/ auto begin(basic_array const& self) -> const_iterator {return self.begin();} - friend /*constexpr*/ auto end (basic_array const& self) -> const_iterator {return self.end() ;} - - HD constexpr auto cbegin() const& {return begin();} - constexpr auto cend() const& {return end() ;} - friend constexpr auto cbegin(basic_array const& self) {return self.cbegin();} - friend constexpr auto cend (basic_array const& self) {return self.cend() ;} - - constexpr auto mbegin() & {return move_iterator{begin()};} - constexpr auto mend() & {return move_iterator{end() };} - friend constexpr auto mbegin(basic_array & self) {return self.mbegin();} - friend constexpr auto mend (basic_array & self) {return self.mend() ;} - - constexpr auto mbegin() && {return mbegin();} - constexpr auto mend() && {return mend() ;} - friend constexpr auto mbegin(basic_array && self) {return self.mbegin();} - friend constexpr auto mend (basic_array && self) {return 
self.mend() ;} - - constexpr auto mbegin() const& -> const_iterator {return begin();} - constexpr auto mend() const& -> const_iterator {return end() ;} - friend constexpr auto mbegin(basic_array const& self) {return self.mbegin();} - friend constexpr auto mend (basic_array const& self) {return self.mend() ;} - - private: - constexpr auto home_aux() const -> cursor_t { - return {this->base(), this->strides()}; - } - - public: - constexpr auto home() const& -> cursor_t {return home_aux();} - constexpr auto home() && -> cursor_t {return home_aux();} - constexpr auto home() & -> cursor_t {return home_aux();} - - template constexpr auto assign(It first) & -> It {adl_copy_n(first, this->size(), begin()); std::advance(first, this->size()); return first;} - template constexpr auto assign(It first)&& -> It {return assign(first);} - - template< - class Range, - class = std::enable_if_t>, - class = decltype(adl_copy_n(adl_begin(std::declval()), std::declval(), std::declval())) - > - constexpr auto operator=(Range const& rng) & // check that you LHS is not read-only - -> basic_array& { // lints(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) - assert(this->size() == rng.size()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - // MULTI_MARK_SCOPE(std::string{"multi::operator= D="}+std::to_string(D)+" from range to "+typeid(T).name() ); - // adl_copy_n(adl_begin(r), this->size(), begin()); - adl_copy(adl_begin(rng), adl_end(rng), begin()); - return *this; - } - template>> - constexpr auto operator=(Range const& rng) && -> basic_array& {operator=(rng); return *this;} - - template - constexpr auto operator=(basic_array const& other) && -> basic_array& {operator=(other); return *this;} - - template - constexpr - auto operator=(basic_array const& other) & -> basic_array& { - assert(this->extension() == other.extension()); // 
NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - // MULTI_MARK_SCOPE( std::string{"multi::operator= (D="}+std::to_string(D)+") from "+typeid(TT).name()+" to "+typeid(T).name() ); - this->elements() = other.elements(); -// if(this->is_empty()) {return *this;} -// if(this->num_elements() == this->nelems() and o.num_elements() == this->nelems() and this->layout() == o.layout()) { -// this->elements() = o.elements(); -//// adl_copy_n(o.base(), o.num_elements(), this->base()); -// } else if(o.stride() < (~o).stride()) { -// (~(*this)).elements() = o.elements(); -//// adl_copy_n( (~o).begin(), (~o).size(), (~(*this)).begin() ); -// } else { -// assign(o.begin()); -// } - return *this; - } - -// constexpr auto operator=(basic_array&& o)&& // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) -// noexcept // lints(hicpp-noexcept-move,performance-noexcept-move-constructor) // TODO(correaa) : make conditionally noexcept? 
-// -> basic_array& { // lints(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) -// assert(this->extensions() == o.extensions()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function -// if(this->is_empty()) {return *this;} -// basic_array::operator=(o); -// return *this; // lints(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) -// } - - constexpr - auto operator=(basic_array const& other) & -> basic_array& { - if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) - // if(&*this == &o) {return *this;} - assert(this->extension() == other.extension()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - // MULTI_MARK_SCOPE("multi::operator= [D="+std::to_string(D)+"] from "+typeid(T).name()+" to "+typeid(T).name() ); - elements() = other.elements(); -// if(this->num_elements() == this->nelems() and o.num_elements() == this->nelems() and this->layout() == o.layout()) { -// adl_copy_n(o.base(), o.num_elements(), this->base()); -// } else if(o.stride() < (~o).stride()) { -// adl_copy_n( (~o).begin(), (~o).size(), (~(*this)).begin() ); -// } else { -// assign(o.begin()); -// } - return *this; - } - - constexpr auto operator=(basic_array const& other) && - -> basic_array& { // lints(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) - if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) - operator=(other); - return *this; // lints(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) - } - - template constexpr void swap(Array&& other) && { - assert( std::move(*this).extension() == std::forward(other).extension() ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - elements().swap(other.elements()); - // 
adl_swap_ranges(this->begin(), this->end(), adl_begin(std::forward(o))); - } - template constexpr void swap(A&& other) & {return swap(std::forward(other));} - - friend constexpr void swap(basic_array&& self, basic_array&& other) {std::move(self).swap(std::move(other));} - - template constexpr void swap(basic_array const& self, Array&& other) {self.swap(other);} // TODO(correaa) remove - template constexpr void swap(Array&& other, basic_array const& self) {self.swap(other);} - - template - friend constexpr auto operator==(basic_array const& self, basic_array const& other) -> bool { - return (self.extension() == other.extension()) and (self.elements() == other.elements()); - } - template - friend constexpr auto operator!=(basic_array const& self, basic_array const& other) -> bool { - return (self.extension() != other.extension()) or (self.elements() != other.elements()); - } - - constexpr auto operator==(basic_array const& other) const -> bool { - return (this->extension() == other.extension()) and (this->elements() == other.elements()); - } - constexpr auto operator!=(basic_array const& other) const -> bool { - return (this->extension() != other.extension()) or (this->elements() != other.elements()); - } - - private: - friend constexpr auto lexicographical_compare(basic_array const& self, basic_array const& other) -> bool { - if(self.extension().first() > other.extension().first()) {return true ;} - if(self.extension().first() < other.extension().first()) {return false;} - return adl_lexicographical_compare( - self.begin(), self.end(), - other.begin(), other.end() - ); - } - - public: - /*[[gnu::pure]]*/ constexpr auto operator< (basic_array const& other) const& -> bool {return lexicographical_compare(*this, other);} - /*[[gnu::pure]]*/ constexpr auto operator<=(basic_array const& other) const& -> bool {return *this == other or lexicographical_compare(*this, other);} - constexpr auto operator> (basic_array const& other) const& -> bool {return other < *this;} - - 
template::template rebind> - constexpr auto static_array_cast() const -> basic_array { - P2 p2{this->base_}; - return basic_array{this->layout(), p2}; - } - template::template rebind, class... Args> - constexpr auto static_array_cast(Args&&... args) const -> basic_array { // name taken from std::static_pointer_cast - return {this->layout(), P2{this->base(), std::forward(args)...}}; - } - - template - constexpr auto element_transformed(UF&& fun) const& { - return static_array_cast< - // std::remove_cv_t>>, - std::decay_t>, - transform_ptr< - // std::remove_cv_t>>, - std::decay_t>, - UF, element_const_ptr, std::invoke_result_t - > - >(std::forward(fun)); - } - template - constexpr auto element_transformed(UF&& fun) & { - return static_array_cast< - // std::remove_cv_t>>, - std::decay_t>, - transform_ptr< - // std::remove_cv_t>>, - std::decay_t>, - UF, element_ptr , std::invoke_result_t - > - >(std::forward(fun)); - } - template - constexpr auto element_transformed(UF&& fun) && {return element_transformed(std::forward(fun));} - - template< - class T2, class P2 = typename std::pointer_traits::template rebind, - class Element = typename basic_array::element, - class PM = T2 Element::* - > - constexpr auto member_cast(PM member) const& -> basic_array { - static_assert(sizeof(T)%sizeof(T2) == 0, - "array_member_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. 
Use custom alignas structures (to the interesting member(s) sizes) or custom pointers to allow reintrepreation of array elements"); - - return basic_array{this->layout().scale(sizeof(T)/sizeof(T2)), static_cast(&(this->base_->*member))}; - } - - template< - class T2, class P2 = typename std::pointer_traits::template rebind, - class Element = typename basic_array::element, - class PM = T2 Element::* - > - constexpr auto member_cast(PM member) & -> basic_array { - static_assert(sizeof(T)%sizeof(T2) == 0, - "array_member_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. Use custom alignas structures (to the interesting member(s) sizes) or custom pointers to allow reintrepreation of array elements"); - - return basic_array{this->layout().scale(sizeof(T)/sizeof(T2)), static_cast(&(this->base_->*member))}; - } - - template< - class T2, class P2 = typename std::pointer_traits::template rebind, - class Element = typename basic_array::element, - class PM = T2 Element::* - > - constexpr auto member_cast(PM member) && -> basic_array { - return this->member_cast(member); - } - - template::template rebind> - using rebind = basic_array, D, P2>; - - template, class P2 = typename std::pointer_traits::template rebind> - constexpr auto const_array_cast() && -> rebind { - return {this->layout(), const_cast(this->base())}; // NOLINT(cppcoreguidelines-pro-type-const-cast) : to implement consts cast - } - - constexpr auto as_const() const { - return rebind{this->layout(), this->base()}; - } - constexpr auto moved() & {return rebind{this->layout(), element_move_ptr{this->base()}};} - constexpr auto moved() && {return moved();} - - constexpr auto element_moved() & {return rebind{this->layout(), element_move_ptr{this->base()}};} - constexpr auto element_moved() && {return element_moved();} - - private: - template - constexpr auto reinterpret_array_cast_aux() const -> rebind { - static_assert( sizeof(T)%sizeof(T2) == 0, 
- "error: reinterpret_array_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. Use custom pointers to allow reintrepreation of array elements in other cases" ); - - return { - this->layout().scale(sizeof(T)/sizeof(T2)), // NOLINT(bugprone-sizeof-expression) : sizes are compatible according to static assert above - reinterpret_pointer_cast(this->base()) // if ADL gets confused here (e.g. multi:: and thrust::) then adl_reinterpret_pointer_cast will be necessary - }; - } - - public: - template::template rebind> - constexpr auto reinterpret_array_cast() const& {return reinterpret_array_cast_aux().as_const();} - - template::template rebind> - constexpr auto reinterpret_array_cast() & {return reinterpret_array_cast_aux();} - - template::template rebind> - constexpr auto reinterpret_array_cast() && {return reinterpret_array_cast_aux();} - - template::template rebind > - constexpr auto reinterpret_array_cast(multi::size_type count) & -> basic_array, D + 1, P2> { - static_assert( sizeof(T)%sizeof(T2) == 0, - "error: reinterpret_array_cast is limited to integral stride values"); - - assert( count > 0 ); - assert( sizeof(T) == sizeof(T2)*static_cast(count) ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - return { - layout_t{this->layout().scale(sizeof(T)/sizeof(T2)), 1, 0, count}.rotate(), // NOLINT(bugprone-sizeof-expression) T and T2 are size compatible (see static_assert above) - reinterpret_pointer_cast(this->base()) // if ADL gets confused here (e.g. 
multi:: and thrust::) then adl_reinterpret_pointer_cast will be necessary - }; - } - - template::template rebind > - constexpr auto reinterpret_array_cast(multi::size_type count) && -> basic_array, D + 1, P2> {return reinterpret_array_cast(count);} - - template::template rebind > - constexpr auto reinterpret_array_cast(size_type count) const& -> basic_array, D + 1, P2> { - static_assert( sizeof(T)%sizeof(T2) == 0, - "error: reinterpret_array_cast is limited to integral stride values"); - - assert( sizeof(T) == sizeof(T2)*static_cast(count) ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : checck implicit size compatibility - return { - layout_t{this->layout().scale(sizeof(T)/sizeof(T2)), 1, 0, count}.rotate(), - static_cast(static_cast(this->base())) - }; - } - - template - auto serialize(Archive& arxiv, unsigned int /*version*/) { - using AT = multi::archive_traits; - std::for_each(this->begin(), this->end(), [&](auto&& item) {arxiv & AT ::make_nvp("item", item);}); - // std::for_each(this->begin(), this->end(), [&](auto&& item) {arxiv & cereal::make_nvp("item", item);}); - // std::for_each(this->begin(), this->end(), [&](auto&& item) {arxiv & item ;}); - } -}; - -template struct array_iterator{}; - -template -struct array_iterator // NOLINT(fuchsia-multiple-inheritance) -: boost::multi::iterator_facade< - array_iterator, - Element, std::random_access_iterator_tag, - typename std::iterator_traits::reference, multi::difference_type -> -, multi::affine , multi::difference_type> -, multi::decrementable > -, multi::incrementable > -, multi::totally_ordered2, void> -{ - using affine = multi::affine, multi::difference_type>; - using difference_type = typename affine::difference_type; - - array_iterator() = default; - using layout_type = multi::layout_t<0>; - - template< - class Other, - decltype(multi::implicit_cast(typename Other::pointer{}))* = nullptr, - decltype(std::declval().data_)* = nullptr - > - // cppcheck-suppress 
noExplicitConstructor ; because underlying pointer is implicitly convertible - HD constexpr/*mplct*/ array_iterator(Other const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to reproduce the implicitness of the argument - : data_{other.data_}, stride_{other.stride_} {} - - template< - class Other, - decltype(multi::explicit_cast(typename Other::pointer{}))* = nullptr, - decltype(std::declval().data_)* = nullptr - > - constexpr explicit array_iterator(Other const& other) - : data_{other.data_}, stride_{other.stride_} {} - - template friend struct array_iterator; - - constexpr explicit array_iterator(std::nullptr_t nil) : data_{nil} {} - constexpr explicit array_iterator(Ptr const& ptr) : data_{ptr} {} - - template< - class EElement, typename PPtr, - typename = decltype(multi::implicit_cast(std::declval>().data_)) - > - HD constexpr /*impl*/ array_iterator(array_iterator const& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to reproduce the implicitness of original pointer - : data_{other.data_}, stride_{other.stride_} {} - - constexpr explicit operator bool() const {return static_cast(this->data_);} - - HD constexpr auto operator[](typename array_iterator::difference_type n) const -> typename std::iterator_traits::reference { - return *((*this) + n); - } - - constexpr auto operator->() const -> Ptr {return data_;} - - using element = Element; - using element_ptr = Ptr; - using pointer = element_ptr; - using stride_type = multi::index; - - static constexpr dimensionality_type rank_v = 1; - using rank = std::integral_constant; - - constexpr auto operator<(array_iterator const& other) const -> bool {return distance_to(other) > 0;} - - HD explicit constexpr array_iterator(Ptr ptr, typename basic_array::index stride) - : data_{ptr}, stride_{stride} {} - - private: - friend struct basic_array; - - element_ptr data_{nullptr}; // TODO(correaa) : consider uninitialized pointer - stride_type stride_ = {1}; - - 
/*[[gnu::pure]]*/ - constexpr auto distance_to(array_iterator const& other) const -> difference_type { - assert(stride_==other.stride_ and (other.data_-data_)%stride_ == 0); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - return (other.data_ - data_)/stride_; - } - - public: - HD constexpr auto operator+(difference_type n) const -> array_iterator {array_iterator ret{*this}; ret+=n; return ret;} - - [[deprecated("use base() for iterator")]] - constexpr auto data() const -> element_ptr {return data_;} - - constexpr auto base() const& -> element_ptr {return data_;} - - friend // TODO(correaa) : defined FRIEND_CONSTEXPR or make "conditional" constexpr? - #if not defined(__INTEL_COMPILER) and not defined(__NVCOMPILER) and not defined(__NVCC__) - constexpr // this generates a problem with intel compiler 19 and v2021 "a constexpr function cannot have a nonliteral return type" - #endif - auto base(array_iterator const& self) -> element_ptr {return self.base();} - - HD constexpr auto stride() const -> stride_type {return stride_;} - friend constexpr auto stride(array_iterator const& self) -> stride_type {return self.stride_;} - - constexpr auto operator++() -> array_iterator& {data_ += stride_; return *this;} - constexpr auto operator--() -> array_iterator& {data_ -= stride_; return *this;} - - friend constexpr auto operator==(array_iterator const& self, array_iterator const& other) -> bool {return self.data_ == other.data_;} -// friend constexpr auto operator!=(array_iterator const& a, array_iterator const& b) -> bool {return not(a.data_ == b.data_);} - - HD constexpr auto operator*() const -> typename std::iterator_traits::reference {return *data_;} // NOLINT(readability-const-return-type) - - /*[[gnu::pure]]*/ - constexpr auto operator-(array_iterator const& other) const -> difference_type {return -distance_to(other);} - - constexpr auto operator+=(difference_type n) -> array_iterator& {data_ += 
stride_*n; return *this;} - constexpr auto operator-=(difference_type n) -> array_iterator& {data_ -= stride_*n; return *this;} -}; - -template -using iterator = array_iterator; - -template -struct basic_array -: array_types { - using types = array_types; - using types::types; - - using element = typename types::element; - using element_ref = typename std::iterator_traits::reference; - using element_cref = typename std::iterator_traits::reference; - using iterator = array_iterator; - - constexpr auto operator= (element const& elem) & -> basic_array& { - // MULTI_MARK_SCOPE(std::string{"multi::operator= D=0 from "}+typeid(T).name()+" to "+typeid(T).name() ); - adl_copy_n(&elem, 1, this->base_); - return *this; - } - constexpr auto operator= (element const& elem) && -> basic_array& { - operator=(elem); - return *this; // lints(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) - } - - constexpr auto operator==(element const& elem) const -> bool { - assert(this->num_elements() == 1); - return adl_equal(&elem, std::next(&elem, this->num_elements()), this->base()); - } - constexpr auto operator!=(element const& elem) const {return not operator==(elem);} - - template - constexpr - auto operator=(Range0 const& rng) & -> basic_array& { - adl_copy_n(&rng, 1, this->base_); - return *this; - } - - constexpr auto elements_at(size_type idx [[maybe_unused]]) const& -> element_cref {assert(idx < this->num_elements()); return *(this->base_);} - constexpr auto elements_at(size_type idx [[maybe_unused]]) && -> element_ref {assert(idx < this->num_elements()); return *(this->base_);} - constexpr auto elements_at(size_type idx [[maybe_unused]]) & -> element_ref {assert(idx < this->num_elements()); return *(this->base_);} - - constexpr auto operator!=(basic_array const& other) const {return not adl_equal(other.base_, other.base_ + 1, this->base_);} - constexpr auto operator==(basic_array const& other) const {return adl_equal(other.base_, other.base_ + 1, 
this->base_);} - - using decay_type = typename types::element; - - constexpr auto operator()() const -> element_ref {return *(this->base_);} - - constexpr operator element_ref () && {return *(this->base_);} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax - constexpr operator element_ref () & {return *(this->base_);} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax - constexpr operator element_cref() const& {return *(this->base_);} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax - - template - auto serialize(Archive& arxiv, const unsigned int /*version*/) { - using AT = multi::archive_traits; - auto& element_ = *(this->base_); - arxiv & AT::make_nvp("element", element_); - // arxiv & cereal::make_nvp("element", element_); - // arxiv & element_ ; - } -}; - -template -struct basic_array // NOLINT(fuchsia-multiple-inheritance) : to define operators via CRTP -// : multi::partially_ordered2, void> -: multi::random_iterable > -, array_types { - ~basic_array() = default; // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - - auto operator=(basic_array&& other) & - noexcept(std::is_nothrow_copy_assignable_v) // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) -// ^^^ lints(hicpp-noexcept-move,performance-noexcept-move-constructor) - -> basic_array& { // lints(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - operator=(other); - return *this; // lints([cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) - } - - static constexpr dimensionality_type rank_v = 1; - using rank = std::integral_constant; - - using types = array_types; - using types::types; - using layout_type = Layout; - using ref_ = basic_array; - - using element_type = T; - - using element_ptr = typename types::element_ptr; - using element_const_ptr = typename 
std::pointer_traits::template rebind; - using element_move_ptr = multi::move_ptr; - using element_ref = typename types::element_ref; - using element_cref = typename std::iterator_traits::reference; - - using default_allocator_type = typename multi::pointer_traits::default_allocator_type; - - constexpr auto get_allocator() const -> default_allocator_type {return default_allocator_of(basic_array::base());} - friend - #if not defined(__NVCC__) and not defined(__NVCOMPILER) and not defined(__INTEL_COMPILER) - constexpr - #endif - auto get_allocator(basic_array const& self) -> default_allocator_type {return self.get_allocator();} - - using decay_type = array::default_allocator_type>; - - constexpr auto decay() const -> decay_type {return decay_type{*this};} - friend constexpr auto decay(basic_array const& self) -> decay_type {return self.decay();} - - using basic_const_array = basic_array< - T, 1, - typename std::pointer_traits::template rebind, - Layout - >; - - using const_reference = typename array_types::const_reference; - using reference = typename array_types:: reference; - - protected: - template constexpr void intersection_assign(A&& other)&& {intersection_assign(std::forward(other));} - template constexpr void intersection_assign(A&& other)& { - std::for_each( - intersection(types::extension(), extension(other)).begin(), - intersection(types::extension(), extension(other)).end() , - [&](auto const idx) {operator[](idx) = std::forward(other)[idx];} - ); - // for(auto const idx : intersection(types::extension(), extension(other))) { - // operator[](idx) = std::forward(other)[idx]; - // } - } - - basic_array(basic_array const&) = default; - - template friend struct basic_array; - template friend struct static_array; - - template - friend constexpr auto static_array_cast(basic_array const&) -> decltype(auto); - - template - friend constexpr auto reinterpret_array_cast(basic_array&& self) { - return std::move(self).template reinterpret_array_cast::template 
rebind>(); - } - template - friend constexpr auto reinterpret_array_cast(basic_array const& self) { - return self.template reinterpret_array_cast::template rebind>(); - } - - public: - friend constexpr auto sizes(basic_array const& self) noexcept -> typename basic_array::sizes_type {return self.sizes();} // needed by nvcc - friend constexpr auto size (basic_array const& self) noexcept -> typename basic_array::size_type {return self.size ();} // needed by nvcc - - constexpr auto operator+() const -> decay_type {return decay();} - - basic_array(basic_array&&) noexcept = default; // in C++ 14 this is necessary to return array references from functions -// in c++17 things changed and non-moveable non-copyable types can be returned from functions and captured by auto - - protected: - template friend struct basic_array_ptr; - template friend struct array_iterator; - - public: - friend constexpr auto dimensionality(basic_array const& /*self*/) -> dimensionality_type {return 1;} - - auto operator=(std::initializer_list values) && -> basic_array& {operator=(values); return *this;} - auto operator=(std::initializer_list values) & -> basic_array& { - assert( static_cast(values.size()) == this->size() ); - adl_copy_n(values.begin(), values.size(), begin()); - return *this; - } - - // NOLINTNEXTLINE(runtime/operator) - HD constexpr auto operator&() && -> basic_array_ptr { // NOLINT(google-runtime-operator) : taking address of a reference-like object should be allowed - return {this->base_, this->layout()}; - } - // NOLINTNEXTLINE(runtime/operator) - HD constexpr auto operator&() & -> basic_array_ptr { // NOLINT(google-runtime-operator) : taking address of a reference-like object should be allowed - return {this->base_, this->layout()}; - } - // NOLINTNEXTLINE(runtime/operator) - HD constexpr auto operator&() const& -> basic_array_ptr {return {this->base_, this->layout()};} // NOLINT(google-runtime-operator) extend semantics - - HD constexpr void assign(std::initializer_list 
values) const {assert( values.size() == static_cast(this->size()) ); - assign(values.begin(), values.end()); - } - template - constexpr auto assign(It first) & -> It {adl_copy_n(first, this->size(), this->begin()); std::advance(first, this->size()); return first;} - template - constexpr auto assign(It first)&& -> It {return assign(first);} - template - constexpr void assign(It first, It last) & { - assert( std::distance(first, last) == this->size() ); (void)last; // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - assign(first); - } - template - constexpr void assign(It first, It last)&& {assign(first, last);} - - constexpr auto operator=(basic_array const& other) & -> basic_array& { - static_assert(std::is_copy_assignable_v, "assignment requires element-wise assignment"); // TODO(correaa) : make sfinae friendly - if(this == std::addressof(other)) {return *this;} - assert(this->extension() == other.extension()); - elements() = other.elements(); - return *this; - } - constexpr auto operator=(basic_array const& other) && -> basic_array& { - if(this == std::addressof(other)) {return *this;} // lints cert-oop54-cpp - operator=(other); return *this; - } - - private: - HD constexpr auto at_aux(index idx) const -> typename basic_array::reference { // NOLINT(readability-const-return-type) fancy pointers can deref into const values to avoid assignment - // MULTI_ACCESS_ASSERT(this->extension().contains(i)&&"out of bounds"); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - auto ba = this->base(); // NOLINT(llvm-qualified-auto,readability-qualified-auto) - auto of = (idx*this->stride() - this->offset()); // NOLINT(llvm-qualified-auto,readability-qualified-auto) - auto pt = ba + of; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic,llvm-qualified-auto,readability-qualified-auto) - return *pt; // in C++17 this is allowed even with 
syntethic references - // return *(this->base() + (idx*this->stride() - this->offset())); // TODO(correaa) use this->base()[(i*this->stride() - this->offset())] - } - - public: - HD constexpr auto operator[](index idx) const& -> typename basic_array::const_reference {return at_aux(idx);} // NOLINT(readability-const-return-type) fancy pointers can deref into const values to avoid assignment - HD constexpr auto operator[](index idx) & -> typename basic_array:: reference {return at_aux(idx);} // NOLINT(readability-const-return-type) fancy pointers can deref into const values to avoid assignment - HD constexpr auto operator[](index idx) && -> typename basic_array:: reference {return at_aux(idx);} // NOLINT(readability-const-return-type) fancy pointers can deref into const values to avoid assignment - - constexpr auto front() const& -> const_reference {return *begin();} - constexpr auto back() const& -> const_reference {return *std::prev(end());} - - constexpr auto front() && -> reference {return *begin();} - constexpr auto back() && -> reference {return *std::prev(end());} - - constexpr auto front() & -> reference {return *begin();} - constexpr auto back() & -> reference {return *std::prev(end());} - - - private: - template - static constexpr auto apply_impl(Self&& self, Tuple const& tuple, std::index_sequence /*012*/) -> decltype(auto) { - return std::forward(self)(std::get(tuple)...); - } - - public: - template HD constexpr auto apply(Tuple const& tuple) const& -> decltype(auto) {return apply_impl( *this , tuple, std::make_index_sequence>());} - template HD constexpr auto apply(Tuple const& tuple) && -> decltype(auto) {return apply_impl(std::move(*this), tuple, std::make_index_sequence>());} - template constexpr auto apply(Tuple const& tuple) & -> decltype(auto) {return apply_impl( *this , tuple, std::make_index_sequence>());} - - template::value == 0), int> = 0> HD constexpr auto operator[](Tuple const& /*empty*/) const& -> decltype(auto) {return *this;} - 
template::value == 1), int> = 0> HD constexpr auto operator[](Tuple const& indices ) const& -> decltype(auto) {return operator[](std::get<0>(indices));} - template::value > 1), int> = 0> HD constexpr auto operator[](Tuple const& indices ) const& - ->decltype(operator[](std::get<0>(indices))[detail::tuple_tail(indices)]) { - return operator[](std::get<0>(indices))[detail::tuple_tail(indices)]; } - - HD constexpr auto elements_at(size_type idx) const& -> decltype(auto) {assert(idx < this->num_elements()); return operator[](idx);} - HD constexpr auto elements_at(size_type idx) && -> decltype(auto) {assert(idx < this->num_elements()); return operator[](idx);} - HD constexpr auto elements_at(size_type idx) & -> decltype(auto) {assert(idx < this->num_elements()); return operator[](idx);} - - constexpr auto reindexed(index first) && {return reindexed(first);} - constexpr auto reindexed(index first) & { - typename types::layout_t new_layout = this->layout(); - new_layout.reindex(first); - return basic_array{new_layout, types::base_}; - } - - private: - constexpr auto take_aux(difference_type count) const { - assert( count <= this->size() ); // calculating size is expensive that is why - typename types::layout_t new_layout{ - this->layout().sub(), - this->layout().stride(), - this->layout().offset(), - this->stride()*count - }; - return basic_array{new_layout, this->base()}; - } - - public: - constexpr auto take(difference_type count) const& -> basic_const_array {return take_aux(count);} - constexpr auto take(difference_type count) && -> basic_array {return take_aux(count);} - constexpr auto take(difference_type count) & -> basic_array {return take_aux(count);} - - private: - constexpr auto drop_aux(difference_type count) const -> basic_array { - assert( count <= this->size() ); - typename types::layout_t new_layout{ - this->layout().sub(), - this->layout().stride(), - this->layout().offset(), - this->stride()*(this->size() - count) - }; - return basic_array{new_layout, 
this->base() + (count*this->layout().stride() - this->layout().offset())}; - } - - public: - constexpr auto drop(difference_type count) const& -> basic_const_array {return drop_aux(count);} - constexpr auto drop(difference_type count) && -> basic_array {return drop_aux(count);} - constexpr auto drop(difference_type count) & -> basic_array {return drop_aux(count);} - - private: - HD /*[[gnu::pure]]*/ constexpr auto sliced_aux(index first, index last) const { - typename types::layout_t new_layout = this->layout(); - if(this->is_empty()) { - assert(first == last); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - new_layout.nelems() = 0; // TODO(correaa) : don't use mutation - } else { - (new_layout.nelems() /= this->size())*=(last - first); - } - return basic_array{new_layout, this->base() + (first*this->layout().stride() - this->layout().offset())}; - } - - public: - using elements_iterator = elements_iterator_t; - using celements_iterator = elements_iterator_t; - - using elements_range = elements_range_t; - using const_elements_range = elements_range_t; - - private: - constexpr auto elements_aux() const {return elements_range{this->base(), this->layout()};} - - public: - constexpr auto elements() & -> elements_range {return elements_aux();} - constexpr auto elements() && -> elements_range {return elements_aux();} - constexpr auto elements() const& -> const_elements_range {return const_elements_range{this->base(), this->layout()};} // TODO(correaa) simplify - - constexpr auto celements() const -> const_elements_range {return elements_aux();} - - constexpr auto hull() const -> std::pair { - return {std::min(this->base(), this->base() + this->hull_size()), std::abs(this->hull_size())}; - } - - HD constexpr auto sliced(index first, index last) const& -> basic_const_array {return sliced_aux(first, last);} - HD /*[[gnu::pure]]*/ constexpr auto sliced(index first, index last) & -> basic_array {return 
sliced_aux(first, last);} - HD constexpr auto sliced(index first, index last) && -> basic_array {return sliced_aux(first, last);} - - /*[[gnu::pure]]*/ constexpr auto blocked(index first, index last)& -> basic_array { - return sliced(first, last).reindexed(first); - } - /*[[gnu::pure]]*/ constexpr auto stenciled(typename basic_array::index_extension ext) -> basic_array { - return blocked(ext.start(), ext.finish()); - } - - private: - constexpr auto strided_aux(difference_type diff) const -> basic_array { - typename types::layout_t new_layout = {this->layout().sub(), this->layout().stride()*diff, this->layout().offset(), this->layout().nelems()}; - return {new_layout, types::base_}; - } - - public: - constexpr auto strided(difference_type diff) const& -> basic_const_array {return strided_aux(diff);} - constexpr auto strided(difference_type diff) && -> basic_array {return strided_aux(diff);} - constexpr auto strided(difference_type diff) & -> basic_array {return strided_aux(diff);} - - HD constexpr auto sliced(index first, index last, difference_type stride) const& -> basic_const_array {return sliced(first, last).strided(stride);} - HD constexpr auto sliced(index first, index last, difference_type stride) && -> basic_array {return sliced(first, last).strided(stride);} - HD constexpr auto sliced(index first, index last, difference_type stride) & -> basic_array {return sliced(first, last).strided(stride);} - - HD constexpr auto range(index_range const& rng) & {return sliced(rng.front(), rng.last());} - HD constexpr auto range(index_range const& rng) && {return std::move(*this).sliced(rng.front(), rng.last());} - HD constexpr auto range(index_range const& rng) const& {return sliced(rng.front(), rng.last());} - - HD constexpr auto operator()() const& -> basic_const_array {return {this->layout(), this->base()};} - HD constexpr auto operator()() && -> basic_array {return *this;} - HD constexpr auto operator()() & -> basic_array {return *this;} - - HD constexpr auto 
operator()(index_range const& rng) & {return range(rng);} - HD constexpr auto operator()(index_range const& rng) && {return std::move(*this).range(rng);} - HD constexpr auto operator()(index_range const& rng) const& {return range(rng);} - - HD constexpr auto operator()(index idx) & -> decltype(auto) {return operator[](idx);} - HD constexpr auto operator()(index idx) && -> decltype(auto) {return std::move(*this).operator[](idx);} - HD constexpr auto operator()(index idx) const& -> decltype(auto) {return operator[](idx);} - - private: - HD constexpr auto paren_aux() & {return operator()();} - HD constexpr auto paren_aux() && {return operator()();} - HD constexpr auto paren_aux() const& {return operator()();} - - HD constexpr auto paren_aux(index_range const& rng) & {return range(rng);} - HD constexpr auto paren_aux(index_range const& rng) && {return range(rng);} - HD constexpr auto paren_aux(index_range const& rng) const& {return range(rng);} - - HD constexpr auto paren_aux(index idx) & -> decltype(auto) {return operator[](idx);} - HD constexpr auto paren_aux(index idx) && -> decltype(auto) {return operator[](idx);} - HD constexpr auto paren_aux(index idx) const& -> decltype(auto) {return operator[](idx);} - - constexpr auto paren_aux(intersecting_range const& rng) & -> decltype(auto) {return paren_aux(intersection(this->extension(), rng));} - constexpr auto paren_aux(intersecting_range const& rng) && -> decltype(auto) {return std::move(*this).paren_aux(intersection(this->extension(), rng));} - constexpr auto paren_aux(intersecting_range const& rng) const& -> decltype(auto) {return paren_aux(intersection(this->extension(), rng));} - - public: - constexpr auto operator()(intersecting_range const& isrange) & -> decltype(auto) {return paren_aux(isrange);} - constexpr auto operator()(intersecting_range const& isrange) && -> decltype(auto) {return std::move(*this).paren_aux(isrange);} - constexpr auto operator()(intersecting_range const& isrange) const& -> decltype(auto) 
{return paren_aux(isrange);} - - template - constexpr auto operator()(Args&&... args) & - ->decltype(paren(*this, std::forward(args)...)) { - return paren(*this, std::forward(args)...); } - - template - constexpr auto operator()(Args&&... args) && - ->decltype(paren(std::move(*this), std::forward(args)...)) { - return paren(std::move(*this), std::forward(args)...); } - - template - constexpr auto operator()(Args&&... args) const& - ->decltype(paren(*this, std::forward(args)...)) { - return paren(*this, std::forward(args)...); } - - using partitioned_type = basic_array; - using partitioned_const_type = basic_array; - - private: - constexpr auto partitioned_aux(size_type size) const -> partitioned_type { - assert( size != 0 ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - assert( (this->layout().nelems() % size) == 0 ); // TODO(correaa) remove assert? truncate left over? (like mathematica) // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - multi::layout_t<2> new_layout{this->layout(), this->layout().nelems()/size, 0, this->layout().nelems()}; - new_layout.sub().nelems() /= size; // TODO(correaa) : don't use mutation - return {new_layout, types::base_}; - } - - public: - constexpr auto partitioned(size_type size) const& -> partitioned_const_type {return partitioned_aux(size);} - constexpr auto partitioned(size_type size) & -> partitioned_type {return partitioned_aux(size);} - constexpr auto partitioned(size_type size) && -> partitioned_type {return partitioned_aux(size);} - - private: - constexpr auto chunked_aux(size_type size) const -> partitioned_type { - assert( this->size() % size == 0 ); - return partitioned_aux(this->size()/size); - } - - public: // in Mathematica this is called Partition https://reference.wolfram.com/language/ref/Partition.html in RangesV3 it is called chunk - constexpr auto chunked(size_type size) const& 
-> partitioned_const_type {return chunked_aux(size);} - constexpr auto chunked(size_type size) & -> partitioned_type {return chunked_aux(size);} - constexpr auto chunked(size_type size) && -> partitioned_type {return chunked_aux(size);} - - private: - constexpr auto reversed_aux() const -> basic_array { - auto new_layout = this->layout(); - new_layout.reverse(); - return {new_layout, types::base_}; - } - - public: - constexpr auto reversed() const& -> basic_const_array {return reversed_aux();} - constexpr auto reversed() & -> basic_array {return reversed_aux();} - constexpr auto reversed() && -> basic_array {return reversed_aux();} - - friend constexpr auto reversed(basic_array const& self) -> basic_const_array {return self .reversed();} - friend constexpr auto reversed(basic_array & self) -> basic_array {return self .reversed();} - friend constexpr auto reversed(basic_array && self) -> basic_array {return std::move(self).reversed();} - - friend constexpr auto rotated(basic_array const& self) -> decltype(auto) {return self. 
rotated();} - friend constexpr auto unrotated(basic_array const& self) -> decltype(auto) {return self.unrotated();} - - constexpr auto rotated() & -> decltype(auto) {return operator()();} - constexpr auto rotated() && -> decltype(auto) {return operator()();} - constexpr auto rotated() const& -> decltype(auto) {return operator()();} - - HD constexpr auto unrotated() const& -> decltype(auto) {return operator()();} - HD constexpr auto unrotated() && -> decltype(auto) {return operator()();} - HD constexpr auto unrotated() & -> decltype(auto) {return operator()();} - - using iterator = typename multi::array_iterator; - using const_iterator = typename multi::array_iterator; - using move_iterator = array_iterator; - - private: - HD constexpr explicit basic_array(iterator begin, iterator end) - : basic_array { - layout_type{ {}/*begin->layout()*/, begin.stride(), 0, begin.stride()*(end - begin)}, - begin.base() - } { - assert(begin.stride() == end.stride() ); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - // assert(begin->layout() == end->layout()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - } - friend constexpr auto ref(iterator begin, iterator end) -> multi::basic_array; - - constexpr HD auto begin_aux() const {return iterator{this->base_ , this->stride()};} - constexpr auto end_aux () const {return iterator{this->base_ + types::nelems(), this->stride()};} - - public: - constexpr HD auto begin() const& -> const_iterator {return begin_aux();} - constexpr auto begin() & -> iterator {return begin_aux();} - constexpr auto begin() && -> iterator {return begin_aux();} - - constexpr auto mbegin() & {return move_iterator{begin()};} - constexpr auto mend () & {return move_iterator{end ()};} - - constexpr auto mbegin() && {return move_iterator{begin()};} - constexpr auto mend () && {return move_iterator{end ()};} - - constexpr auto end 
() const& -> const_iterator {return end_aux();} - constexpr auto end () & -> iterator {return end_aux();} - constexpr auto end () && -> iterator {return end_aux();} - - friend /*constexpr*/ auto begin(basic_array const& self) -> const_iterator {return self .begin();} - friend /*constexpr*/ auto begin(basic_array & self) -> iterator {return self .begin();} - friend /*constexpr*/ auto begin(basic_array && self) -> iterator {return std::move(self).begin();} - - friend constexpr auto end (basic_array const& self) -> const_iterator {return self .end() ;} - friend constexpr auto end (basic_array & self) -> iterator {return self .end() ;} - friend constexpr auto end (basic_array && self) -> iterator {return std::move(self).end() ;} - - HD constexpr auto cbegin() const& -> const_iterator {return begin();} - constexpr auto cend () const& -> const_iterator {return end() ;} - - friend HD /*constexpr*/ auto cbegin(basic_array const& self) {return self.cbegin();} - friend constexpr auto cend (basic_array const& self) {return self.cend() ;} - - template constexpr auto operator=(basic_array const& other) && -> basic_array& {operator=( other ); return *this;} - template constexpr auto operator=(basic_array const& other) & -> basic_array& { - assert(other.extensions() == this->extensions()); - elements() = other.elements(); - return *this; - } - - template constexpr auto operator=(basic_array && other) && -> basic_array& {operator=(std::move(other)); return *this;} - template constexpr auto operator=(basic_array && other) & -> basic_array& { - assert(this->extensions() == other.extensions()); - elements() = std::move(other).elements(); - return *this; - } - - template constexpr auto assign(It first) && - ->decltype(adl_copy_n(first, this->size(), std::declval()), void()) { - return adl_copy_n(first, this->size(), std::move(*this).begin()), void(); } - - template - friend constexpr auto operator==(basic_array const& self, basic_array const& other) -> bool { - return self.extension() 
== other.extension() and self.elements() == other.elements(); - } - template - friend constexpr auto operator!=(basic_array const& self, basic_array const& other) -> bool { - return self.extension() != other.extension() or self.elements() != other.elements(); - } - - /*[[gnu::pure]]*/ friend constexpr auto operator< (basic_array const& self, basic_array const& other) -> bool {return lexicographical_compare(self, other);} - /*[[gnu::pure]]*/ friend constexpr auto operator<=(basic_array const& self, basic_array const& other) -> bool {return lexicographical_compare(self, other) or self == other;} - - template constexpr void swap(Array&& other) && { - assert(this->extension() == other.extension()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - adl_swap_ranges(this->begin(), this->end(), adl_begin(std::forward(other))); - } - template constexpr void swap(A&& other) & {return swap(std::forward(other));} - - friend constexpr void swap(basic_array&& self, basic_array&& other) {std::move(self).swap(std::move(other));} - - template>>> friend constexpr void swap(basic_array&& self, A&& other) {self.swap(other);} - template>>> friend constexpr void swap(A&& other, basic_array&& self) {self.swap(other);} - - private: - template - /*[[gnu::pure]]*/ static constexpr auto lexicographical_compare(A1 const& self, A2 const& other) -> bool { - if(extension(self).first() > extension(other).first()) {return true ;} - if(extension(self).first() < extension(other).first()) {return false;} - return adl_lexicographical_compare(adl_begin(self), adl_end(self), adl_begin(other), adl_end(other)); - } - - public: - template::template rebind> - constexpr auto static_array_cast() const -> basic_array { // name taken from std::static_pointer_cast - return {this->layout(), static_cast(this->base())}; - } - template::template rebind, class... Args> - constexpr auto static_array_cast(Args&&... 
args) const -> basic_array { // name taken from std::static_pointer_cast - return {this->layout(), P2{this->base(), std::forward(args)...}}; - } - - template - constexpr auto element_transformed(UF&& fun) const& { - return static_array_cast< - // std::remove_cv_t>>, - std::decay_t>, - transform_ptr< - // std::remove_cv_t>>, - std::decay_t>, - UF, element_const_ptr, std::invoke_result_t - > - >(std::forward(fun)); - } - template - constexpr auto element_transformed(UF&& fun) & { - return static_array_cast< - // std::remove_cv_t>>, - std::decay_t>, - transform_ptr< - // std::remove_cv_t>>, - std::decay_t>, - UF, element_ptr , std::invoke_result_t - > - >(std::forward(fun)); - } - template - constexpr auto element_transformed(UF&& fun) && {return element_transformed(std::forward(fun));} - - template< - class T2, class P2 = typename std::pointer_traits::template rebind, - class Element = typename basic_array::element, - class PM = T2 std::decay_t::* - > - constexpr auto member_cast(PM member) const -> basic_array { - static_assert(sizeof(T)%sizeof(T2) == 0, - "array_member_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. Use custom alignas structures (to the interesting member(s) sizes) or custom pointers to allow reintrepreation of array elements"); - -#if defined(__GNUC__) and (not defined(__INTEL_COMPILER)) - auto&& r1 = (*(reinterpret_cast(basic_array::base_))).*member; // ->*pm; // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : reinterpret is what the function does. alternative for GCC/NVCC - auto* p1 = &r1; P2 p2 = reinterpret_cast(p1); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : TODO(correaa) : find a better way - return {this->layout().scale(sizeof(T)/sizeof(T2)), p2}; -#else - return {this->layout().scale(sizeof(T)/sizeof(T2)), static_cast(&(this->base_->*member))}; // this crashes nvcc 11.2-11.4 and some? 
gcc compiler -#endif - } - - constexpr auto moved() & {return basic_array{this->layout(), element_move_ptr{this->base()}};} - constexpr auto moved() && {return moved();} - - constexpr auto element_moved() & {return basic_array{this->layout(), element_move_ptr{this->base()}};} - constexpr auto element_moved() && {return element_moved();} - - template::template rebind> - constexpr auto reinterpret_array_cast() const& -> basic_array, 1, P2> { // TODO(correaa) : use rebind for return type - static_assert( sizeof(T)%sizeof(T2)== 0, - "error: reinterpret_array_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. Use custom pointers to allow reintrepreation of array elements in other cases"); - - return {this->layout().scale(sizeof(T)/sizeof(T2)), reinterpret_pointer_cast(this->base())}; - } - - template::template rebind > - constexpr auto reinterpret_array_cast(size_type n) const& -> basic_array, 2, P2> { // TODO(correaa) : use rebind for return type - static_assert( sizeof(T)%sizeof(T2)== 0, - "error: reinterpret_array_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. Use custom pointers to allow reintrepreation of array elements in other cases"); - - return basic_array, 2, P2>{ - layout_t<2>{this->layout().scale(sizeof(T)/sizeof(T2)), 1, 0, n}, - reinterpret_pointer_cast(this->base()) - }.rotated(); - } - - // TODO(correaa) : rename to reinterpret_pointer_cast? - template::template rebind > - constexpr auto reinterpret_array_cast(size_type n)& -> basic_array, 2, P2> { - static_assert( sizeof(T)%sizeof(T2)== 0, - "error: reinterpret_array_cast is limited to integral stride values, therefore the element target size must be multiple of the source element size. 
Use custom pointers to allow reintrepreation of array elements in other cases"); - - return basic_array, 2, P2>{ - layout_t<2>{this->layout().scale(sizeof(T)/sizeof(T2)), 1, 0, n}, - reinterpret_pointer_cast(this->base()) - }.rotated(); - } - template::template rebind > - constexpr auto reinterpret_array_cast(size_type n)&& -> basic_array, 2, P2> { - return this->reinterpret_array_cast(n); - } - - template - constexpr auto fill(TT const& value) & -> decltype(auto) { - return adl_fill_n(this->begin(), this->size(), value), *this; - } - constexpr auto fill()& -> decltype(auto) {return fill(typename basic_array::element_type{});} - - template - constexpr auto fill(TT const& value) && -> decltype(auto) {return std::move(this->fill(value));} - constexpr auto fill() && -> decltype(auto) { - return std::move(*this).fill(typename basic_array::element_type{}); - } - - template - void serialize(Archive& arxiv, unsigned /*version*/) { - using AT = multi::archive_traits; - std::for_each(this->begin(), this->end(), [&](auto&& item) {arxiv & AT ::make_nvp("item", item);}); - // std::for_each(this->begin(), this->end(), [&](auto&& item) {arxiv & cereal::make_nvp("item", item);}); - // std::for_each(this->begin(), this->end(), [&](auto&& item) {arxiv & item ;}); - } -}; - -template -constexpr auto static_array_cast(Array&& self, Args&&... args) -> decltype(auto) { - return self.template static_array_cast(std::forward(args)...); -} - -template -struct array_ref // TODO(correaa) : inheredit from multi::partially_ordered2, void>? 
-: basic_array -{ - ~array_ref() = default; // lints(cppcoreguidelines-special-member-functions) - - using layout_type = typename array_ref::types::layout_t; - - protected: - constexpr array_ref() noexcept : basic_array{{}, nullptr} {} - - using iterator = typename basic_array::iterator; - - public: // lints(hicpp-use-equals-delete,modernize-use-equals-delete) - array_ref(iterator, iterator) = delete; - - friend constexpr auto sizes(array_ref const& self) noexcept -> typename array_ref::sizes_type {return self.sizes();} // needed by nvcc - friend constexpr auto size (array_ref const& self) noexcept -> typename array_ref::size_type {return self.size ();} // needed by nvcc - - protected: - [[deprecated("references are not copyable, use auto&&")]] - array_ref(array_ref const&) = default; // don't try to use `auto` for references, use `auto&&` or explicit value type - - public: - #if defined(__NVCC__) - array_ref(array_ref&&) noexcept = default; // this needs to be public in nvcc c++17 - #else - array_ref(array_ref&&) = delete; - #endif - - - template{}>, decltype(multi::explicit_cast(std::declval()))* = nullptr> - constexpr explicit array_ref(array_ref&& other) - : basic_array{other.layout(), ElementPtr{other.base()}} {} - - template{}>, decltype(multi::implicit_cast(std::declval()))* = nullptr> - // cppcheck-suppress noExplicitConstructor ; to allow terse syntax - constexpr /*implicit*/ array_ref(array_ref&& other) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) - : basic_array{other.layout(), ElementPtr{other.base()}} {} - - constexpr explicit array_ref(typename array_ref::element_ptr data, typename array_ref::extensions_type extensions) noexcept // TODO(correa) eliminate this ctor - : basic_array{typename array_ref::types::layout_t{extensions}, data} {} - - constexpr array_ref(typename array_ref::extensions_type extensions, typename array_ref::element_ptr data) noexcept - : basic_array{typename array_ref::types::layout_t{extensions}, data} {} - - 
template - // cppcheck-suppress noExplicitConstructor ; to allow terse syntax and because a reference to c-array can be represented as an array_ref - constexpr array_ref( // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : to allow terse syntax and because a reference to c-array can be represented as an array_ref - TT(&array)[N] // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility - ) - : array_ref( - multi::data_elements(array), - extensions(array) - ) {} - - using basic_array::operator=; - - private: - template constexpr auto copy_elements(It first) { - return adl_copy_n(first, array_ref::num_elements(), array_ref::data_elements()); - } - - public: - HD constexpr auto data_elements() const& -> typename array_ref::element_ptr {return array_ref::base_;} - - constexpr auto operator=(array_ref const& other) & -> array_ref& { - if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) - assert(this->num_elements() == other.num_elements()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - array_ref::copy_elements(other.data_elements()); - return *this; - } - constexpr auto operator=(array_ref const& other) && -> array_ref& { - if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) - operator=(other); - return *this; - } - - constexpr auto operator=(array_ref&& other) & // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - -> array_ref& { - if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) - operator=(std::as_const(other)); - return *this; - } - constexpr auto operator=(array_ref&& other) && // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - -> array_ref& { - if(this == std::addressof(other)) {return *this;} // lints(cert-oop54-cpp) - operator=(std::as_const(other)); - return *this; - } - - template -// constexpr - auto 
operator=(array_ref const& other)& -> array_ref& { - assert( this->extensions() == other.extensions() ); - // MULTI_MARK_SCOPE(std::string{"multi::operator= D="}+std::to_string(D)+" from "+typeid(TT).name()+" to "+typeid(T).name() ); - adl_copy_n(other.data_elements(), other.num_elements(), this->data_elements()); - return *this; - } - - template - constexpr auto operator=(array_ref const& other) && -> array_ref& { - this->operator=(other); - return *this; // lints (cppcoreguidelines-c-copy-assignment-signature) - } - - using elements_type = array_ref; - using celements_type = array_ref; - - private: - constexpr auto elements_aux() const { - return elements_type{ - this->data_elements(), - typename elements_type::extensions_type{multi::iextension{this->num_elements()}} - }; - } - - public: - constexpr auto elements() const& -> celements_type {return elements_aux();} - constexpr auto elements() & -> elements_type {return elements_aux();} - constexpr auto elements() && -> elements_type {return elements_aux();} - - friend constexpr auto elements(array_ref & self) -> elements_type {return self . elements();} - friend constexpr auto elements(array_ref && self) -> elements_type {return std::move(self). elements();} - friend constexpr auto elements(array_ref const& self) -> celements_type {return self . elements();} - - constexpr auto celements() const& {return celements_type{array_ref::data_elements(), array_ref::num_elements()};} - friend constexpr auto celements(array_ref const& self) {return self.celements();} - - template - /*[[gnu::pure]]*/ friend constexpr auto operator==(array_ref const& self, array_ref const& other) -> bool { - if(self.extensions() != other.extensions()) {return false;} // TODO(correaa) : or assert? 
- return adl_equal(other.data_elements(), other.data_elements() + self.num_elements(), self.data_elements()); - } - template - /*[[gnu::pure]]*/ friend constexpr auto operator!=(array_ref const& self, array_ref const& other) -> bool { - if(self.extensions() != other.extensions()) {return true;} // TODO(correaa) : or assert? - return not adl_equal(other.data_elements(), other.data_elements() + self.num_elements(), self.data_elements()); - } - - HD constexpr auto data_elements() && -> typename array_ref::element_ptr {return array_ref::base_;} - friend constexpr auto data_elements(array_ref&& self) -> typename array_ref::element_ptr {return std::move(self).data_elements();} - - // data() is here for compatibility with std::vector - template = 0> constexpr auto data() const& {return data_elements();} - template = 0> constexpr auto data() && {return data_elements();} - template = 0> constexpr auto data() & {return data_elements();} - - // TODO(correaa) : find a way to use [[deprecated("use data_elements()")]] for friend functions - friend constexpr auto data(array_ref const& self) -> typename array_ref::element_ptr {return self .data_elements();} - friend constexpr auto data(array_ref & self) -> typename array_ref::element_ptr {return self .data_elements();} - friend constexpr auto data(array_ref && self) -> typename array_ref::element_ptr {return std::move(self).data_elements();} - - using decay_type = typename array_ref::decay_type; - - constexpr auto decay() const& -> decay_type const& {return static_cast(*this);} - friend constexpr auto decay(array_ref const& self) -> decay_type const& {return self.decay();} - - private: - template - auto serialize_structured(Ar& arxiv, const unsigned int version) { - basic_array::serialize(arxiv, version); - } - template - auto serialize_flat(Archive& arxiv, const unsigned int /*version*/) { - using AT = multi::archive_traits; - arxiv & AT::make_nvp("elements", AT::make_array(this->data_elements(), 
static_cast(this->num_elements()))); - } -// template> -// auto serialize_binary_if(std::true_type, Ar& ar) { -// ar & AT::make_nvp("binary_data", AT::make_binary_object(this->data_elements(), static_cast(this->num_elements())*sizeof(typename array_ref::element))); -// } -// template -// auto serialize_binary_if(std::false_type, Ar& ar) {return serialize_flat(ar);} - - public: - template - auto serialize(Archive& arxiv, const unsigned int version) { - serialize_flat(arxiv, version); -// serialize_structured(ar, version); -// switch(version) { -// case static_cast( 0): return serialize_flat(arxiv); -// case static_cast(-1): return serialize_structured(arxiv, version); -// // case 2: return serialize_binary_if(std::is_trivially_copy_assignable{}, arxiv); -// default: -// if( this->num_elements() <= version ){serialize_structured(arxiv, version);} -// else {serialize_flat (arxiv );} -// } - } -}; - -template -using array_cref = array_ref< - std::decay_t, D, - typename std::pointer_traits::template rebind ->; - -template -using array_mref = array_ref< - std::decay_t, D, - std::move_iterator ->; - -template -struct array_ptr -: basic_array_ptr -, typename array_ref::layout_t> { - using basic_ptr = basic_array_ptr, typename array_ref::layout_t>; - - constexpr array_ptr(Ptr data, multi::extensions_t extensions) - : basic_ptr{data, multi::layout_t{extensions}} {} - - constexpr explicit array_ptr(std::nullptr_t nil) : array_ptr{nil, multi::extensions_t{}} {} - - template - constexpr explicit array_ptr(CArray* data) : array_ptr{data_elements(*data), extensions(*data)} {} - - constexpr auto operator*() const { - return array_ref{this->base(), (*this)->extensions()}; - } -}; - -template -class array_ptr : multi::array_ref{ - public: - constexpr explicit array_ptr(Ptr data, typename multi::array_ref::extensions_type extensions) : multi::array_ref(data, extensions) {} - constexpr explicit array_ptr(Ptr data) : array_ptr(data, typename multi::array_ref::extensions_type{}) {} - - 
constexpr explicit operator bool() const {return this->base();} - constexpr explicit operator Ptr () const {return this->base();} - - friend constexpr auto operator==(array_ptr const& self, array_ptr const& other) -> bool {return self.base() == other.base();} - friend constexpr auto operator!=(array_ptr const& self, array_ptr const& other) -> bool {return self.base() != other.base();} - - constexpr auto operator* () const -> multi::array_ref& {return const_cast(*this);} // NOLINT(cppcoreguidelines-pro-type-const-cast) : TODO(correaa) : find a way to avoid using const_cast - constexpr auto operator->() const -> multi::array_ref* {return const_cast( this);} // NOLINT(cppcoreguidelines-pro-type-const-cast) : TODO(correaa) : find a way to avoid using const_cast -}; - -template -constexpr auto addressof(TT(&array)[N]) { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility - return array_ptr< - std::decay_t>, static_cast(std::rank{}), std::remove_all_extents_t* // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility - >{&array}; -} - -template -using array_cptr = array_ptr::template rebind>; - -template -constexpr auto make_array_ref(P data, multi::extensions_t extensions) { - return array_ref::value_type, D, P>(data, extensions); -} - -template auto make_array_ref(P data, extensions_t<0> exts) {return make_array_ref<0>(data, exts);} -template auto make_array_ref(P data, extensions_t<1> exts) {return make_array_ref<1>(data, exts);} -template auto make_array_ref(P data, extensions_t<2> exts) {return make_array_ref<2>(data, exts);} -template auto make_array_ref(P data, extensions_t<3> exts) {return make_array_ref<3>(data, exts);} -template auto make_array_ref(P data, extensions_t<4> exts) {return make_array_ref<4>(data, exts);} -template auto make_array_ref(P data, extensions_t<5> exts) {return make_array_ref<5>(data, exts);} - -// In ICC you need to 
specify the dimensionality in make_array_ref -// #if defined(__INTEL_COMPILER) -// template -// auto make_array_ref(P p, std::initializer_list il){return make_array_ref(p, detail::to_tuple(il));} -// template -// auto make_array_ref(P p, std::initializer_list il){return make_array_ref(p, detail::to_tuple(il));} -// #endif - -#if defined(__cpp_deduction_guides) - -template::value_type> // pointer_traits doesn't have ::value_type -array_ptr(It)->array_ptr; - -template::value_type> // pointer_traits doesn't have ::value_type -array_ptr(It, index_extensions<0>)->array_ptr; - -template::value_type> -array_ptr(It, index_extensions<1>)->array_ptr; -template::value_type> -array_ptr(It, index_extensions<2>)->array_ptr; -template::value_type> -array_ptr(It, index_extensions<3>)->array_ptr; - -template< - class T, std::size_t N, - typename V = typename std::remove_all_extents::type, std::size_t D = std::rank{} // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility -> -array_ptr(T(*)[N])->array_ptr(D)>; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility - -template array_ref(Ptr, index_extensions<0>) -> array_ref::value_type, 0, Ptr>; -template array_ref(Ptr, index_extensions<1>) -> array_ref::value_type, 1, Ptr>; -template array_ref(Ptr, index_extensions<2>) -> array_ref::value_type, 2, Ptr>; -template array_ref(Ptr, index_extensions<3>) -> array_ref::value_type, 3, Ptr>; -template array_ref(Ptr, index_extensions<4>) -> array_ref::value_type, 4, Ptr>; -template array_ref(Ptr, index_extensions<5>) -> array_ref::value_type, 5, Ptr>; - -template array_ref(It, Tuple)->array_ref::value_type, std::tuple_size::value, It>; -#endif - -// TODO(correaa) move to utility -template -constexpr auto rotated(const T(&array)[N]) noexcept { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility - return 
multi::array_ref, std::rank{}, decltype(base(array))>( // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility - base(array), extensions(array) - ).rotated(); -} -template -constexpr auto rotated(T(&array)[N]) noexcept { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility - return multi::array_ref, std::rank{}, decltype(base(array))>( // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) : backwards compatibility - base(array), extensions(array) - ).rotated(); -} - -template -constexpr auto operator/(RandomAccessIterator data, multi::extensions_t extensions) --> multi::array_ptr::value_type, D, RandomAccessIterator> -{return {data, extensions};} - -template -constexpr auto is_basic_array_aux(basic_array const&) -> std::true_type; -constexpr auto is_basic_array_aux(... ) -> std::false_type; - -template struct is_basic_array: decltype(is_basic_array_aux(std::declval())){}; - -template 1)>, class = decltype((void)adl_begin(*In{}), adl_end(*In{}))> -constexpr auto uninitialized_copy -// require N>1 (this is important because it forces calling placement new on the pointer -(In first, In last, multi::array_iterator dest) { - while(first != last) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - adl_uninitialized_copy(adl_begin(*first), adl_end(*first), adl_begin(*dest)); - ++first; - ++dest; - } - return dest; -} - -// begin and end for forwarding reference are needed in this namespace -// to overwrite the behavior of std::begin and std::end -// which take rvalue-references as const-references. 
- -template auto begin(T&& rng) -> decltype(std::forward(rng).begin()) {return std::forward(rng).begin();} -template auto end (T&& rng) -> decltype(std::forward(rng).end() ) {return std::forward(rng).end() ;} - -template -auto transposed(T(&array)[N][M]) -> decltype(auto) {return ~multi::array_ref(array);} // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) - -} // end namespace boost::multi - -//namespace std { // NOLINT(cert-dcl58-cpp) - -// template -// auto move(boost::multi::basic_array&& array) -// ->boost::multi::basic_array, L> { -// return std::move(array).moved(); -// } - -//} // end namespace std - -namespace boost::serialization { - -#ifndef MULTI_SERIALIZATION_ARRAY_VERSION - #define MULTI_SERIALIZATION_ARRAY_VERSION 0 // NOLINT(cppcoreguidelines-macro-usage) gives user opportunity to select serialization version -#endif - -// #define MULTI_SERIALIZATION_ARRAY_VERSION 0 // save data as flat array -// #define MULTI_SERIALIZATION_ARRAY_VERSION -1 // save data as structured nested labels array -// this is disabled! 
#define MULTI_SERIALIZATION_ARRAY_VERSION 2 // save data as binary object if possible even in XML and text mode (not portable) -// #define MULTI_SERIALIZATION_ARRAY_VERSION 16 // any other value, structure for N <= 16, flat otherwise N > 16 - -//template -//struct version< boost::multi::array_ref > { -// using type = std::integral_constant; // typedef mpl::int_<1> type; -//// typedef mpl::integral_c_tag tag; -// enum { value = type::value }; -//}; - -} // end namespace boost::serialization - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/complex.hpp b/external_codes/boost_multi/multi/include/multi/complex.hpp deleted file mode 100644 index 1162612eee..0000000000 --- a/external_codes/boost_multi/multi/include/multi/complex.hpp +++ /dev/null @@ -1,210 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo Correa - -#ifndef MULTI_COMPLEX_HPP -#define MULTI_COMPLEX_HPP - -#include "array_ref.hpp" - -#include "detail/fix_complex_traits.hpp" - -#include -#include // for forward - -namespace boost { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat -namespace multi { - -constexpr class adl_conj_t { - template constexpr auto _(priority<1>/**/, As&&... args) const JUSTRETURN( std:: conj(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( conj(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).conj(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<3>{}, std::forward(args)...)) -} adl_conj; - -constexpr class adl_real_t { - template constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN( std::real(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( real(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... 
args) const DECLRETURN(std::forward(arg).real(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<3>{}, std::forward(args)...)) -} adl_real; - -constexpr class adl_imag_t { - template constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN( std::imag(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( imag(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).imag(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<3>{}, std::forward(args)...)) -} adl_imag; - -struct real_t; -struct imag_t; - -template -struct complex { - using value_type = ValueType; - - private: - value_type re; - value_type im; - - public: - complex() = default; - - constexpr explicit complex(value_type real) : re{real}, im{value_type{0}} {} - constexpr complex(value_type real, value_type imag) // NOLINT(bugprone-easily-swappable-parameters) - : re{real}, im{imag} {} - - constexpr explicit complex(std::complex const& other) : re{other.real()}, im{other.imag()} {} - - template< - class T, - std::enable_if_t< - sizeof(T)==2*sizeof(value_type) and - std::is_assignable().real())>{} and - std::is_assignable().imag())>{}, int - > =0 - > - constexpr explicit operator T const&() const& { - return reinterpret_cast(*this); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) - } - template< - class T, - std::enable_if_t< - sizeof(T)==2*sizeof(value_type) and - std::is_assignable().real())>{} and - std::is_assignable().imag())>{}, int - > = 0 - > - constexpr explicit operator T&()& {return reinterpret_cast(*this);} // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) - - constexpr auto std() const& -> std::complex const& { - return reinterpret_cast const&>(*this); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) - } - constexpr auto std() & -> 
std::complex & { - return reinterpret_cast &>(*this); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) - } - - friend constexpr auto abs(complex const& self) {return abs(self.std());} - friend constexpr auto operator-(complex const& self, complex const& other) - -> complex{return self.std() - other.std();} - - constexpr auto real() & -> value_type & {return re;} - constexpr auto real() const& -> value_type const& {return re;} - - constexpr auto imag() & -> value_type & {return im;} - constexpr auto imag() const& -> value_type const& {return im;} - - template constexpr auto operator+=(Real const& other)&->decltype(re += other, *this) {return re += other, *this;} - template constexpr auto operator-=(Real const& other)&->decltype(re -= other, *this) {return re -= other, *this;} - template constexpr auto operator*=(Real const& other)&->decltype(re *= other, im *= other, *this) {return re *= other, im *= other, *this;} - template constexpr auto operator/=(Real const& other)&->decltype(re /= other, im /= other, *this) {return re /= other, im /= other, *this;} - - template constexpr auto operator+=(Complex const& other)&->decltype(re += other.re, im += other.im, *this) {return re += other.re, im += other.im, *this;} - template constexpr auto operator-=(Complex const& other)&->decltype(re -= other.re, im -= other.im, *this) {return re -= other.re, im -= other.im, *this;} -}; - -struct real_t { - template::element, typename ValueType = typename E::value_type> - constexpr auto operator()(Array&& array) const - ->decltype(std::forward(array).template reinterpret_array_cast>().template member_cast(&complex::real)) { - return std::forward(array).template reinterpret_array_cast>().template member_cast(&complex::real); } - template::value_type, - std::enable_if_t< - sizeof(T)==2*sizeof(ValueType) and - std::is_assignable()))>{} and - std::is_assignable()))>{}, int - > =0 - > - constexpr auto operator()(T& value) const -> ValueType& {return reinterpret_cast&>(value).real;} 
// NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : TODO(correaa) : t[0] - template::value_type, - std::enable_if_t< - sizeof(T)==2*sizeof(ValueType) and - std::is_assignable()))>{} and - std::is_assignable()))>{}, int - > =0 - > - auto operator()(T const& value) const -> ValueType const& { - return reinterpret_cast const&>(value).real; // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : TODO(correaa) : t[0] - } -}; - -struct imag_t { - template::element, typename ValueType = typename E::value_type> - constexpr auto operator()(Array&& array) const - ->decltype(std::forward(array).template reinterpret_array_cast>().template member_cast(&complex::imag)) { - return std::forward(array).template reinterpret_array_cast>().template member_cast(&complex::imag); } - template::value_type, - std::enable_if_t< - sizeof(T) == 2*sizeof(ValueType) and - std::is_assignable()))>{} and - std::is_assignable()))>{}, int - > =0 - > - constexpr auto operator()(T& value) const -> ValueType& { - return reinterpret_cast&>(value).imag; // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : TODO(correaa) : t[1] - } - template::value_type, - std::enable_if_t< - sizeof(T)==2*sizeof(ValueType) and - std::is_assignable()))>{} and - std::is_assignable()))>{}, int - > =0 - > - constexpr auto operator()(T const& value) const -> ValueType const&{ - return reinterpret_cast const&>(value).imag; // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : TODO(correaa) : t[1] - } -}; - -[[maybe_unused]] static constexpr real_t real; -[[maybe_unused]] static constexpr imag_t imag; - -} // end namespace multi -} // end namespace boost - -static_assert( boost::multi::is_trivially_default_constructible>::value ); -static_assert( boost::multi::is_trivially_default_constructible>::value ); - -static_assert( boost::multi::is_trivial>::value ); -static_assert( boost::multi::is_trivial>::value ); - - -#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -#include -#include "array.hpp" - -namespace 
multi = boost::multi; - -template void what(T&&)=delete; - -int main() { - - using complex = multi::complex; - - multi::array A = { - { {1. , 2.}, {3., 4.} }, - { {22., 33.}, {5., 9.} } - }; - - { - auto&& Areal = A.member_cast(&multi::complex::re); - auto&& Aimag = A.member_cast(&multi::complex::im); - - assert( Areal[1][0] == 22. ); - assert( Aimag[1][0] == 33. ); - } { - auto&& Areal = A.member_cast(&multi::complex::re); - auto&& Aimag = A.member_cast(&multi::complex::im); - - assert( Areal[1][0] == 22. ); - assert( Aimag[1][0] == 33. ); - } -} - -#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/config/ASSERT.hpp b/external_codes/boost_multi/multi/include/multi/config/ASSERT.hpp deleted file mode 100644 index 3d7f59d7dd..0000000000 --- a/external_codes/boost_multi/multi/include/multi/config/ASSERT.hpp +++ /dev/null @@ -1,16 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#ifndef MULTI_CONFIG_ASSERT_HPP -#define MULTI_CONFIG_ASSERT_HPP - -#include - -#if defined(MULTI_ACCESS_NDEBUG) or defined(__CUDACC__) - #define MULTI_ACCESS_ASSERT(Expr) -#else - // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) this is for very inefficient asserts - #define MULTI_ACCESS_ASSERT(Expr) assert(Expr) -#endif - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/config/DELETE.hpp b/external_codes/boost_multi/multi/include/multi/config/DELETE.hpp deleted file mode 100644 index 41be2d7bb9..0000000000 --- a/external_codes/boost_multi/multi/include/multi/config/DELETE.hpp +++ /dev/null @@ -1,17 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*- -// Copyright 2018-2021 Alfredo A. 
Correa - -#ifndef MULTI_CONFIG_DELETE_HPP -#define MULTI_CONFIG_DELETE_HPP - -namespace boost::multi { - -template struct disable_if_impl{}; -template struct disable_if_impl{using type = T;}; - -template using disable_if = typename disable_if_impl::type; - -} // end namespace boost::multi - -#define DELETE(ConD) boost::multi::disable_if =0 // NOLINT(cppcoreguidelines-macro-usage) TODO(correaa) remove -#endif diff --git a/external_codes/boost_multi/multi/include/multi/config/DEPRECATED.hpp b/external_codes/boost_multi/multi/include/multi/config/DEPRECATED.hpp deleted file mode 100644 index e7a7b8d46c..0000000000 --- a/external_codes/boost_multi/multi/include/multi/config/DEPRECATED.hpp +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef MULTI_CONFIG_DEPRECATED_HPP // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -#define MULTI_CONFIG_DEPRECATED_HPP -// © Alfredo A. Correa 2019-2020 - -#ifndef __has_cpp_attribute -#define __has_cpp_attribute(name) 0 -#endif - -#ifdef __NVCC__ - #define DEPRECATED(MsG) __attribute__((deprecated)) -#else - #if __has_cpp_attribute(deprecated) - #define DEPRECATED(MsG) [[deprecated(MsG)]] - #else - #define DEPRECATED(MsG) - #endif -#endif - -#if not defined(__INTEL_COMPILER) -#define BEGIN_NO_DEPRECATED \ -\ -_Pragma("GCC diagnostic push") \ -_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") \ -\ - -#else -#define BEGIN_NO_DEPRECATED \ -_Pragma("warning push") \ -_Pragma("warning disable 1786") \ - -#endif - -#if not defined(__INTEL_COMPILER) -#define END_NO_DEPRECATED \ -\ -_Pragma("GCC diagnostic pop") \ -\ - -#else -#define END_NO_DEPRECATED \ -\ -_Pragma("warning pop") \ -\ - -#endif - -#define BEGIN_CUDA_SLOW BEGIN_NO_DEPRECATED -#define END_CUDA_SLOW END_NO_DEPRECATED - -#define NO_DEPRECATED(ExpR) \ - BEGIN_NO_DEPRECATED \ - ExpR \ - END_NO_DEPRECATED - -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/config/MARK.hpp b/external_codes/boost_multi/multi/include/multi/config/MARK.hpp 
deleted file mode 100644 index b093e4dd5e..0000000000 --- a/external_codes/boost_multi/multi/include/multi/config/MARK.hpp +++ /dev/null @@ -1,15 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. Correa - -//#ifndef MULTI_CONFIG_MARK_HPP -//#define MULTI_CONFIG_MARK_HPP - -#ifndef MULTI_MARK_SCOPE // NOLINT(llvm-header-guard) this is a configuration header, can be included many times - #ifdef CALI_CXX_MARK_SCOPE - #define MULTI_MARK_SCOPE(MsG) CALI_CXX_MARK_SCOPE(MsG) - #else - #define MULTI_MARK_SCOPE(MsG) ((void)0) // NOLINT(cppcoreguidelines-macro-usage) to mark scopes - #endif -#endif - -//#endif diff --git a/external_codes/boost_multi/multi/include/multi/config/MAYBE_UNUSED.hpp b/external_codes/boost_multi/multi/include/multi/config/MAYBE_UNUSED.hpp deleted file mode 100644 index 13718cf38b..0000000000 --- a/external_codes/boost_multi/multi/include/multi/config/MAYBE_UNUSED.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. 
Correa - -#ifndef MULTI_CONFIG_MAYBE_UNUSED_HPP -#define MULTI_CONFIG_MAYBE_UNUSED_HPP - -#ifndef __has_cpp_attribute -#define __has_cpp_attribute(name) 0 -#endif - -#if (__has_cpp_attribute(maybe_unused)) and (__cplusplus>=201703L) - #define MULTI_MAYBE_UNUSED [[maybe_unused]] // NOLINT(cppcoreguidelines-macro-usage) TODO(correaa) check if this is really necessary in C++17 -#elif __has_cpp_attribute(gnu::unused) - #define MULTI_MAYBE_UNUSED [[gnu::unused]] // NOLINT(cppcoreguidelines-macro-usage) TODO(correaa) check if this is really necessary in C++17 -#elif __has_cpp_attribute(__attribute__((unused))) - #define MULTI_MAYBE_UNUSED __attribute__((unused)) // NOLINT(cppcoreguidelines-macro-usage) TODO(correaa) check if this is really necessary in C++17 -#else - #define MULTI_MAYBE_UNUSED // NOLINT(cppcoreguidelines-macro-usage) TODO(correaa) check if this is really necessary in C++17 -#endif - -#ifndef MAYBE_UNUSED - #define MAYBE_UNUSED MULTI_MAYBE_UNUSED // NOLINT(cppcoreguidelines-macro-usage) TODO(correaa) check if this is really necessary in C++17 -#endif - -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/config/NODISCARD.hpp b/external_codes/boost_multi/multi/include/multi/config/NODISCARD.hpp deleted file mode 100644 index 5f07390e67..0000000000 --- a/external_codes/boost_multi/multi/include/multi/config/NODISCARD.hpp +++ /dev/null @@ -1,39 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. 
Correa - -#ifndef MULTI_CONFIG_NODISCARD_HPP -#define MULTI_CONFIG_NODISCARD_HPP - -#ifndef __has_cpp_attribute -#define __has_cpp_attribute(name) 0 -#endif - -#ifndef NODISCARD -#if defined(__NVCC__) - #define NODISCARD(MsG) -#elif (__has_cpp_attribute(nodiscard) and (__cplusplus>=201703L)) - #if (__has_cpp_attribute(nodiscard)>=201907) and (__cplusplus>201703L) - #define NODISCARD(MsG) [[nodiscard(MsG)]] - #else - #define NODISCARD(MsG) [[nodiscard]] // NOLINT(cppcoreguidelines-macro-usage) TODO(correaa) check if this is needed in C++17 - #endif -#elif __has_cpp_attribute(gnu::warn_unused_result) - #define NODISCARD(MsG) [[gnu::warn_unused_result]] -#else - #define NODISCARD(MsG) -#endif -#endif - -#ifndef NODISCARD_CLASS - #if(__has_cpp_attribute(nodiscard) and not defined(__NVCC__) and (not defined(__clang__) or (defined(__clang__) and (__cplusplus >= 202002L)))) - #if (__has_cpp_attribute(nodiscard)>=201907) - #define NODISCARD_CLASS(MsG) [[nodiscard_(MsG)]] - #else - #define NODISCARD_CLASS(MsG) [[nodiscard]] - #endif - #else - #define NODISCARD_CLASS(MsG) - #endif -#endif - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/config/NO_UNIQUE_ADDRESS.hpp b/external_codes/boost_multi/multi/include/multi/config/NO_UNIQUE_ADDRESS.hpp deleted file mode 100644 index bd2caa35c7..0000000000 --- a/external_codes/boost_multi/multi/include/multi/config/NO_UNIQUE_ADDRESS.hpp +++ /dev/null @@ -1,19 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. 
Correa - -#ifndef MULTI_CONFIG_NO_UNIQUE_ADDRESS_HPP -#define MULTI_CONFIG_NO_UNIQUE_ADDRESS_HPP - -#ifndef __has_cpp_attribute -#define __has_cpp_attribute(name) 0 -#endif - -#if __has_cpp_attribute(no_unique_address) >=201803 and not defined(__NVCC__) and not defined(__PGI) - // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) this macro will be needed until C++20 - #define MULTI_NO_UNIQUE_ADDRESS [[no_unique_address]] -#else - // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) this macro will be needed until C++20 - #define MULTI_NO_UNIQUE_ADDRESS -#endif - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/config/UNREACHABLE.hpp b/external_codes/boost_multi/multi/include/multi/config/UNREACHABLE.hpp deleted file mode 100644 index 71d3ebd4b7..0000000000 --- a/external_codes/boost_multi/multi/include/multi/config/UNREACHABLE.hpp +++ /dev/null @@ -1,33 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*- -echo $X -$CXXX $CXXFLAGS $0 -o $0.$X &&$0.$X&&rm $0.$X;exit -#endif -// © Alfredo A. 
Correa 2020 - -#ifndef MULTI_CONFIG_NODISCARD_HPP -#define MULTI_CONFIG_NODISCARD_HPP - -#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) || defined(__NVCC__) -#define MULTI_UNREACHABLE __builtin_unreachable() -#else -#define MULTI_UNREACHABLE do { std::abort(); } while(0) -#endif - -#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -enum color{red, green, blue}; - -int f(enum color c){ - switch(c){ - case red : return 1; - case green: return 2; - case blue : return 3; // comment case make gcc, clang, culang causes -Wswitch warning - } MULTI_UNREACHABLE; // commnet unreachable in gcc and nvcc causes -Wreturn-type warning -} - -int main(){ -} - -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/config/no_unique_address_.hpp b/external_codes/boost_multi/multi/include/multi/config/no_unique_address_.hpp deleted file mode 100644 index ab2d51bd39..0000000000 --- a/external_codes/boost_multi/multi/include/multi/config/no_unique_address_.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS -(echo '#include"'$0'"'>$0.cpp)&&$CXX -D_TEST_MULTI_CONFIG_NO_UNIQUE_ADDRESS $0.cpp -o$0x&&$0x&&rm $0x $0.cpp;exit -#endif -// © Alfredo A. 
Correa 2019-2020 - -#ifndef MULTI_CONFIG_NO_UNIQUE_ADDRESS_HPP -#define MULTI_CONFIG_NO_UNIQUE_ADDRESS_HPP - -#ifndef __has_cpp_attribute -#define __has_cpp_attribute(name) 0 -#endif - -#if __has_cpp_attribute(no_unique_address) >=201803 - #define NO_UNIQUE_ADDRESS [[no_unique_address]] - #define no_unique_address_ no_unique_address -#else - #define NO_UNIQUE_ADDRESS -#endif - -//////////////////////////////////////////////////////////////////////////////// -#ifdef _TEST_MULTI_CONFIG_NO_UNIQUE_ADDRESS - -class A{}; - -class B{ - NO_UNIQUE_ADDRESS A x; - double y; -}; - -int main(){ - static_assert( sizeof(B) == sizeof(double) , "may fail with no unique feauture"); // for example fails with clang++-8 -} -#endif -#endif - - diff --git a/external_codes/boost_multi/multi/include/multi/detail/adl.hpp b/external_codes/boost_multi/multi/include/multi/detail/adl.hpp deleted file mode 100644 index 9185d25aee..0000000000 --- a/external_codes/boost_multi/multi/include/multi/detail/adl.hpp +++ /dev/null @@ -1,595 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. Correa - -#ifndef MULTI_DETAIL_ADL_HPP -#define MULTI_DETAIL_ADL_HPP - -#include // std::size_t -#include // std::conditional_t -#include - -#include "multi/detail/memory.hpp" - -#if defined(__NVCC__) -#include -#include -#include -#include -#endif - -#include // for std::copy, std::copy_n, std::equal, etc -#include // for begin, end -#include // for uninitialized_copy, etc - -#define BOOST_MULTI_DEFINE_ADL(FuN) /*NOLINT(cppcoreguidelines-macro-usage) TODO(correaa) consider replacing for all ADL'd operations*/ \ -namespace boost { \ -namespace multi { \ -namespace adl { \ - namespace custom {template struct FuN##_t;} __attribute__((unused)) \ - static constexpr class FuN##_t { \ - template [[deprecated]] auto _(priority<0>, As&&... args) const = delete; \ - template auto _(priority<1>, As&&... 
args) const DECLRETURN(std::FuN(std::forward(args)...)) \ - template auto _(priority<2>, As&&... args) const DECLRETURN( FuN(std::forward(args)...)) \ - template auto _(priority<3>, T&& t, As&&... args) const DECLRETURN(std::forward(t).FuN(std::forward(args)...)) \ - template auto _(priority<4>, As&&... args) const DECLRETURN(custom::FuN##_t::_(std::forward(args)...)) \ - public: \ - template auto operator()(As&&... args) const-> decltype(_(priority<4>{}, std::forward(args)...)) {return _(priority<4>{}, std::forward(args)...);} \ - } (FuN); \ -} /* end namespace adl */ \ -} /* end namespace multi */ \ -} /* end namespace boost */ - -namespace boost::multi { - -template struct priority : std::conditional_t> {}; - -#define DECLRETURN(ExpR) ->decltype(ExpR) {return ExpR;} // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing -#define JUSTRETURN(ExpR) {return ExpR;} // NOLINT(cppcoreguidelines-macro-usage) saves a lot of typing - -constexpr class adl_copy_n_t { - template constexpr auto _(priority<0>/**/, As&&... args) const DECLRETURN(std:: copy_n( std::forward(args)...)) -#if defined(__NVCC__) - template constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN(::thrust:: copy_n( std::forward(args)...)) -#endif - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( copy_n( std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN(std::decay_t:: copy_n(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).copy_n( std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<4>{}, std::forward(args)...)) -} adl_copy_n; - -constexpr class adl_move_t { - template constexpr auto _(priority<0>/**/, As&&... 
args) const DECLRETURN( std:: move( std::forward(args)...)) -#if defined(__NVCC__) // there is no thrust::move algorithm - template constexpr auto _(priority<1>/**/, It first, It last, As&&... args) const DECLRETURN( thrust::copy(std::make_move_iterator(first), std::make_move_iterator(last), std::forward(args)...)) -#endif - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( move( std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN(std::decay_t:: move(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).move( std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<4>{}, std::forward(args)...)) -} adl_move; - -constexpr class adl_fill_n_t { - template< class... As> constexpr auto _(priority<0>/**/, As&&... args) const DECLRETURN( std:: fill_n (std::forward(args)...)) -#if defined(__NVCC__) - template< class... As> constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN( thrust:: fill_n (std::forward(args)...)) -#endif - template< class... As> constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( fill_n (std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN(std::decay_t:: fill_n(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).fill_n (std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... 
args) const DECLRETURN(_(priority<4>{}, std::forward(args)...)) -} adl_fill_n; - -constexpr class adl_equal_t { - template< class...As> /*[[gnu::pure]]*/ constexpr auto _(priority<1>/**/, As&&...args) const DECLRETURN( std:: equal( std::forward(args)...)) -#if defined(__NVCC__) - template< class...As> constexpr auto _(priority<2>/**/, As&&...args) const DECLRETURN( ::thrust:: equal( std::forward(args)...)) -#endif - template< class...As> /*[[gnu::pure]]*/ constexpr auto _(priority<3>/**/, As&&...args) const DECLRETURN( equal( std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&...args) const DECLRETURN( std::decay_t:: equal(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<5>/**/, T&& arg, As&&...args) const DECLRETURN( std::forward(arg).equal( std::forward(args)...)) - - public: - template /*[[gnu::pure]]*/ constexpr auto operator()(As&&...args) const DECLRETURN(_(priority<5>{}, std::forward(args)...)) -} adl_equal; - -template struct adl_custom_copy; - -constexpr class adl_copy_t { - class Copy; - template::reference, typename std::iterator_traits::reference>> - > - constexpr auto _(priority<1>/**/, InputIt first, InputIt last, OutputIt d_first) const DECLRETURN(std::copy(first, last, d_first)) -#if defined(__NVCC__) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( thrust::copy(std::forward(args)...)) -#endif - template< class... As> constexpr auto _(priority<3>/**/, As&&... args) const DECLRETURN( copy(std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t::copy(std::forward(arg), std::forward(args)...)) -// template constexpr auto _(priority<5>/**/, As&&... args) const DECLRETURN(boost::multi::adl_custom_copy...>::copy(std::forward(as)...)) - template constexpr auto _(priority<6>/**/, T&& arg, As&&... 
args) const DECLRETURN(std::forward(arg).copy(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const DECLRETURN( _(priority<6>{}, std::forward(args)...) ) \ -} adl_copy; - -namespace adl { - namespace custom {template struct fill_t;} - static constexpr class fill_t { - template auto _(priority<1>/**/, As&&... args) const DECLRETURN( std:: fill (std::forward(args)...)) - template auto _(priority<2>/**/, As&&... args) const DECLRETURN( fill (std::forward(args)...)) - template auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).fill (std::forward(args)...)) - template auto _(priority<4>/**/, As&&... args) const DECLRETURN(custom:: fill_t::_(std::forward(args)...)) - - public: - template auto operator()(As&&... args) const DECLRETURN(_(priority<5>{}, std::forward(args)...)) - } fill [[maybe_unused]]; -} // end namespace adl - -template -struct alloc_construct_elem_t { - Alloc* palloc_; - template auto operator()(T&& ptr) const - ->decltype(std::allocator_traits::construct(*palloc_, std::addressof(ptr))) { - return std::allocator_traits::construct(*palloc_, std::addressof(ptr)); } -}; - -namespace xtd { - -template // this one goes last!!! -constexpr auto to_address(const T& ptr) noexcept; - -template -constexpr auto me_to_address(priority<0>/**/, const T& ptr) noexcept -->decltype(to_address(ptr.operator->())) { - return to_address(ptr.operator->()); } - -template -constexpr auto me_to_address(priority<1>/**/, const T& ptr) noexcept -->decltype(std::pointer_traits::to_address(ptr)) { - return std::pointer_traits::to_address(ptr); } - -template{}, int> = 0> -constexpr auto me_to_address(priority<2>/**/, T const& ptr) noexcept -> T { - static_assert(not std::is_function_v, "!"); - return ptr; -} - -template // this one goes last!!! 
-constexpr auto to_address(T const& ptr) noexcept -->decltype(me_to_address(priority<2>{}/**/, ptr)) { - return me_to_address(priority<2>{} , ptr); } - -template::value_type, typename = decltype(std::addressof(*ForwardIt{})), typename = decltype(Value())> -auto alloc_uninitialized_value_construct_n(Alloc& alloc, ForwardIt first, Size count) -> ForwardIt { -// ->std::decay_t::construct(alloc, std::addressof(*first), Value()), first)> - ForwardIt current = first; - try { - for (; count > 0 ; ++current, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::allocator_traits::construct(alloc, std::addressof(*current), Value()); // !!!!!!!!!!!!!! if you are using std::complex type consider making complex default constructible (e.g. by type traits) - } - // ::new (static_cast(std::addressof(*current))) Value(); - return current; - } catch(...) { - for(; current != first; ++first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::allocator_traits::destroy(alloc, std::addressof(*first)); - } - throw; - } -} - -template::value_type> -auto alloc_uninitialized_default_construct_n(Alloc& alloc, ForwardIt first, Size count) -->std::decay_t::construct(alloc, std::addressof(*first)), first)> { - ForwardIt current = first; - if constexpr(std::is_trivially_default_constructible_v) { - std::advance(current, count); - } else { - using _ = std::allocator_traits; - try { - for(; count > 0; ++current, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - _::construct(alloc, std::addressof(*current)); - } - } catch(...) 
{ - for(; current != first; ++first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - _::destroy(alloc, std::addressof(*first)); - } - throw; - } - } - return current; -} - -template -auto uninitialized_default_construct_n(ForwardIt first, Size count) -> ForwardIt { - using T = typename std::iterator_traits::value_type; - ForwardIt current = first; - try { - for (; count > 0 ; (void) ++current, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - ::new (static_cast(std::addressof(*current))) T; - } - return current; - } catch (...) {assert(0); -// std::destroy(first, current); - throw; - } -} - -} // end namespace xtd - -template struct alloc_destroy_elem_t { - Alloc* palloc_; - template constexpr auto operator()(T&& ptr) const { // ->decltype(std::allocator_traits::construct(*palloc_, std::forward(t)...)){ - return std::allocator_traits::destroy(*palloc_, std::addressof(ptr)); - } -}; - -template::value_type> -constexpr auto destroy_n(BidirIt first, Size count) -->std::decay_t { - first += count; - for(; count != 0; --first, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::addressof(*(first-1))->~T(); - } - return first; -} - -template::value_type> -constexpr auto alloc_destroy_n(Alloc& alloc, BidirIt first, Size count) -->std::decay_t { - first += count; - for (; count != 0; --first, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::allocator_traits::destroy(alloc, std::addressof(*(first - 1))); - } - return first; -} - -constexpr class adl_uninitialized_copy_t { - template // sfinae friendy std::uninitialized_copy - [[nodiscard]] constexpr auto _(priority<1>/**/, InIt first, InIt last, FwdIt d_first) const DECLRETURN( std::uninitialized_copy(first, last, d_first)) - template constexpr auto _(priority<2>/**/, As&&... 
args) const DECLRETURN( uninitialized_copy(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t:: uninitialized_copy(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).uninitialized_copy(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<5>{}, std::forward(args)...)) -} adl_uninitialized_copy; - -namespace xtd { - -template::value_type> -auto uninitialized_copy_n(InputIt first, Size count, ForwardIt d_first) -->std::decay_t(std::addressof(*d_first))) Value(*first), d_first)> { - ForwardIt current = d_first; - try { - for (; count > 0; ++first, (void) ++current, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - ::new (static_cast(std::addressof(*current))) Value(*first); - } - } catch(...) { - for(; d_first != current; ++d_first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - d_first->~Value(); - } - throw; - } - return current; -} - -template::value_type> -auto uninitialized_move_n(InputIt first, Size count, ForwardIt d_first) -->std::decay_t(std::addressof(*d_first))) Value(std::move(*first)), d_first)> { - ForwardIt current = d_first; - try { - for (; count > 0; ++first, (void) ++current, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - ::new (static_cast(std::addressof(*current))) Value(std::move(*first)); - } - } catch(...) { - for(; d_first != current; ++d_first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - d_first->~Value(); - } - throw; - } - return current; -} - -} // end namespace xtd - -constexpr class adl_uninitialized_copy_n_t { - template constexpr auto _(priority<1>/**/, As&&... 
args) const DECLRETURN( std::uninitialized_copy_n(std::forward(args)...)) -#if defined(__NVCC__) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( thrust::copy_n( std::forward(args)...)) -#endif - template constexpr auto _(priority<3>/**/, As&&... args) const DECLRETURN( uninitialized_copy_n(std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::decay_t:: uninitialized_copy_n(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<5>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).uninitialized_copy_n(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const {return _(priority<5>{}, std::forward(args)...);} // TODO(correaa) this might trigger a compiler crash with g++ 7.5 because of operator&() && overloads -} adl_uninitialized_copy_n; - -constexpr class adl_uninitialized_move_n_t { - template constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN( xtd:: uninitialized_move_n(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( uninitialized_move_n(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN(std::decay_t:: uninitialized_move_n(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).uninitialized_move_n(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... 
args) const {return _(priority<4>{}, std::forward(args)...);} -} adl_uninitialized_move_n; - -namespace xtd { - -template -constexpr auto alloc_uninitialized_copy_n(std::allocator& /*alloc*/, InputIt first, Size count, ForwardIt d_first) { - return adl_uninitialized_copy_n(first, count, d_first);} - -template -constexpr auto alloc_uninitialized_move_n(std::allocator& /*alloc*/, InputIt first, Size count, ForwardIt d_first) { - return adl_uninitialized_move_n(first, count, d_first);} - -template -auto alloc_uninitialized_copy_n(Alloc& alloc, InputIt first, Size count, ForwardIt d_first) { - ForwardIt current = d_first; - try { - for(; count > 0; ++first, ++current, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::allocator_traits::construct(alloc, std::addressof(*current), *first); - } - return current; - } catch(...) { - for(; d_first != current; ++d_first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::allocator_traits::destroy(alloc, std::addressof(*d_first)); - } - throw; - } -} - -template -auto alloc_uninitialized_move_n(Alloc& alloc, InputIt first, Size count, ForwardIt d_first) { - ForwardIt current = d_first; - try { - for(; count > 0; ++first, ++current, --count) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::allocator_traits::construct(alloc, std::addressof(*current), std::move(*first)); - } - return current; - } catch(...) 
{ - for(; d_first != current; ++d_first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::allocator_traits::destroy(alloc, std::addressof(*d_first)); - } - throw; - } -} - -template -constexpr auto alloc_uninitialized_copy(std::allocator&/*allocator*/, InputIt first, InputIt last, ForwardIt d_first) { - return adl_uninitialized_copy(first, last, d_first); -} - -template())), class=std::enable_if_t::value_type, typename std::iterator_traits::reference>>> -auto alloc_uninitialized_copy(Alloc& alloc, InputIt first, InputIt last, ForwardIt d_first) { -// ->std::decay_t // problematic in clang-11 + gcc-9 - ForwardIt current = d_first; - try { - for(; first != last; ++first, (void)++current) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::allocator_traits>::construct(alloc, std::addressof(*current), *first); - } - return current; - } catch(...) { - for(; d_first != current; ++d_first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::allocator_traits>::destroy(alloc, std::addressof(*d_first)); - } - throw; - } -} - -template -auto alloc_uninitialized_fill_n(Alloc& alloc, ForwardIt first, Size n, T const& value) -->std::decay_t::construct(alloc, std::addressof(*first), value), first)> { - ForwardIt current = first; // using std::to_address; - try { - for(; n > 0; ++current, --n) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::allocator_traits::construct(alloc, std::addressof(*current), value); - } - return current; - } catch(...) { - for(; first != current; ++first) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - std::allocator_traits::destroy(alloc, std::addressof(*first)); - } - throw; - } -} -} // end namespace xtd - -constexpr class adl_distance_t { - template constexpr auto _(priority<1>/**/, As&&... 
args) const DECLRETURN( std:: distance(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( distance(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t:: distance(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).distance(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<4>{}, std::forward(args)...)) -} adl_distance; - -constexpr class adl_begin_t { - template constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN( std::begin(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( begin(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t::begin(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).begin(std::forward(args)...)) - - public: - template [[nodiscard]] constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<4>{}, std::forward(args)...)) -} adl_begin; - -constexpr class adl_end_t { - template constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN( std:: end(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( end(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t:: end(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).end(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... 
args) const DECLRETURN(_(priority<4>{}, std::forward(args)...)) -} adl_end; - -constexpr class adl_swap_ranges_t { - template constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN( std:: swap_ranges(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( swap_ranges(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t:: swap_ranges(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).swap_ranges(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<4>{}, std::forward(args)...)) -} adl_swap_ranges; - -constexpr class adl_lexicographical_compare_t { - template /*[[gnu::pure]]*/ constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN( std:: lexicographical_compare(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( lexicographical_compare(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t:: lexicographical_compare(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).lexicographical_compare(std::forward(args)...)) - - public: - template /*[[gnu::pure]]*/ constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<4>{}, std::forward(args)...)) -} adl_lexicographical_compare; - -constexpr class adl_uninitialized_value_construct_n_t { - template constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN( std:: uninitialized_value_construct_n(std::forward(args)...)) // TODO(correaa) use boost alloc_X functions? - template constexpr auto _(priority<2>/**/, As&&... 
args) const DECLRETURN( uninitialized_value_construct_n(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t::uninitialized_value_construct_n(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).uninitialized_value_construct_n(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const {return (_(priority<4>{}, std::forward(args)...));} -} adl_uninitialized_value_construct_n; - -constexpr class adl_alloc_uninitialized_value_construct_n_t { - template constexpr auto _(priority<1>/**/, Alloc&& /*alloc*/, As&&... args) const DECLRETURN( adl_uninitialized_value_construct_n(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( xtd:: alloc_uninitialized_value_construct_n(std::forward(args)...)) // TODO(correaa) use boost alloc_X functions? - template constexpr auto _(priority<3>/**/, As&&... args) const DECLRETURN( alloc_uninitialized_value_construct_n(std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t:: alloc_uninitialized_value_construct_n(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<5>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).alloc_uninitialized_value_construct_n(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const {return (_(priority<5>{}, std::forward(args)...));} -} adl_alloc_uninitialized_value_construct_n; - -constexpr class adl_uninitialized_default_construct_n_t { - template constexpr auto _(priority<1>/**/, As&&... args) const {return xtd:: uninitialized_default_construct_n( std::forward(args)...);} - template constexpr auto _(priority<2>/**/, As&&... 
args) const DECLRETURN( uninitialized_default_construct_n( std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t:: uninitialized_default_construct_n(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).uninitialized_default_construct_n( std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const {return (_(priority<4>{}, std::forward(args)...));} -} adl_uninitialized_default_construct_n; - -constexpr class adl_alloc_uninitialized_default_construct_n_t { - template constexpr auto _(priority<1>/**/, Alloc&&/*unused*/, As&&... args) const JUSTRETURN( adl_uninitialized_default_construct_n(std::forward(args)...)) -// #if defined(__NVCC__) -// template constexpr auto _(priority<3>/**/, As&&... as) const DECLRETURN( thrust:: uninitialized_construct_n_with_allocator( std::forward(as)...)) -// #endif - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( xtd:: alloc_uninitialized_default_construct_n( std::forward(args)...)) // TODO(correaa) use boost alloc_X functions? - template constexpr auto _(priority<4>/**/, As&&... args) const DECLRETURN( alloc_uninitialized_default_construct_n( std::forward(args)...)) - template constexpr auto _(priority<5>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t:: alloc_uninitialized_default_construct_n(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<6>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).alloc_uninitialized_default_construct_n( std::forward(args)...)) - public: - template constexpr auto operator()(As&&... args) const {return (_(priority<6>{}, std::forward(args)...));} -} adl_alloc_uninitialized_default_construct_n; - -constexpr class destroy_n_t { - template constexpr auto _(priority<1>/**/, As&&... 
args) const DECLRETURN( multi:: destroy_n (std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( destroy_n (std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t:: destroy_n(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).destroy_n (std::forward(args)...)) -public: - template constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<4>{}, std::forward(args)...)) -} adl_destroy_n; - -constexpr class alloc_destroy_n_t { - template constexpr auto _(priority<1>/**/, Alloc&&/*unused*/, As&&... args) const DECLRETURN( adl_destroy_n (std::forward(args)...)) - template< class... As> constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN(multi:: alloc_destroy_n (std::forward(args)...)) // TODO(correaa) use boost alloc_X functions? - template< class... As> constexpr auto _(priority<3>/**/, As&&... args) const DECLRETURN( alloc_destroy_n (std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN(std::decay_t:: alloc_destroy_n(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<5>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).alloc_destroy_n (std::forward(args)...)) - public: - template constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<5>{}, std::forward(args)...)) -} adl_alloc_destroy_n; - -constexpr class adl_alloc_uninitialized_copy_t { - template constexpr auto _(priority<1>/**/, Alloc&&/*alloc*/, As&&... args) const DECLRETURN( adl_uninitialized_copy(std::forward(args)...)) -// TODO(correaa) : remove T from below? - template constexpr auto _(priority<2>/**/, T&& arg, As&&... args) const DECLRETURN( xtd::alloc_uninitialized_copy(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... 
args) const DECLRETURN( alloc_uninitialized_copy(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<4>/**/, T&& arg, As&&... args) const DECLRETURN( std::decay_t::alloc_uninitialized_copy(std::forward(arg), std::forward(args)...)) - template constexpr auto _(priority<5>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).alloc_uninitialized_copy(std::forward(args)...)) - public: - template constexpr auto operator()(As&&... args) const DECLRETURN(_(priority<5>{}, std::forward(args)...)) -} adl_alloc_uninitialized_copy; - -constexpr class alloc_uninitialized_copy_n_t { - template constexpr auto _(priority<1>/**/, Alloc&&/*alloc*/, As&&... args) const DECLRETURN( uninitialized_copy_n(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( xtd::alloc_uninitialized_copy_n(std::forward(args)...)) -#if defined(__NVCC__) - template constexpr auto _(priority<3>/**/, Alloc&&/*alloc*/, As&&... as) const DECLRETURN( thrust::uninitialized_copy_n(std::forward(as)...)) -#endif -// TODO(correaa) revise - template constexpr auto _(priority<4>/**/, Alloc&&/*alloc*/, As&&... args) const DECLRETURN( uninitialized_copy_n(std::forward(args)...)) - template constexpr auto _(priority<5>/**/, As&&... args) const DECLRETURN( alloc_uninitialized_copy_n(std::forward(args)...)) - template constexpr auto _(priority<6>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).alloc_uninitialized_copy_n(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const {return _(priority<6>{}, std::forward(args)...);} -} adl_alloc_uninitialized_copy_n; - -constexpr class alloc_uninitialized_move_n_t { -// TODO(correaa) : fallback to no alloc version - template constexpr auto _(priority<1>/**/, As&&... args) const {return( xtd:: alloc_uninitialized_move_n(std::forward(args)...));} - template constexpr auto _(priority<2>/**/, As&&... 
args) const DECLRETURN( alloc_uninitialized_move_n(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN(std::forward(arg).alloc_uninitialized_move_n(std::forward(args)...)) - - public: - template constexpr auto operator()(As&&... args) const {return _(priority<3>{}, std::forward(args)...);} \ -} adl_alloc_uninitialized_move_n; - -constexpr class uninitialized_fill_n_t { - template constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN( std:: uninitialized_fill_n(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, As&&... args) const DECLRETURN( uninitialized_fill_n(std::forward(args)...)) - template constexpr auto _(priority<3>/**/, T&& arg, As&&... args) const DECLRETURN( std::forward(arg).uninitialized_fill_n(std::forward(args)...)) - - public: - template constexpr auto operator()(T1&& arg, As&&... args) const DECLRETURN(_(priority<3>{}, arg, std::forward(args)...)) -} adl_uninitialized_fill_n; - -constexpr class alloc_uninitialized_fill_n_t { - template< class... As> constexpr auto _(priority<1>/**/, As&&... args) const DECLRETURN( xtd::alloc_uninitialized_fill_n(std::forward(args)...)) - template constexpr auto _(priority<2>/**/, Alloc&&/*alloc*/, As&&... args) const DECLRETURN( adl_uninitialized_fill_n(std::forward(args)...)) - template< class... As> constexpr auto _(priority<3>/**/, As&&... args) const DECLRETURN( alloc_uninitialized_fill_n(std::forward(args)...)) - template constexpr auto _(priority<4>/**/, Alloc&& alloc , As&&... args) const DECLRETURN( std::forward(alloc).alloc_uninitialized_fill_n(std::forward(args)...)) - public: - template constexpr auto operator()(T1&& arg, As&&... 
args) const DECLRETURN(_(priority<4>{}, arg, std::forward(args)...)) -} adl_alloc_uninitialized_fill_n; - -template -struct recursive { - template - static constexpr auto alloc_uninitialized_copy(Alloc& alloc, InputIt first, InputIt last, ForwardIt dest){ - using std::begin; using std::end; - while(first!=last) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - recursive::alloc_uninitialized_copy(alloc, begin(*first), end(*first), begin(*dest)); - ++first; - ++dest; - } - return dest; - } -}; - -template<> struct recursive<1> { - template - static auto alloc_uninitialized_copy(Alloc& alloc, InputIt first, InputIt last, ForwardIt dest){ - return adl_alloc_uninitialized_copy(alloc, first, last, dest); - } -}; - -} // end namespace boost::multi -#endif diff --git a/external_codes/boost_multi/multi/include/multi/detail/config.hpp b/external_codes/boost_multi/multi/include/multi/detail/config.hpp deleted file mode 100644 index 7a1acca464..0000000000 --- a/external_codes/boost_multi/multi/include/multi/detail/config.hpp +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright 2019-2021 Alfredo A. 
Correa - -#ifndef MULTI_ADAPTORS_BLAS_TEST_CONFIG_HPP -#define MULTI_ADAPTORS_BLAS_TEST_CONFIG_HPP - -#define CUDA_FOUND 1 - -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/detail/cuda/ptr.hpp b/external_codes/boost_multi/multi/include/multi/detail/cuda/ptr.hpp deleted file mode 100644 index 17df9faabb..0000000000 --- a/external_codes/boost_multi/multi/include/multi/detail/cuda/ptr.hpp +++ /dev/null @@ -1,318 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS -(echo "#include\""$0"\"" > $0.cpp) && nvcc --compiler-options -std=c++14,-Wall,-Wextra,-Wpedantic`#,-Wfatal-errors` -D_TEST_BOOST_MULTI_DETAIL_MEMORY_CUDA_PTR $0.cpp -o $0x && $0x && rm $0x $0.cpp; exit -#endif - -#ifndef BOOST_MULTI_DETAIL_MEMORY_CUDA_PTR_HPP -#define BOOST_MULTI_DETAIL_MEMORY_CUDA_PTR_HPP - -#include // cudaError_t - -#include -#include // nullptr_t -#include // random_access_iterator_tag - -#include // is_const - -namespace boost { -namespace multi {namespace detail { -namespace memory {namespace cuda { - -template struct ref; - -//template class allocator; - -template struct ptr; - -template -class ptr { - protected: - using impl_t = Ptr; - impl_t impl_; - - private: - template friend class allocator; - template friend class ptr; - template{}>> - ptr(ptr const& p) : impl_{const_cast(impl_)}{} - template friend ptr const_pointer_cast(ptr const&); - - public: - explicit ptr(impl_t impl) : impl_{impl}{} - ptr() = default; - ptr(ptr const&) = default; - - // cppcheck-suppress noExplicitConstructor ; initialize from nullptr - ptr(std::nullptr_t n) : impl_{n} {} - - template().impl_})> - // cppcheck-suppress noExplicitConstructor ; TODO(correaa) : implement implicit propagation - ptr(Other const& o) : impl_{o.impl_}{} - - ptr& operator=(ptr const&) = default; - auto operator==(ptr const& other) const{return impl_==other.impl_;} - auto operator!=(ptr const& other) const{return impl_!=other.impl_;} - - using element_type = typename std::pointer_traits::element_type; - using 
difference_type = typename std::pointer_traits::difference_type; - using value_type = T; - using pointer = ptr; - using reference = ref; - using iterator_category = typename std::iterator_traits::iterator_category; -// using iterator_concept = typename std::iterator_traits::iterator_concept; - explicit operator bool() const{return impl_;} - - ptr& operator++(){++impl_; return *this;} - ptr& operator--(){--impl_; return *this;} - ptr operator++(int){auto tmp = *this; ++(*this); return tmp;} - ptr operator--(int){auto tmp = *this; --(*this); return tmp;} - ptr& operator+=(typename ptr::difference_type n){impl_+=n; return *this;} - ptr& operator-=(typename ptr::difference_type n){impl_+=n; return *this;} - ptr operator+(typename ptr::difference_type n) const{return ptr{impl_ + n};} - ptr operator-(typename ptr::difference_type n) const{return ptr{impl_ - n};} - ref operator*() const{return {*this};} - ref operator[](difference_type n){return *operator+(n);}//*((*this)+n);} - friend ptr to_address(ptr const& p){return p;} - typename ptr::difference_type operator-(ptr const& other) const{return impl_-other.impl_;} -}; - -template -class ptr{ - using T = void const; - using impl_t = Ptr; - impl_t impl_; - template friend class ptr; - template friend ptr const_pointer_cast(ptr const&); - explicit ptr(impl_t impl) : impl_{impl}{} - - public: - ptr() = default; - ptr(ptr const&) = default; - - // cppcheck-suppress noExplicitConstructor ; initialize from nullptr - ptr(std::nullptr_t n) : impl_{n}{} - - template().impl_})> - // cppcheck-suppress noExplicitConstructor ; any pointer is convertible to void pointer - ptr(Other const& o) : impl_{o.impl_}{} - - ptr& operator=(ptr const&) = default; - - using pointer = ptr; - using element_type = typename std::pointer_traits::element_type; - using difference_type = void;//typename std::pointer_traits::difference_type; - explicit operator bool() const{return impl_;} - auto operator==(ptr const& other) const{return 
impl_==other.impl_;} - auto operator!=(ptr const& other) const{return impl_!=other.impl_;} - friend ptr to_address(ptr const& p){return p;} -}; - -template -struct ptr{ - protected: - using T = void; - using impl_t = Ptr; - impl_t impl_; - - private: - explicit ptr(impl_t impl) : impl_{impl} {} - ptr(ptr const& p) : impl_{const_cast(p.impl_)} {} - template friend ptr const_pointer_cast(ptr const&); - template friend class ptr; - - public: - ptr() = default; - ptr(ptr const&) = default; - - // cppcheck-suppress noExplicitConstructor ; initialize from nullptr - ptr(std::nullptr_t n) : impl_{n} {} - - template().impl_})> - // cppcheck-suppress noExplicitConstructor ; any pointer is convertible to void pointer - ptr(Other const& o) : impl_{o.impl_} {} - - ptr& operator=(ptr const&) = default; - - auto operator==(ptr const& other) const{return impl_==other.impl_;} - auto operator!=(ptr const& other) const{return impl_!=other.impl_;} - - using pointer = ptr; - using element_type = typename std::pointer_traits::element_type; - using difference_type = void;// typename std::pointer_traits::difference_type; - - explicit operator bool() const{return impl_;} - friend ptr to_address(ptr const& p){return p;} -}; - -template -ptr const_pointer_cast(ptr const& p){return {p.impl_};} - -template struct overload{}; //template<> struct overload<>{}; -template -struct overload : F, Fs...{ - overload(F f, Fs... fs) : F{std::move(f)}, Fs{std::move(fs)}...{} - using F::operator(); -}; -template -overload make_overload(Fs&&... 
fs){return {std::forward(fs)...};} - -template struct ref; - -template<> struct ref{}; - -template -struct ref : private ptr{ - using value_type = T; - using reference = value_type&; - using pointer = ptr; - -private: - explicit ref(pointer p) : ptr{std::move(p)}{} - friend class ptr; - ptr operator&(){return *this;} - struct skeleton_t { - std::array buff; // char buff[sizeof(T)]; - T* p_; - explicit skeleton_t(T* p) : p_{p} {cudaError_t s = cudaMemcpy(buff.data(), p_, buff.size(), cudaMemcpyDeviceToHost); assert(s == cudaSuccess);} - operator T&() && {return reinterpret_cast(buff);} - void conditional_copyback_if_not(std::false_type) const { - cudaError_t s = cudaMemcpy(p_, buff.data(), buff.size(), cudaMemcpyHostToDevice); assert(s == cudaSuccess); - } - void conditional_copyback_if_not(std::true_type) const {} - ~skeleton_t(){conditional_copyback_if_not(std::is_const{});} - }; - skeleton_t skeleton()&&{return {this->impl_};} - - public: - // cppcheck-suppress noExplicitConstructor ; bug in cppcheck 2.3 - ref(ref&& r) : ptr{r}{} - ref& operator=(ref const&)& = delete; - - private: - ref& move_assign(ref&& other, std::true_type)&{ - cudaError_t s = cudaMemcpy(this->impl_, other.impl_, sizeof(T), cudaMemcpyDeviceToDevice); assert(s == cudaSuccess); - return *this; - } - ref& move_assign(ref&& other, std::false_type)&{ - cudaError_t s = cudaMemcpy(this->impl_, other.impl_, sizeof(T), cudaMemcpyDeviceToDevice); assert(s == cudaSuccess); - return *this; - } -public: - ref&& operator=(ref&& other)&&{return std::move(move_assign(std::move(other), std::is_trivially_copy_assignable{}));} -private: -public: - template - auto operator+(Other&& o)&& - ->decltype(std::move(*this).skeleton() + std::forward(o)) { - return std::move(*this).skeleton() + std::forward(o); } -// template, ref>{}> > -// friend auto operator+(Self&& self, O&& o) -// ->decltype(std::forward(self).skeleton() + std::forward(o)){ -// return std::forward(self).skeleton() + std::forward(o);} - ref&& 
operator=(value_type const& t) && { - make_overload( - [&](std::true_type ) {cudaError_t s= cudaMemcpy(this->impl_, std::addressof(t), sizeof(T), cudaMemcpyHostToDevice); assert(s == cudaSuccess);}, - [&](std::false_type) { - std::array buff; // char buff[sizeof(T)]; - cudaError_t s1 = cudaMemcpy(buff.data(), this->impl_, buff.size(), cudaMemcpyDeviceToHost); assert(s1 == cudaSuccess); - reinterpret_cast(buff) = t; - cudaError_t s2 = cudaMemcpy(this->impl_, buff.data(), buff.size(), cudaMemcpyHostToDevice); assert(s2 == cudaSuccess); - } - )(std::is_trivially_copy_assignable{}); - return std::move(*this); - } - template - decltype(auto) operator==(ref&& other) && { - std::array buff1; // char buff1[sizeof(T)]; - cudaError_t s1 = cudaMemcpy(buff1.data(), this->impl_, buff1.size(), cudaMemcpyDeviceToHost); assert(s1 == cudaSuccess); - std::array buff2; // char buff2[sizeof(Other)]; - cudaError_t s2 = cudaMemcpy(buff2.data(), other.impl_, buff2.size(), cudaMemcpyDeviceToHost); assert(s2 == cudaSuccess); - return reinterpret_cast(buff1) == reinterpret_cast(buff2); - } - template - decltype(auto) operator!=(ref&& other) && { - std::array buff1; // char buff1[sizeof(T)]; - cudaError_t s1 = cudaMemcpy(buff1.data(), this->impl_, buff1.size(), cudaMemcpyDeviceToHost); assert(s1 == cudaSuccess); - std::array buff2; // char buff2[sizeof(Other)]; - cudaError_t s2 = cudaMemcpy(buff2.data(), other.impl_, buff2.size(), cudaMemcpyDeviceToHost); assert(s2 == cudaSuccess); - return reinterpret_cast(buff1) != reinterpret_cast(buff2); - } - operator T() && { - static_assert(not std::is_same{}, "!"); - std::array buff; // char buff[sizeof(T)]; - cudaError_t s = cudaMemcpy(buff.data(), this->impl_, buff.size(), cudaMemcpyDeviceToHost); assert(s == cudaSuccess ); - return std::move(reinterpret_cast(buff)); - } - template()+=std::declval())> - decltype(auto) operator+=(Other&& o) && {std::move(*this).skeleton()+=o; return *this;} - template()-=std::declval())> - decltype(auto) 
operator-=(Other&& o) && {std::move(*this).skeleton()-=o;} - friend void swap(ref&& a, ref&& b) {T tmp = std::move(a); a = std::move(b); b = std::move(tmp);} - decltype(auto) operator++() && {++(std::move(*this).skeleton()); return *this;} - decltype(auto) operator--() && {--(std::move(*this).skeleton()); return *this;} -}; - -} // end namespace cuda -} // end namespace memory -} // end namespace detail - -} // end namespace multi -} // end namespace boost - -#ifdef _TEST_BOOST_MULTI_DETAIL_MEMORY_CUDA_PTR - -#include -#include -#include "../../../multi/array.hpp" -#include "../cuda/allocator.hpp" - -namespace boost{ -namespace multi{namespace cuda{ - template - using array = multi::array>; -}} -} - -namespace multi = boost::multi; -namespace cuda = multi::detail::memory::cuda; - -void add_one(double& d){d += 1.;} -template -void add_one(T&& t){std::forward(t) += 1.;} - -int main() { - - static_assert(std::is_same>::element_type, double>{}, "!"); - cuda::allocator calloc; - cuda::ptr p = calloc.allocate(100); - cuda::ptr v = p; - cuda::ptr vc{v}; - v = const_pointer_cast(vc); - assert( vc == v ); - std::pointer_traits::rebind pc = p; // cuda::ptr pc = p; - assert( pc == p ); - using cuda::const_pointer_cast; - auto end = p + 100; - auto rbegin = std::make_reverse_iterator(end); - auto rend = std::make_reverse_iterator(p); - std::transform(rbegin, rend, rbegin, [](auto&& e){return std::forward(e) + 99.;}); - assert( p[11] == 99. ); - p[33] = 123.; - p[99] = 321.; -// p[33] += 1; - add_one(p[33]); - double p33 = p[33]; - assert( p33 == 124. ); - assert( p[33] == 124. ); - assert( p[33] == p[33] ); - swap(p[33], p[99]); - assert( p[99] == 124. ); - assert( p[33] == 321. 
); - std::cout << p[33] << std::endl; - calloc.deallocate(p, 100); - - multi::array> arr2(multi::array::extensions_type{100l}, 999.); - - assert(size(arr2) == 100); -} -#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/detail/cuda/test/cuda_stack.cpp b/external_codes/boost_multi/multi/include/multi/detail/cuda/test/cuda_stack.cpp deleted file mode 100644 index cbcbe1b544..0000000000 --- a/external_codes/boost_multi/multi/include/multi/detail/cuda/test/cuda_stack.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS -nvcc -ccbin cuda-c++ -std=c++14 $0 -o $0x && $0x && rm -f $0x; exit -#endif - -#include "../../../../multi/array.hpp" -#include "../../../../multi/detail/stack_allocator.hpp" -#include "../../../../multi/detail/cuda/allocator.hpp" - -#include - -namespace multi = boost::multi; -namespace cuda = multi::detail::memory::cuda; - -using std::cout; - -int main(){ - { - std::size_t stack_size = 4000; - multi::stack_buffer> buf{stack_size}; - for(int i = 0; i != 3; ++i){ - cout<<"pass "<< i << std::endl; - { - multi::array>> A({2, 10}, &buf); - multi::array>> B({3, 10}, &buf); - multi::array>> C({4, 10}, &buf); - for(int j = 0; j != 100; ++j) - multi::array>> D({4, 10}, &buf); - B[1][1] = 33.; - B[2][2] = 33.; - assert( B[1][1] == B[2][2] ); - } - cout - <<" size: "<< buf.size() - <<"\n hits: "<< buf.hits() - <<"\n misses "<< buf.misses() - <<"\n allocated(bytes) "<< buf.allocated_bytes() - <<"\n deallocated(bytes) "<< buf.deallocated_bytes() - <<"\n max_needed(bytes) "<< buf.max_needed() - <<"\n stack recovered(bytes) " << buf.stack_recovered() - << std::endl - ; - assert( buf.allocated_bytes() == buf.deallocated_bytes() ); - if(buf.max_needed() > buf.size()) buf.reset(buf.max_needed()); - } - } - assert( cuda::allocation_counter::n_allocations == 1 ); -} - diff --git a/external_codes/boost_multi/multi/include/multi/detail/fix_complex_traits.hpp 
b/external_codes/boost_multi/multi/include/multi/detail/fix_complex_traits.hpp deleted file mode 100644 index 7b4cc2b184..0000000000 --- a/external_codes/boost_multi/multi/include/multi/detail/fix_complex_traits.hpp +++ /dev/null @@ -1,44 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2022 Alfredo A. Correa - -#ifndef MULTI_DETAIL_FIX_COMPLEX_TRAITS_HPP -#define MULTI_DETAIL_FIX_COMPLEX_TRAITS_HPP - -#include "../detail/type_traits.hpp" - -#include "../detail/fix_complex_traits.hpp" - -#include - -namespace boost::multi { - -#ifndef NDEBUG -// NOLINTNEXTLINE(clang-diagnostic-#pragma-messages) -#pragma message "By including this header, the behavior of initialization of std::complex in multi::array's changes. std::complex elements will not be initialized." -#endif - -template struct is_trivially_default_constructible> : std::is_trivially_default_constructible {}; -template struct is_trivial> : std::is_trivial {}; - -template constexpr bool is_trivially_default_constructible_v = boost::multi::is_trivially_default_constructible::value; -template constexpr bool is_trivial_v = boost::multi::is_trivial::value; - -} // end namespace boost::multi - -static_assert(not std::is_trivially_default_constructible<::std::complex>::value); -static_assert(not std::is_trivially_default_constructible<::std::complex>::value); - -static_assert(boost::multi::is_trivially_default_constructible<::std::complex>::value); -static_assert(boost::multi::is_trivially_default_constructible<::std::complex>::value); - -static_assert(boost::multi::is_trivial<::std::complex>::value); -static_assert(boost::multi::is_trivial<::std::complex>::value); - -static_assert(std::is_trivially_assignable<::std::complex&, ::std::complex>::value); -static_assert(std::is_trivially_assignable<::std::complex&, ::std::complex>::value); - -static_assert(std::is_trivially_copyable<::std::complex>::value); -static_assert(std::is_trivially_copyable<::std::complex>::value); - 
-#endif - diff --git a/external_codes/boost_multi/multi/include/multi/detail/generic_allocator.hpp b/external_codes/boost_multi/multi/include/multi/detail/generic_allocator.hpp deleted file mode 100644 index 52b2f7fe2e..0000000000 --- a/external_codes/boost_multi/multi/include/multi/detail/generic_allocator.hpp +++ /dev/null @@ -1,110 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2021 Alfredo A. Correa - -#ifndef BOOST_MULTI_DETAIL_GENERIC_ALLOCATOR_HPP -#define BOOST_MULTI_DETAIL_GENERIC_ALLOCATOR_HPP - -#include "../detail/memory.hpp" - -#include -#include - -#if __cplusplus > 201703L -#include -#endif -// static_assert(__cpp_lib_experimental_memory_resources==201402, "!"); -#include // for forward - -namespace boost { -namespace multi { - -template -auto allocator_of(MR& mr) -->decltype(mr->allocator()) { - return mr->allocator(); } - -inline std::allocator& allocator_of(...) { - static std::allocator instance; - return instance; -} - -template -class generic_allocator{ - using memory_resource_type = MemoryResource; - memory_resource_type* mr_; - template friend class generic_allocator; - - public: - using value_type = T; - using pointer = typename std::pointer_traits()->allocate(0))>::template rebind; - using difference_type = typename std::pointer_traits::difference_type; - using size_type = std::make_unsigned_t; - - generic_allocator() : mr_{nullptr} {} - - // cppcheck-suppress noExplicitConstructor ; allocators are pointers to memory resources - generic_allocator(memory_resource_type* mr) : mr_{mr} {} // NOLINT(runtime/explicit) - - template - generic_allocator(generic_allocator const& other) - : mr_{other.mr_} {} - - bool operator==(generic_allocator const& o) const {return mr_ == o.mr_;} - bool operator!=(generic_allocator const& o) const {return not(o==*this);} - - pointer allocate(size_type n) { - if(n and !mr_) throw std::bad_alloc{}; - return static_cast(mr_->allocate(n*sizeof(value_type))); - } - 
void deallocate(pointer p, size_type n) { - if(n==0 and p == nullptr) return; - mr_->deallocate(p, n*sizeof(value_type)); - } - template - void construct(pointer p, Args&&... args) { -// ->decltype(allocator_traits()))>>::construct(allocator_of(*mr_), p, std::forward(args)...)){ - // mr_->allocator().construct(p, std::forward(args)...); - // using TA = allocator_traits>; - allocator_traits>::construct(allocator_of(mr_), p, std::forward(args)...); - } - decltype(auto) destroy(pointer p){ - // mr_->allocator().destroy(p); - allocator_traits>::destroy(allocator_of(mr_), p); - } -}; - -} // end namespace multi -} // end namespace boost - -#ifdef _TEST_BOOST_MULTI_DETAIL_GENERIC_ALLOCATOR - -#include -#include "../array.hpp" -#include - -namespace multi = boost::multi; -using std::cout; - -int main() { -#if 1 - multi::generic_allocator ga(std::pmr::get_default_resource()); - double* p = ga.allocate(1); - std::allocator_traits>::construct(ga, p, 8.); -// ga.construct(p, 8.); - assert( *p == 8. ); - - std::vector> v(100, std::pmr::get_default_resource()); -// std::vector v(100, 1.2, multi::allocator{}); // needs C++17 CTAD - multi::array> m({2, 4}, 0., std::pmr::get_default_resource()); -// multi::array m({2,4}, 0., pmr::get_default_resource()); // needs C++17 CTAD - m[1][3] = 99.; - assert( m[1][3] == 99. ); -#endif -} -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/detail/layout.hpp b/external_codes/boost_multi/multi/include/multi/detail/layout.hpp deleted file mode 100644 index 8c8acec8bd..0000000000 --- a/external_codes/boost_multi/multi/include/multi/detail/layout.hpp +++ /dev/null @@ -1,705 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. 
Correa - -#ifndef MULTI_DETAIL_LAYOUT_HPP -#define MULTI_DETAIL_LAYOUT_HPP - -#include "index_range.hpp" - -#include "tuple_zip.hpp" - -#include "multi/config/ASSERT.hpp" - -#include "multi/detail/operators.hpp" - -//#include // for apply -#include // for make_signed_t -#include // for swap - -#if defined(__NVCC__) -#define HD __host__ __device__ -#else -#define HD -#endif - -namespace boost::multi { - -namespace detail { - -template -constexpr auto tuple_tail_impl(Tuple&& t, std::index_sequence /*012*/) { // NOLINT(readability-identifier-length) std naming - (void)t; // workaround bug warning in nvcc - using boost::multi::detail::get; - return boost::multi::detail::tuple{std::forward(t))>(get(t))...}; -} - -template -constexpr auto tuple_tail(Tuple&& t) // NOLINT(readability-identifier-length) std naming -->decltype(tuple_tail_impl(t, std::make_index_sequence> - 1U>())) { - return tuple_tail_impl(t, std::make_index_sequence> - 1U>()); } - -} // end namespace detail - -template struct layout_t; - -template -struct extensions_t { -// using base_ = std::decay_t()), std::declval::base_>()))>; - using base_ = boost::multi::detail::tuple_prepend_t::base_>; - - private: - base_ impl_; - - public: - static constexpr dimensionality_type dimensionality = D; - - extensions_t() = default; - using nelems_type = multi::index; - - template = 0> - // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) - constexpr extensions_t(multi::size_t size) : extensions_t{index_extension{size}} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : allow terse syntax - - template = 0> - // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) - constexpr extensions_t(index_extension ext1) : impl_{ext1} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) allow terse syntax - - template = 0> - constexpr extensions_t(index_extension ext1, index_extension ext2) : 
impl_{ext1, ext2} {} - - template = 0> - constexpr extensions_t(index_extension ext1, index_extension ext2, index_extension ext3) : impl_{ext1, ext2, ext3} {} - - template = 0> - constexpr extensions_t(index_extension ext1, index_extension ext2, index_extension ext3, index_extension ext4) : impl_{ext1, ext2, ext3, ext4} {} - - template = 0> - constexpr extensions_t(index_extension ext1, index_extension ext2, index_extension ext3, index_extension ext4, index_extension ext5) : impl_{ext1, ext2, ext3, ext4, ext5} {} - - template = 0> - constexpr extensions_t(index_extension ext1, index_extension ext2, index_extension ext3, index_extension ext4, index_extension ext5, index_extension ext6) : impl_{ext1, ext2, ext3, ext4, ext5, ext6} {} - - template{}}), std::enable_if_t = 0> - // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) - constexpr extensions_t(tuple extensions) : impl_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) - - template{}}), std::enable_if_t = 0> - // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) - constexpr extensions_t(tuple extensions) : impl_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) - - template{}}), std::enable_if_t = 0> - // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) - constexpr extensions_t(tuple extensions) : impl_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) - - template{}}), std::enable_if_t = 0> - // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) - constexpr extensions_t(tuple extensions) : impl_{std::move(extensions)} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) - - template - constexpr explicit extensions_t(tuple const& tup) - : extensions_t(tup, 
std::make_index_sequence(D)>()) {} - - constexpr extensions_t(index_extension const& extension, typename layout_t::extensions_type const& other) - : extensions_t(tuple{extension, other.base()}) {} - - constexpr auto base() const& -> base_ const& {return impl_;} - - friend constexpr auto operator*(index_extension const& extension, extensions_t const& self) -> extensions_t { - return extensions_t{tuple{extension, self.base()}}; - } - - friend HD auto operator==(extensions_t const& self, extensions_t const& other) {return self.impl_ == other.impl_;} - friend HD auto operator!=(extensions_t const& self, extensions_t const& other) {return self.impl_ != other.impl_;} - -// using indices_type = decltype(tuple_cat(make_tuple(multi::index{}), typename extensions_t::indices_type{})); - using indices_type = multi::detail::tuple_prepend_t::indices_type>; - - [[nodiscard]] /*[[gnu::pure]]*/ constexpr auto from_linear(nelems_type const& n) const -> indices_type { - // auto const sub_extensions = extensions_t{detail::tuple_tail(this->base())}; - auto const sub_num_elements = extensions_t{tail(this->base())}.num_elements(); - assert( sub_num_elements != 0 ); - // return multi::detail::tuple{n/sub_num_elements, sub_extensions.from_linear(n%sub_num_elements)}; - return multi::detail::tuple{ - n/sub_num_elements, - extensions_t{tail(this->base())}.from_linear(n%sub_num_elements) - }; - } - - friend constexpr auto operator%(nelems_type idx, extensions_t const& extensions) {return extensions.from_linear(idx);} - - constexpr explicit operator bool() const {return not layout_t{*this}.empty();} - - template - constexpr auto to_linear(index const& idx, Indices const&... rest) const { - auto const sub_extensions = extensions_t{tail(this->base())}; - return idx*sub_extensions.num_elements() + sub_extensions.to_linear(rest...); - } - template - constexpr auto operator()(index idx, Indices... 
rest) const {return to_linear(idx, rest...);} - - template - constexpr auto next_canonical(index& idx, Indices&... rest) const -> bool { - if(extensions_t{tail(this->base())}.next_canonical(rest...)) {++idx;} - if(idx == head(impl_).last()) { - idx = head(impl_).first(); - return true; - } - return false; - } - template - constexpr auto prev_canonical(index& idx, Indices&... rest) const -> bool { - if(extensions_t{tail(this->base())}.prev_canonical(rest...)) {--idx;} - if(idx < head(impl_).first()) { - idx = head(impl_).back(); - return true; - } - return false; - } - - private: - template - void serialize_impl(Archive& arxiv, std::index_sequence /*012*/) { - using boost::multi::detail::get; - (void)std::initializer_list{(arxiv & multi::archive_traits::make_nvp("extension", get(impl_)) , 0U)...}; - // (void)std::initializer_list{(arxiv & boost::serialization:: make_nvp("extension", std::get(impl_)) , 0U)...}; - // (void)std::initializer_list{(arxiv & cereal:: make_nvp("extension", std::get(impl_)) , 0U)...}; - // (void)std::initializer_list{(arxiv & std::get(impl_) , 0U)...}; - } - - public: - template - void serialize(Archive& arxiv, const unsigned int /*version*/) { - serialize_impl(arxiv, std::make_index_sequence(D)>()); - } - - private: - template(std::declval())...})> - constexpr extensions_t(Array const& tup, std::index_sequence /*012*/) : impl_{boost::multi::detail::get(tup)...} {} - - static constexpr auto multiply_fold() -> size_type {return static_cast(1);} - static constexpr auto multiply_fold(size_type const& size) -> size_type {return static_cast(size);} - template - static constexpr auto multiply_fold(size_type const& size, As const&... 
rest) -> size_type {return static_cast(size)*static_cast(multiply_fold(rest...));} // TODO(correaa) revise casts - - template constexpr auto num_elements_impl(std::index_sequence /*012*/) const -> size_type { - using boost::multi::detail::get; - return static_cast(multiply_fold(static_cast(get(impl_).size())...)); - } - - public: - constexpr auto num_elements() const -> size_type { - return static_cast(num_elements_impl(std::make_index_sequence(D)>())); - } - friend constexpr auto intersection(extensions_t const& self, extensions_t const& other) -> extensions_t{ - using boost::multi::detail::get; - return extensions_t{ - tuple{ - index_extension{intersection(get<0>(self.impl_), get<0>(other.impl_))}, - intersection( extensions_t{tail(self.base())}, extensions_t{tail(other.base())} ).base() - } - }; - } - - template - friend constexpr auto get(extensions_t const& self) -> typename std::tuple_element::type { - using boost::multi::detail::get; - return get(self.base()); - } - -}; - -template<> struct extensions_t<0> { - using base_ = tuple<>; - - private: - base_ impl_; - - public: - static constexpr dimensionality_type dimensionality = 0; // TODO(correaa): consider deprecation - - using rank = std::integral_constant; - - using nelems_type = index; - - explicit extensions_t(tuple<> const& tup) : impl_{tup} {} - - extensions_t() = default; - - constexpr auto base() const -> base_ const& {return impl_;} - - template void serialize(Archive&/*ar*/, unsigned /*version*/) {} - - static constexpr auto num_elements() /*const*/ -> size_type {return 1;} - - using indices_type = tuple<>; - - [[nodiscard]] static constexpr auto from_linear(nelems_type const& n) /*const*/ -> indices_type { - assert(n == 0); (void)n; // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : constexpr function - return indices_type{}; - } - friend constexpr auto operator%(nelems_type const& n, extensions_t const& /*s*/) -> tuple<> {return /*s.*/from_linear(n);} - - static 
constexpr auto to_linear() /*const*/ -> difference_type {return 0;} - constexpr auto operator()() const {return to_linear();} - - static constexpr auto next_canonical() /*const*/ -> bool {return true;} - static constexpr auto prev_canonical() /*const*/ -> bool {return true;} - - friend constexpr auto intersection(extensions_t const& /*x1*/, extensions_t const& /*x2*/) -> extensions_t {return {};} - - constexpr HD auto operator==(extensions_t const& /*other*/) const {return true ;} - constexpr HD auto operator!=(extensions_t const& /*other*/) const {return false;} - - template - friend constexpr auto get(extensions_t const& self) -> typename std::tuple_element::type { - using boost::multi::detail::get; - return get(self.base()); - } -}; - -template<> struct extensions_t<1> { - using base_ = tuple; - - private: - base_ impl_; - - public: - static constexpr auto dimensionality = 1; // TODO(correaa): consider deprecation - - using nelems_type = index; - - // cppcheck-suppress noExplicitConstructor ; to allow terse syntax (compatible with std::vector(int) constructor - constexpr extensions_t(multi::size_t size) : impl_{multi::index_extension{0, size}} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) - - template - // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) - constexpr extensions_t(tuple extensions) : impl_{static_cast(head(extensions))} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) - - // cppcheck-suppress noExplicitConstructor ; to allow passing tuple // NOLINTNEXTLINE(runtime/explicit) - constexpr extensions_t(multi::index_extension const& other) : impl_{other} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) allow terse syntax - - constexpr explicit extensions_t(base_ tup) : impl_{tup} {} - - extensions_t() = default; - constexpr auto base() const -> base_ const& {return impl_;} - - HD constexpr auto operator==(extensions_t const& other) const 
-> bool {return impl_ == other.impl_;} - HD constexpr auto operator!=(extensions_t const& other) const -> bool {return impl_ != other.impl_;} - - constexpr auto num_elements() const -> size_type { - return head(impl_).size(); - } - - using indices_type = multi::detail::tuple; - - [[nodiscard]] constexpr auto from_linear(nelems_type const& n) const -> indices_type { // NOLINT(readability-convert-member-functions-to-static) TODO(correaa) - // assert(n <= num_elements()); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in constexpr function - // return std::make_tuple(n); - // return std::tuple{n}; - return indices_type{n}; - } - - friend - constexpr auto operator%(nelems_type idx, extensions_t const& extensions) - -> multi::detail::tuple { - return extensions.from_linear(idx); - } - - static constexpr auto to_linear(index const& idx) -> difference_type /*const*/ {return idx;} - constexpr auto operator()(index const& idx) const -> difference_type {return to_linear(idx);} - - template - constexpr auto next_canonical(index& idx) const -> bool { - ++idx; - using boost::multi::detail::get; - if(idx == get<0>(impl_).last()) { - idx = get<0>(impl_).first(); - return true; - } - return false; - } - constexpr auto prev_canonical(index& idx) const -> bool { - --idx; - using boost::multi::detail::get; - if(idx == get<0>(impl_).first() - 1) { - idx = get<0>(impl_).back(); - return true; - } - return false; - } - - friend auto intersection(extensions_t const& self, extensions_t const& other) { - return extensions_t{ - intersection( - boost::multi::detail::get<0>(self .impl_), - boost::multi::detail::get<0>(other.impl_) - ) - }; - } - template - void serialize(Archive& arxiv, unsigned /*version*/) { - using boost::multi::detail::get; - auto& extension_ = get<0>(impl_); - arxiv & multi::archive_traits::make_nvp("extension", extension_); - // arxiv & boost::serialization:: make_nvp("extension", extension ); - // arxiv & cereal:: 
make_nvp("extension", extension ); - // arxiv & extension ; - } - - template - friend constexpr auto get(extensions_t const& self) -> typename std::tuple_element::type { - using boost::multi::detail::get; - return get(self.base()); - } -}; - -template using iextensions = extensions_t; - -template -constexpr auto array_size_impl(const boost::multi::extensions_t&) - -> std::integral_constant(D)>; - -} // end namespace boost::multi - -namespace std { // NOLINT(cert-dcl58-cpp) : to implement structured bindings - - template - struct tuple_size> : std::integral_constant(D)> {}; - - template - struct tuple_element> { - using type = typename std::tuple_element::base_>::type; - }; - - template - constexpr auto get(boost::multi::extensions_t const& self) -> typename std::tuple_element>::type { - using boost::multi::detail::get; - return get(self.base()); - } - -} // end namespace std - -namespace boost::multi { - -struct monostate : equality_comparable { - friend HD constexpr auto operator==(monostate const& /*self*/, monostate const& /*other*/) {return true;} -}; - -template -struct layout_t<0, SSize> -: multi::equality_comparable > -{ - using dimensionality_type = multi::dimensionality_type; - using rank = std::integral_constant; - - using size_type = SSize; - using difference_type = std::make_signed_t; - using index = difference_type; - using index_extension = multi::index_extension; - using index_range = multi::range; - - using sub_type = monostate; - using stride_type = monostate; - using offset_type = index; - using nelems_type = index; - - using strides_type = tuple<>; - using offsets_type = tuple<>; - using nelemss_type = tuple<>; - - using extension_type = void; - - using extensions_type = extensions_t; - using sizes_type = tuple<>; - - static constexpr dimensionality_type rank_v = rank::value; - static constexpr dimensionality_type dimensionality = rank_v; // TODO(correaa) : consider deprecation - - friend constexpr auto dimensionality(layout_t const& /*self*/) 
{return rank_v;} - - private: - sub_type sub_ = {}; // TODO(correaa) use [[no_unique_address]] - stride_type stride_ = {}; // TODO(correaa) use [[no_unique_address]] - offset_type offset_ = 0; - nelems_type nelems_ = 1; // TODO(correaa) : or std::numeric_limits::max(); ? - - template friend struct layout_t; - - public: - layout_t() = default; - HD constexpr explicit layout_t(extensions_type const& /*nil*/) {} - - HD constexpr layout_t(sub_type sub, stride_type stride, offset_type offset, nelems_type nelems) // NOLINT(bugprone-easily-swappable-parameters) - : sub_{sub}, stride_{stride}, offset_{offset}, nelems_{nelems} {} - - [[nodiscard]] constexpr auto extensions() const {return extensions_type{};} - friend constexpr auto extensions(layout_t const& self) {return self.extensions();} - - [[nodiscard]] constexpr auto num_elements() const {return nelems_;} - friend constexpr auto num_elements(layout_t const& self) {return self.num_elements();} - - [[nodiscard]] constexpr auto sizes() const {return tuple<>{};} - friend constexpr auto sizes(layout_t const& self) {return self.sizes();} - - [[nodiscard]] constexpr auto strides() const {return strides_type{};} - [[nodiscard]] constexpr auto offsets() const {return offsets_type{};} - [[nodiscard]] constexpr auto nelemss() const {return nelemss_type{};} - - constexpr auto operator()() const {return offset_;} - constexpr explicit operator offset_type() const {return offset_;} - - constexpr auto stride() const -> stride_type = delete; - constexpr auto offset() const -> offset_type {return offset_;} - constexpr auto nelems() const -> nelems_type {return nelems_;} - constexpr auto sub() const -> sub_type = delete; - - constexpr auto size() const -> size_type = delete; - constexpr auto extension() const -> extension_type = delete; - - constexpr auto is_empty() const noexcept -> bool = delete; // or {return false;} or return nelems_ == 0; - [[nodiscard]] - constexpr auto empty() const noexcept {return nelems_ == 0;} - friend - 
constexpr auto empty(layout_t const& self) noexcept {return self.empty();} - - constexpr auto is_compact() const -> bool = delete; - - constexpr auto base_size() const -> size_type {return 0;} - constexpr auto origin() const -> offset_type {return 0;} - - constexpr auto reverse() -> layout_t& {return *this;} - constexpr auto scale(size_type /*size*/) const {return *this;} - -// friend constexpr auto operator!=(layout_t const& self, layout_t const& other) {return not(self == other);} - friend HD constexpr auto operator==(layout_t const& self, layout_t const& other) { - return - std::tie(self .sub_, self .stride_, self .offset_, self .nelems_) - == std::tie(other.sub_, other.stride_, other.offset_, other.nelems_) - ; - } - constexpr auto operator< (layout_t const& other) const -> bool { - return std::tie(offset_, nelems_) < std::tie(other.offset_, other.nelems_); - } - - constexpr auto rotate() -> layout_t& {return *this;} - constexpr auto unrotate() -> layout_t& {return *this;} - - constexpr auto hull_size() const -> size_type {return num_elements();} // not in bytes -}; - -template -struct layout_t -: multi::equality_comparable> -{ - using dimensionality_type = multi::dimensionality_type; - using rank = std::integral_constant; - - using sub_type = layout_t; - using size_type = SSize; - using difference_type = std::make_signed_t; - using index = difference_type; - - using index_extension = multi::index_extension; - using index_range = multi::range; - using stride_type = index; - using offset_type = index; - using nelems_type = index; - - using strides_type = typename boost::multi::detail::tuple_prepend::type; - using offsets_type = typename boost::multi::detail::tuple_prepend::type; - using nelemss_type = typename boost::multi::detail::tuple_prepend::type; - - using extension_type = index_extension; // not index_range! 
- - using extensions_type = extensions_t; - using sizes_type = typename boost::multi::detail::tuple_prepend::type; - - static constexpr dimensionality_type rank_v = rank::value; - static constexpr dimensionality_type dimensionality = rank_v; // TODO(correaa): consider deprecation - - friend constexpr auto dimensionality(layout_t const& /*self*/) {return rank_v;} - - private: - sub_type sub_ = {}; - stride_type stride_ = 1; // or std::numeric_limits::max()? - offset_type offset_ = 0; - nelems_type nelems_ = 0; - - template friend struct layout_t; - - public: - layout_t() = default; - HD constexpr explicit layout_t(extensions_type const& extensions) : - sub_{ - std::apply( - [](auto const&... subextensions) {return multi::extensions_t{subextensions...};}, - detail::tail(extensions.base()) - ) - }, - stride_{sub_.num_elements()}, - offset_{boost::multi::detail::get<0>(extensions.base()).first()*stride_}, - nelems_{boost::multi::detail::get<0>(extensions.base()).size()*sub().num_elements()} - {} - - HD constexpr layout_t(sub_type sub, stride_type stride, offset_type offset, nelems_type nelems) // NOLINT(bugprone-easily-swappable-parameters) - : sub_{sub}, stride_{stride}, offset_{offset}, nelems_{nelems} {} - - constexpr auto origin() const {return sub_.origin() - offset_;} - - private: - constexpr auto at_aux(index idx) const { - return sub_type{sub_.sub_, sub_.stride_, sub_.offset_ + offset_ + idx*stride_, sub_.nelems_}(); - } - - public: - constexpr auto operator[](index idx) const {return at_aux(idx);} - - template - constexpr auto operator()(index idx, Indices... 
rest) const {return operator[](idx)(rest...);} - constexpr auto operator()(index idx) const {return at_aux(idx);} - constexpr auto operator()() const {return *this;} - - constexpr auto sub() & -> sub_type & {return sub_ ;} - constexpr auto sub() const& -> sub_type const& {return sub_ ;} - friend constexpr auto sub(layout_t const& self) -> sub_type const& {return self.sub();} - - constexpr auto nelems() & -> nelems_type & {return nelems_ ;} - constexpr auto nelems() const& -> nelems_type const& {return nelems_ ;} - friend constexpr auto nelems(layout_t const& self) -> nelems_type const& {return self.nelems();} - - constexpr auto nelems(dimensionality_type dim) const {return (dim != 0)?sub_.nelems(dim - 1):nelems_;} - - friend HD constexpr auto operator==(layout_t const& self, layout_t const& other) -> bool { - return - std::tie(self .sub_, self .stride_, self .offset_, self. nelems_) - == std::tie(other.sub_, other.stride_, other.offset_, other.nelems_) - ; - } - constexpr auto operator< (layout_t const& other) const -> bool { - return - std::tie( sub_, stride_, offset_, nelems_) - < std::tie(other.sub_, other.stride_, other.offset_, other.nelems_) - ; - } - - constexpr auto reindex(index idx) -> layout_t& {offset_ = idx*stride_; return *this;} - template - constexpr auto reindex(index idx, Indices... 
rest) -> layout_t& {reindex(idx).rotate().reindex(rest...).unrotate(); return *this;} - - constexpr auto num_elements() const noexcept -> size_type {return size()*sub_.num_elements();} - friend constexpr auto num_elements(layout_t const& self) noexcept -> size_type {return self.num_elements();} - - constexpr auto is_empty() const noexcept {return nelems_ == 0;} - friend constexpr auto is_empty(layout_t const& self) noexcept {return self.is_empty();} - - constexpr auto empty() const noexcept {return is_empty();} - - friend constexpr auto size(layout_t const& self) noexcept -> size_type {return self.size();} - /*[[gnu::pure]]*/ - constexpr auto size() const noexcept -> size_type { - // if(nelems_ == 0) {return 0;} - // MULTI_ACCESS_ASSERT(stride_); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - if(nelems_ != 0) {MULTI_ACCESS_ASSERT(stride_ != 0);} - return nelems_ == 0?0:nelems_/stride_; - } - - constexpr auto stride() -> stride_type & {return stride_;} - constexpr auto stride() const -> stride_type const& {return stride_;} - - friend constexpr auto stride(layout_t const& self) -> index {return self.stride();} - - constexpr auto strides() const -> strides_type {return strides_type{stride(), sub_.strides()};} - friend constexpr auto strides(layout_t const& self) -> strides_type {return self.strides();} - - constexpr auto offset(dimensionality_type dim) const -> index {return (dim != 0)?sub_.offset(dim - 1):offset_;} - constexpr auto offset() const -> index {return offset_;} - friend constexpr auto offset(layout_t const& self) -> index {return self.offset();} - constexpr auto offsets() const {return boost::multi::detail::tuple{offset(), sub_.offsets()};} - constexpr auto nelemss() const {return boost::multi::detail::tuple{nelems(), sub_.nelemss()};} - - constexpr auto base_size() const {using std::max; return max(nelems_, sub_.base_size());} - - constexpr auto is_compact() const& {return 
base_size() == num_elements();} - friend constexpr auto is_compact(layout_t const& self) {return self.is_compact();} - - constexpr auto shape() const& -> decltype(auto) {return sizes();} - friend constexpr auto shape(layout_t const& self) -> decltype(auto) {return self.shape();} - - constexpr auto sizes() const noexcept {return tuple{size(), sub_.sizes()};} - - friend constexpr auto extension(layout_t const& self) {return self.extension();} - [[nodiscard]] /*[[gnu::pure]]*/ constexpr auto extension() const -> extension_type { - if(nelems_ == 0) {return index_extension{};} - assert(stride_ != 0); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay,hicpp-no-array-decay) : normal in a constexpr function - assert(offset_ % stride_ == 0); - assert(nelems_ % stride_ == 0); - return index_extension{offset_/stride_, (offset_ + nelems_)/stride_}; - } - - constexpr auto extensions() const {return extensions_type{tuple{extension(), sub_.extensions().base()}};} // tuple_cat(make_tuple(extension()), sub_.extensions().base())};} - friend constexpr auto extensions(layout_t const& self) -> extensions_type {return self.extensions();} - -// [[deprecated("use get(m.extensions()")]] // TODO(correaa) redeprecate, this is commented to give a smaller CI output - constexpr auto extension(dimensionality_type dim) const {return std::apply([](auto... extensions) {return std::array(D)>{extensions...};}, extensions().base()).at(static_cast(dim));} -// [[deprecated("use get(m.strides()) ")]] // TODO(correaa) redeprecate, this is commented to give a smaller CI output - constexpr auto stride (dimensionality_type dim) const {return std::apply([](auto... strides ) {return std::array(D)>{strides ...};}, strides () ).at(static_cast(dim));} -// [[deprecated("use get(m.sizes()) ")]] // TODO(correaa) redeprecate, this is commented to give a smaller CI output -// constexpr auto size (dimensionality_type dim) const {return std::apply([](auto... 
sizes ) {return std::array(D)>{sizes ...};}, sizes () ).at(static_cast(dim));} - - template - constexpr auto partition(Size const& count) -> layout_t& { - using std::swap; - stride_ *= count; - nelems_ *= count; - sub_.partition(count); - return *this; - } - - constexpr auto transpose() -> layout_t& { - using std::swap; - swap(stride_, sub_.stride_); - swap(offset_, sub_.offset_); - swap(nelems_, sub_.nelems_); - return *this; - } - constexpr auto reverse() -> layout_t& { - unrotate(); - sub_.reverse(); - return *this; - } - - constexpr auto rotate() -> layout_t& {if constexpr(D > 1) {transpose(); sub_. rotate();} return *this;} - constexpr auto unrotate() -> layout_t& {if constexpr(D > 1) {sub_.unrotate(); transpose();} return *this;} - - constexpr auto hull_size() const -> size_type { - if(is_empty()) {return 0;} - return std::abs(size()*stride())>std::abs(sub_.hull_size())?size()*stride():sub_.hull_size(); - } - - constexpr auto scale(size_type factor) const { - return layout_t{sub_.scale(factor), stride_*factor, offset_*factor, nelems_*factor}; - } -}; - -inline constexpr auto -operator*(layout_t<0>::index_extension const& extensions_0d, layout_t<0>::extensions_type const& /*zero*/) --> typename layout_t<1>::extensions_type { - return typename layout_t<1>::extensions_type{tuple::index_extension>{extensions_0d}}; -} - -inline constexpr auto operator*(extensions_t<1> const& extensions_1d, extensions_t<1> const& self) { - using boost::multi::detail::get; - return extensions_t<2>({get<0>(extensions_1d.base()), get<0>(self.base())}); -} - -} // end namespace boost::multi - -namespace std { - template<> struct tuple_size> : std::integral_constant {}; - template<> struct tuple_size> : std::integral_constant {}; - template<> struct tuple_size> : std::integral_constant {}; - template<> struct tuple_size> : std::integral_constant {}; - template<> struct tuple_size> : std::integral_constant {}; -} // end namespace std - -#endif diff --git 
a/external_codes/boost_multi/multi/include/multi/detail/operators.hpp b/external_codes/boost_multi/multi/include/multi/detail/operators.hpp deleted file mode 100644 index 04b49a96f8..0000000000 --- a/external_codes/boost_multi/multi/include/multi/detail/operators.hpp +++ /dev/null @@ -1,196 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa - -#ifndef MULTI_DETAIL_OPERATORS_HPP -#define MULTI_DETAIL_OPERATORS_HPP - -#include // for enable_if -#include // for forward - -namespace boost::multi { - -struct empty_base {}; - -template struct selfable { - using self_type = Self; - constexpr auto self() const -> self_type const& {return static_cast(*this);} - constexpr auto self() -> self_type & {return static_cast(*this);} - friend constexpr auto self(selfable const& self) -> self_type const& {return self.self();} -}; - -template struct equality_comparable2; - -template -struct equality_comparable2 : selfable { -// friend constexpr auto operator==(equality_comparable2 const& self, equality_comparable2 const& other) {return self.self() == other.self() ;} - friend constexpr auto operator!=(equality_comparable2 const& self, equality_comparable2 const& other) {return not(self.self() == other.self());} -}; - -template struct equality_comparable : equality_comparable2 {}; - -template struct totally_ordered2; - -template -struct totally_ordered2 : equality_comparable2, totally_ordered2> { - using self_type = Self; - constexpr auto self() const -> self_type const& {return static_cast(*this);} - -// friend auto operator< (totally_ordered2 const& self, totally_ordered2 const& other) -> bool {return self.self() < other.self() ;} - friend auto operator==(totally_ordered2 const& self, totally_ordered2 const& other) -> bool {return not(self.self() < other.self()) and not(other.self() < self.self());} -// friend auto operator!=(totally_ordered2 const& self, totally_ordered2 const& other) {return (s.self() < 
o.self()) or (o.self() < s.self());} - - friend auto operator<=(totally_ordered2 const& self, totally_ordered2 const& other) -> bool {return not(other.self() < self.self());} - - friend auto operator> (totally_ordered2 const& self, totally_ordered2 const& other) -> bool {return not(self.self() < other.self()) and not(self.self() == other.self());} - friend auto operator>=(totally_ordered2 const& self, totally_ordered2 const& other) -> bool {return not(self.self() < other.self());} -}; - -template using totally_ordered = totally_ordered2; - -template -struct totally_ordered2 { - template - friend constexpr auto operator<=(const T& self, const U& other) {return (self < other) or (self == other);} - template - friend constexpr auto operator>=(const T& self, const U& other) {return (other < self) or (self == other);} - template - friend constexpr auto operator> (const T& self, const U& other) {return other < self;} -}; - -template -struct copy_constructible {}; - -template -struct weakly_incrementable { -// friend T& operator++(weakly_incrementable& t){return ++static_cast(t);} -}; - -template -struct weakly_decrementable { -// friend T& operator--(weakly_decrementable& t){return --static_cast(t);} -}; - -template struct incrementable : totally_ordered {//, self_mutable { - friend constexpr auto operator++(incrementable& self, int) -> Self {Self tmp{self.self()}; ++self.self(); assert(self.self() > tmp); return tmp;} -}; - -template -struct decrementable : weakly_decrementable { - template{}>> - friend constexpr auto operator--(U& self, int) -> T {T tmp{self}; --self; return tmp;} -}; - -template -struct steppable : totally_ordered { - using self_type = Self; - constexpr auto self() const -> self_type const& {return static_cast(*this);} - constexpr auto self() -> self_type & {return static_cast(*this);} - - friend constexpr auto operator++(steppable& self, int) -> Self {Self tmp{self.self()}; ++self.self(); return tmp;} - friend constexpr auto operator--(steppable& 
self, int) -> Self {Self tmp{self.self()}; --self.self(); return tmp;} -}; - -template -struct affine_with_unit : steppable {//affine_with_unit > { - using self_type = Self; - constexpr auto cself() const -> self_type const& {return static_cast(*this);} - constexpr auto self() const -> self_type const& {return static_cast(*this);} - constexpr auto self() -> self_type & {return static_cast(*this);} - - using difference_type = Difference; - friend constexpr auto operator++(affine_with_unit& self) -> Self& {return self.self() += difference_type{1};} - friend constexpr auto operator--(affine_with_unit& self) -> Self& {return self.self() -= difference_type{1};} - - friend constexpr auto operator-(affine_with_unit const& self, difference_type const& diff) -> Self { - auto ret{self.self()}; - ret += (-diff); - return ret; - } - constexpr auto operator+(difference_type const& diff) const -> Self { - auto ret{cself()}; - ret += diff; - return ret; - } - friend constexpr auto operator+(difference_type const& diff, affine_with_unit const& self) -> Self { - auto ret{self.self()}; - ret += diff; - return ret; - } - friend constexpr auto operator<(affine_with_unit const& self, affine_with_unit const& other) -> bool { - return difference_type{0} < other.self() - self.self(); - } -}; - -template -struct dereferenceable { - using self_type = Self; - constexpr auto self() const -> self_type const& {return static_cast(*this);} - constexpr auto self() -> self_type & {return static_cast(*this);} - - using reference = Reference; - - constexpr auto operator*() const -> reference {return *(self().operator->());} -}; - -template -struct random_accessable // NOLINT(fuchsia-multiple-inheritance) -: affine_with_unit -, dereferenceable { - using difference_type = Difference; - using reference = Reference; - using iterator_category = std::random_access_iterator_tag; - - using self_type = Self; - constexpr auto self() const -> self_type const& {return static_cast(*this);} - constexpr auto self() 
-> self_type & {return static_cast(*this);} - - constexpr auto operator[](difference_type idx) const -> reference {return *(self() + idx);} -}; - -//template -//struct dereferenceable { -// using reference = Reference; -// friend constexpr auto operator*(dereferenceable const& t) -> reference {return *static_cast(t);} -//}; - -template -struct addable2 { - using difference_type = D; - template{}> > - friend constexpr auto operator+(TT&& self, difference_type const& diff) -> T {T tmp{std::forward(self)}; tmp += diff; return tmp;} - template{}> > - friend constexpr auto operator+(difference_type const& diff, TT&& self) -> T {return std::forward(self) + diff;} -}; - -template -struct subtractable2 { - using difference_type = D; - template - friend auto operator-(TT&& self, difference_type const& diff) -> T {T tmp{std::forward(self)}; tmp -= diff; return tmp;} -}; - -template -struct affine : addable2, subtractable2 { - using difference_type = Difference; -}; - -template -struct random_iterable { - constexpr auto cfront() const& -> decltype(auto) {return static_cast(*this).front();} - constexpr auto cback () const& -> decltype(auto) {return static_cast(*this).back() ;} - friend constexpr auto cfront(T const& self) -> decltype(auto) {return self.cfront();} - friend constexpr auto cback (T const& self) -> decltype(auto) {return self.cback() ;} -}; - -template -struct random_access_iterator : equality_comparable2 { - using difference_type = Difference; - using value_type = Value; - using pointer = Pointer; - using reference = Reference; - using iterator_category = std::random_access_iterator_tag; - auto operator*() const -> Reference {return *static_cast(*this);} -}; - -} // end namespace boost::multi -#endif diff --git a/external_codes/boost_multi/multi/include/multi/detail/serialization.hpp b/external_codes/boost_multi/multi/include/multi/detail/serialization.hpp deleted file mode 100644 index 5afd2e5564..0000000000 --- 
a/external_codes/boost_multi/multi/include/multi/detail/serialization.hpp +++ /dev/null @@ -1,148 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2021 Alfredo A. Correa - -#ifndef MULTI_DETAIL_SERIALIZATION_HPP -#define MULTI_DETAIL_SERIALIZATION_HPP - -#include // for std::for_each -#include // for std::uint32_t - -namespace boost { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat -namespace archive { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat -namespace detail { - -template class common_iarchive; -template class common_oarchive; - -} // end namespace detail -} // end namespace archive - -namespace serialization { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat - -template class nvp; // dependency "in name only" -template class array_wrapper; // dependency "in name only" - struct binary_object; // dependency "in name only", if you get an error here, it means that eventually you need to include #include - -template struct version; - -//template//, std::enable_if_t>{}, int> =0> -//auto operator>>(Archive& ar, T&& t) -> Archive& {return ar>> t;} - -} // end namespace serialization -} // end namespace boost - -namespace cereal { - -template struct OutputArchive; -template struct InputArchive; - -template class NameValuePair; // dependency "in name only", if you get an error here you many need to #include at some point - -} // end namespace cereal - -namespace boost { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat -namespace multi { - -template> and (MA::dimensionality > -1) , int> =0> -auto operator>>(Archive& arxiv, MA&& self) // this is for compatibility with Archive type -->decltype(arxiv>> self) { - return arxiv>> self; } - -template> and (MA::dimensionality > -1), int> =0> -auto operator& (Archive& arxiv, MA&& self) // this is for compatibility with Archive type -->decltype(arxiv& self) { - return arxiv& self; } - -template -struct 
archive_traits { - template - inline static auto make_nvp (char const* /*n*/, T&& value) noexcept {return std::forward(value);} -}; - -template -struct archive_traits, Ar> || std::is_base_of_v, Ar>>::type> { - template using nvp = boost::serialization::nvp ; - template using array_wrapper = boost::serialization::array_wrapper; - template struct binary_object_t {using type = boost::serialization::binary_object;}; - template inline static auto make_nvp (char const* name, T& value) noexcept -> const nvp {return nvp{name, value};} // NOLINT(readability-const-return-type) : original boost declaration - template inline static auto make_nvp (char const* name, T&& value) noexcept -> const nvp {return nvp{name, value};} // NOLINT(readability-const-return-type) : original boost declaration - - template inline static auto make_array ( T* first, std::size_t size) noexcept -> const array_wrapper {return array_wrapper{first, size};} // NOLINT(readability-const-return-type) : original boost declaration - template inline static auto make_binary_object( const void* first, std::size_t size) noexcept -> const typename binary_object_t::type {return typename binary_object_t::type(first, size); } // if you get an error here you need to eventually `#include`// NOLINT(readability-const-return-type,clang-diagnostic-ignored-qualifiers) : original boost declaration -}; - -#if 1 -template -struct archive_traits< - Ar, - typename std::enable_if< - std::is_base_of_v, Ar> or std::is_base_of_v, Ar> - or std::is_base_of_v, Ar> or std::is_base_of_v, Ar> - >::type -> { - using self_t = archive_traits, Ar> or std::is_base_of_v, Ar> - or std::is_base_of_v, Ar> or std::is_base_of_v, Ar> - >::type>; - - template - inline static auto make_nvp (char const* name, T&& value) noexcept {return cereal::NameValuePair{name, value};} // if you get an error here you many need to #include at some point - template - inline static auto make_nvp (char const* name, T& value) noexcept {return cereal::NameValuePair{name, 
value};} // if you get an error here you many need to #include at some point - - template - struct array_wrapper { - T* p_; - std::size_t c_; - template - void serialize(Archive& arxiv, const unsigned int /*version*/) { - for(std::size_t i = 0; i != c_; ++i) { // NOLINT(altera-unroll-loops) TODO(correaa) consider using an algorithm - auto& item = p_[i]; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) - arxiv & make_nvp("item" , item ); // "item" is the name used by Boost.Serialization XML make_array - // arxiv & boost::multi::archive_traits::make_nvp("element", element); - // arxiv & cereal::make_nvp("element", element); - // arxiv & CEREAL_NVP( element); - // arxiv & element ; - } - } - }; - - template - inline static auto make_array(T* ptr, std::size_t count) -> array_wrapper {return array_wrapper{ptr, count};} - - template - inline static auto make_nvp (char const* name, array_wrapper&& value) noexcept {return make_nvp(name, value);} -}; - -//template auto make_nvp(char const* n, T&& v) -> decltype(auto) {return archive_traits::make_nvp(n, std::forward(v));} // NOLINT(readability-const-return-type) -#endif - -} // end namespace multi -} // end namespace boost - -namespace boost { - -template -class multi_array; - -} // end namespace boost - -namespace boost { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat -namespace serialization { - -//template -//auto serialize(Archive& ar, boost::multi_array& arr, unsigned int /*version*/) -//{ -// auto x = boost::multi::extensions(arr); -// ar & multi::archive_traits::make_nvp("extensions", x); -// if( x != boost::multi::extensions(arr) ) { -// arr.resize( std::array{} ); -// arr.resize( std::array{static_cast(std::get<0>(x).size()), static_cast(std::get<1>(x).size())} ); -// } -// ar & multi::archive_traits::make_nvp("data_elements", multi::archive_traits::make_array(boost::multi::data_elements(arr), static_cast(boost::multi::num_elements(arr)))); -//} - -} // end namespace serialization -} // 
end namespace boost - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/detail/tuple_zip.hpp b/external_codes/boost_multi/multi/include/multi/detail/tuple_zip.hpp deleted file mode 100644 index f9060ef9e2..0000000000 --- a/external_codes/boost_multi/multi/include/multi/detail/tuple_zip.hpp +++ /dev/null @@ -1,322 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2021-2022 Alfredo A. Correa - -#ifndef MULTI_DETAIL_TUPLE_ZIP_HPP -#define MULTI_DETAIL_TUPLE_ZIP_HPP - -#include -#include - -#include // for deprecated functions - -namespace boost::multi { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat -namespace detail { - -template class tuple; - -template<> class tuple<> { // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - public: - constexpr tuple() = default; - constexpr tuple(tuple const&) = default; - - constexpr auto operator=(tuple const&) -> tuple& = default; - - constexpr auto operator==(tuple const& /*other*/) const -> bool {return true ;} - constexpr auto operator!=(tuple const& /*other*/) const -> bool {return false;} - - constexpr auto operator< (tuple const& /*other*/) const {return false;} - constexpr auto operator> (tuple const& /*other*/) const {return false;} -}; - -template class tuple : tuple { // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - T0 head_; - using tail_type = tuple; -// tuple tail_; // TODO(correaa) use [[no_unique_address]] in C++20 - - public: - constexpr auto head() const& -> T0 const& {return head_ ;} - constexpr auto head() && -> T0 && {return std::move(head_);} - constexpr auto head() & -> T0 & {return head_ ;} - - constexpr auto tail() const& -> tail_type const& {return static_cast(*this);} - constexpr auto tail() && -> tail_type && {return static_cast(*this);} - constexpr auto tail() & -> tail_type & {return static_cast(*this);} - - constexpr tuple() = default; - constexpr 
tuple(tuple const&) = default; - - // cppcheck-suppress noExplicitConstructor ; allow bracket init in function argument // NOLINTNEXTLINE(runtime/explicit) - constexpr tuple(T0 head, tuple tail) : tail_type{std::move(tail) }, head_{std::move(head)} {} - constexpr explicit tuple(T0 head, Ts... tail) : tail_type{std::move(tail)...}, head_{std::move(head)} {} - - constexpr auto operator=(tuple const&) -> tuple& = default; - - constexpr auto operator==(tuple const& other) const -> bool {return head_ == other.head_ and tail() == other.tail();} - constexpr auto operator!=(tuple const& other) const -> bool {return head_ != other.head_ or tail() != other.tail();} - - constexpr auto operator< (tuple const& other) const { - if(head_ < other.head_) {return true ;} - if(other.head_ < head_) {return false;} - return tail() < other.tail(); - } - constexpr auto operator> (tuple const& other) const { - if(head_ > other.head_) {return true ;} - if(other.head_ > head_) {return false;} - return tail() > other.tail(); - } -}; - -#if defined(__INTEL_COMPILER) // this instance is necessary due to a bug in intel compiler icpc -// TODO(correaa) : this class can be collapsed with the general case with [[no_unique_address]] in C++20 -template class tuple { // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - T0 head_; - tuple<> tail_; - - public: - constexpr auto head() const& -> T0 const& {return head_ ;} - constexpr auto head() && -> T0 && {return std::move(head_);} - constexpr auto head() & -> T0 & {return head_ ;} - - constexpr auto tail() const& -> tuple<> const& {return tail_ ;} - constexpr auto tail() && -> tuple<> && {return std::move(tail_);} - constexpr auto tail() & -> tuple<> & {return tail_ ;} - - constexpr tuple() = default; - constexpr tuple(tuple const&) = default; - - // cppcheck-suppress noExplicitConstructor ; allow bracket init in function argument // NOLINTNEXTLINE(runtime/explicit) - constexpr tuple(T0 t0, tuple<> sub) : 
head_{std::move(t0)}, tail_{sub} {} - constexpr explicit tuple(T0 t0) : head_{std::move(t0)}, tail_{} {} - - constexpr auto operator=(tuple const& other) -> tuple& = default; - - constexpr auto operator==(tuple const& other) const {return head_ == other.head_;} - constexpr auto operator!=(tuple const& other) const {return head_ != other.head_;} - - constexpr auto operator< (tuple const& other) const {return head_ < other.head_;} - constexpr auto operator> (tuple const& other) const {return head_ > other.head_;} -}; -#endif - -template tuple(T0, tuple) -> tuple; - -template constexpr auto mk_tuple(T0 head, Ts... tail) { - return tuple(std::move(head), std::move(tail)...); -} - -template struct tuple_prepend; - -template -struct tuple_prepend> { - using type = tuple; -}; - -template -using tuple_prepend_t = typename tuple_prepend::type; - -template -constexpr auto head(tuple const& t) -> decltype(auto) { // NOLINT(readability-identifier-length) std naming - return t.head(); -} - -template -constexpr auto head(tuple && t) -> decltype(auto) { // NOLINT(readability-identifier-length) std naming - return std::move(t.head()); -} - -template -constexpr auto head(tuple & t) -> decltype(auto) { // NOLINT(readability-identifier-length) std naming - return t.head(); -} - -template -constexpr auto tail(tuple const& t) -> decltype(t.tail()) {return t.tail();} // NOLINT(readability-identifier-length) std naming - -template -constexpr auto tail(tuple && t) -> decltype(std::move(t).tail()) {return std::move(t).tail();} // NOLINT(readability-identifier-length) std naming - -template -constexpr auto tail(tuple & t) -> decltype(t.tail()) {return t.tail();} // NOLINT(readability-identifier-length) std naming - -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function - #endif -#elif defined 
__NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function -#endif -template -constexpr auto get(tuple const& t) -> auto const& { // NOLINT(readability-identifier-length) std naming - if constexpr(N == 0) { - return t.head(); - } else { - return get(t.tail()); - } -} - -template -constexpr auto get(tuple& t) -> auto& { // NOLINT(readability-identifier-length) std naming - if constexpr(N == 0) { - return t.head(); - } else { - return get(t.tail()); - } -} - -template -constexpr auto get(tuple&& t) -> auto&& { // NOLINT(readability-identifier-length) std naming - if constexpr(N == 0) { - return std::move(t).head(); - } else { - return get(std::move(t.tail())); - } -} -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #pragma diagnostic pop - #endif -#elif defined __NVCOMPILER - #pragma diagnostic pop -#endif - -} // end namespace detail -} // end namespace boost::multi - -namespace std { // NOLINT(cert-dcl58-cpp) define stuff in STD - -template -struct tuple_size> { - // cppcheck-suppress unusedStructMember - static constexpr std::size_t value = sizeof...(Ts); -}; - -template -struct tuple_element<0, boost::multi::detail::tuple> { - using type = T0; -}; - -template -struct tuple_element> { - using type = typename tuple_element>::type; -}; - -template -constexpr auto get(boost::multi::detail::tuple const& t) // NOLINT(readability-identifier-length) std naming -->decltype(boost::multi::detail::get(t)) { - return boost::multi::detail::get(t); } - -template -constexpr auto get(boost::multi::detail::tuple & t) // NOLINT(readability-identifier-length) std naming -->decltype(boost::multi::detail::get(t)) { - return boost::multi::detail::get(t); } - -template -constexpr auto get(boost::multi::detail::tuple && t) // NOLINT(readability-identifier-length) std naming -->decltype(boost::multi::detail::get(std::move(t))) { - return boost::multi::detail::get(std::move(t)); } - -template 
-constexpr auto apply_timpl(F&& f, Tuple&& t, std::index_sequence/*012*/) -> decltype(auto) { // NOLINT(readability-identifier-length) std naming - (void)t; // fix "error #827: parameter "t" was never referenced" in NVC++ and "error #869: parameter "t" was never referenced" in oneAPI-ICPC - return std::forward(f)(boost::multi::detail::get(std::forward(t))...); -} - -template -constexpr auto apply(F&& f, boost::multi::detail::tuple const& t) -> decltype(auto) { // NOLINT(readability-identifier-length) std naming - return apply_timpl( - std::forward(f), t, - std::make_index_sequence{} - ); -} - -template -constexpr auto apply(F&& f, boost::multi::detail::tuple& t) -> decltype(auto) { // NOLINT(readability-identifier-length) std naming - return apply_timpl( - std::forward(f), t, - std::make_index_sequence{} - ); -} - -template -constexpr auto apply(F&& f, boost::multi::detail::tuple&& t) -> decltype(auto) { // NOLINT(readability-identifier-length) std naming - return apply_timpl( - std::forward(f), std::move(t), - std::make_index_sequence{} - ); -} - -} // end namespace std - -namespace boost::multi { // NOLINT(modernize-concat-nested-namespaces) keep c++14 compat -namespace detail { - -template -constexpr auto tuple_zip_impl(Tuple1&& tup1, Tuple2&& tup2, std::index_sequence /*012*/) { - using boost::multi::detail::get; - return boost::multi::detail::mk_tuple( - boost::multi::detail::mk_tuple( - get(std::forward(tup1)), - get(std::forward(tup2)) - )... - ); -} - -template -constexpr auto tuple_zip_impl(Tuple1&& tup1, Tuple2&& tup2, Tuple3&& tup3, std::index_sequence /*012*/) { - using boost::multi::detail::get; - return boost::multi::detail::mk_tuple( - boost::multi::detail::mk_tuple( - get(std::forward(tup1)), - get(std::forward(tup2)), - get(std::forward(tup3)) - )... 
- ); -} - -template -constexpr auto tuple_zip_impl(Tuple1&& tup1, Tuple2&& tup2, Tuple3&& tup3, Tuple4&& tup4, std::index_sequence /*012*/) { - using boost::multi::detail::get; - return boost::multi::detail::mk_tuple( - boost::multi::detail::mk_tuple( - get(std::forward(tup1)), - get(std::forward(tup2)), - get(std::forward(tup3)), - get(std::forward(tup4)) - )... - ); -} - -template -constexpr auto tuple_zip(T1&& tup1, T2&& tup2) { - return detail::tuple_zip_impl( - std::forward(tup1), std::forward(tup2), - std::make_index_sequence::type>::value>() - ); -} - -template -constexpr auto tuple_zip(T1&& tup1, T2&& tup2, T3&& tup3) { - return detail::tuple_zip_impl( - std::forward(tup1), std::forward(tup2), std::forward(tup3), - std::make_index_sequence::type>::value>() - ); -} - -template -constexpr auto tuple_zip(T1&& tup1, T2&& tup2, T3&& tup3, T4&& tup4) { - return detail::tuple_zip_impl( - std::forward(tup1), std::forward(tup2), std::forward(tup3), std::forward(tup4), - std::make_index_sequence::type>::value>() - ); -} - -} // end namespace detail -} // end namespace boost::multi -#endif diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/algorithm.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/algorithm.hpp deleted file mode 100644 index 43b436c3f3..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/algorithm.hpp +++ /dev/null @@ -1,688 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -$CXXX $CXXFLAGS $0 -o $0x -lcudart -lboost_unit_test_framework -lboost_timer&&$0x&&rm $0x;exit -#endif -#ifndef BOOST_MULTI_MEMORY_ADAPTORS_CUDA_ALGORITHM_HPP -#define BOOST_MULTI_MEMORY_ADAPTORS_CUDA_ALGORITHM_HPP - -#include "../cuda/cstring.hpp" -#include "../../../array_ref.hpp" -#include "../../../config/MARK.hpp" -#include "../../../detail/adl.hpp" - -#include -#include -#include -#include - -#include "../cuda/error.hpp" - -#include //TODO 
remove, handle complex_fix instead? - -namespace boost {namespace multi { -namespace memory {namespace cuda { - -template{}>> -ptr copy_n(ptr first, Size count, ptr result) { - return memcpy(result, first, count*sizeof(T)), result + count; -} - -template{}>> -ptr copy_n(U* first, Size count, ptr result) {return memcpy(result, first, count*sizeof(T)), result + count;} - -template{}>> -T* copy_n(ptr first, Size count, T* result) {return memcpy(result, first, count*sizeof(T)), result + count;} - -template{}>> -ptr uninitialized_copy_n(ptr first, Size count, ptr result) {return memcpy(result, first, count*sizeof(T)), result + count;} - -template{}>> -ptr uninitialized_copy_n(U* first, Size count, ptr result) {return memcpy(result, first, count*sizeof(T)), result + count;} - -template{}>> -T* uninitialized_copy_n(ptr first, Size count, T* result) {return memcpy(result, first, count*sizeof(T)), result + count;} - -template -auto copy(PtrU first, PtrU last, ptr result){ - return copy_n(first, std::distance(first, last), result); -} - -template -auto copy(ptr first, ptr last, ptr result){ - return copy_n(first, std::distance(first, last), result); -} - -//->decltype(copy_n(first, std::distance(first, last), result)){ -// return copy_n(first, std::distance(first, last), result);} - - -template -auto fill(memory::cuda::ptr first, memory::cuda::ptr last, T const& value) -->decltype(fill_n(first, std::distance(first, last), value)){ - return fill_n(first, std::distance(first, last), value);} - -template{}>> -memory::cuda::ptr fill_n(ptr const first, Size count, U const& value){ - if(std::find_if((char const*)(&value), (char const*)(&value) + sizeof(value), [](char c){return c!=0;}) == (char const*)(&value) + sizeof(value)){ -// if(std::find(reinterpret_cast(&value), reinterpret_cast(&value) + sizeof(value), true) == reinterpret_cast(&value) + sizeof(value)){ -// if(value == 0.){ - cuda::memset(first, 0, count*sizeof(T)); - } - else if(count--) for(ptr new_first = 
adl_copy_n(&value, 1, first); count;){ - auto n = std::min(Size(std::distance(first, new_first)), count); - new_first = copy_n(first, n, new_first); - count -= n; - } - return first + count; -} - -template//, typename = std::enable_if_t{}>> -memory::cuda::ptr fill_n(array_iterator> const first, Size count, U const& value){ - if(count--) - for(ptr new_first = adl_copy_n(&value, 1, first); count;){ - auto n = std::min(Size(std::distance(first, new_first)), count); - new_first = copy_n(first, n, new_first); - count -= n; - } - return first + count; -} - -// TODO fix traits std::complex instead -template, T2>{},int> =0> -ptr> copy_n(ptr first, Size count, ptr> result){ - fill_n(result, count, std::complex{0}); - copy_n( - multi::array_iterator, 1, ptr>{first, 1}, count, - multi::array_iterator>{reinterpret_pointer_cast(result), 2} - ); - return result + count; -} - -template, T2>{},int> =0> -auto copy_n(iterator> first, Size count, iterator, 1, ptr>> result){ - MULTI_MARK_SCOPE("cuda copy_n 1D complex destination"); - if(stride(first) == 1 and stride(result)==1) copy_n(base(first), count, base(result)); - else assert(0); - return result + count; -} - -//template{}, int> =0> -//auto uninitialized_fill_n(cuda::ptr first, Size n, V const& v){return fill_n(first, n, v);} - -template>::value_type>{}, int> =0> -auto uninitialized_fill_n(ptr first, Size n, T const& t) -{ - return fill_n(first, n, t); -} - - -template::value_type, std::enable_if_t{}, int> = 0> -auto uninitialized_value_construct_n(It first, Size n){ - return uninitialized_fill_n(first, n, T()); -} - -template>::value_type>{}, int> = 0> -auto uninitialized_copy_n(It first, Size n, ptr d_first) -->decltype(copy_n(first, n, d_first)){ - return copy_n(first, n, d_first);} - -//template -//auto alloc_uninitialized_value_construct_n(Alloc&, It first, Size n){ -// return uninitialized_value_construct_n(first, n); -//} - -#if 1 - -#if 1 - -template void what() = delete; - -template -array_iterator> copy_n( - 
array_iterator> first_ , Size count, - array_iterator> result_ -){ - MULTI_MARK_SCOPE("cuda copy_n 1D"); - array_iterator> first ; std::memcpy((void*)&first , (void const*)&first_ , sizeof(first_)); - array_iterator> result; std::memcpy((void*)&result, (void const*)&result_, sizeof(first_)); - static_assert( sizeof(first ) == sizeof(first_ ) ); - static_assert( sizeof(result) == sizeof(result_) ); - ::thrust::for_each( - ::thrust::make_counting_iterator(0L), - ::thrust::make_counting_iterator(count), - [first, result, x = multi::extensions_t<1>(count)] __device__ (auto n){ // requires --extended-lambda nvcc flag - // std::tuple - auto const i = x.from_linear(n); - using boost::multi::detail::get; - result[get<0>(i)] = T2(first[get<0>(i)]); - } - ); - return result_ + count; -} - -template -array_iterator> -copy_n( - array_iterator> first_ , Size count, - array_iterator> result_ -) { - MULTI_MARK_SCOPE("cuda copy_n 2D"); - - array_iterator> first ; std::memcpy((void*)&first , (void const*)&first_ , sizeof(first_)); - array_iterator> result; std::memcpy((void*)&result, (void const*)&result_, sizeof(first_)); - - static_assert( sizeof(first ) == sizeof(first_ ) ); - static_assert( sizeof(result) == sizeof(result_) ); - - assert(first->extensions() == result->extensions()); - ::thrust::for_each( - ::thrust::make_counting_iterator(0L), - ::thrust::make_counting_iterator(count*first->num_elements()), - [first, count, result, x = first->extensions()] __device__ (auto n){ - auto const ij = (count*x).from_linear(n); - result[std::get<0>(ij)][std::get<1>(ij)] = T2(first[std::get<0>(ij)][std::get<1>(ij)]); - } - ); - - return result_ + count; -} - -template -array_iterator> -copy_n( - array_iterator> first_ , Size count, - array_iterator> result_ -) { - MULTI_MARK_SCOPE("cuda copy_n 3D"); - array_iterator> first ; std::memcpy((void*)&first , (void const*)&first_ , sizeof(first_)); - array_iterator> result; std::memcpy((void*)&result, (void const*)&result_, sizeof(first_)); - 
static_assert( sizeof(first ) == sizeof(first_ ) ); - static_assert( sizeof(result) == sizeof(result_) ); - assert(first->extensions() == result->extensions()); - ::thrust::for_each( - ::thrust::make_counting_iterator(0L), - ::thrust::make_counting_iterator(count*first->num_elements()), - [first, count, result, x = first->extensions()] __device__ (auto n){ - auto const ijk = (count*x).from_linear(n); - result.apply(ijk) = T2(first.apply(ijk)); - } - ); - return result_ + count; -} - -template -array_iterator> -copy_n( - array_iterator> first_ , Size count, - array_iterator> result_ -) { - MULTI_MARK_SCOPE("cuda copy_n 4D"); - array_iterator> first ; std::memcpy((void*)&first , (void const*)&first_ , sizeof(first_)); - array_iterator> result; std::memcpy((void*)&result, (void const*)&result_, sizeof(first_)); - static_assert( sizeof(first ) == sizeof(first_ ) ); - static_assert( sizeof(result) == sizeof(result_) ); - assert(first->extensions() == result->extensions()); - ::thrust::for_each( - ::thrust::make_counting_iterator(0L), - ::thrust::make_counting_iterator(count*first->num_elements()), - [first, count, result, x = first->extensions()] __device__ (auto n){ - auto const ijk = (count*x).from_linear(n); - result.apply(ijk) = T2(first.apply(ijk)); - } - ); - return result_ + count; -} - -template -array_iterator> -copy_n( - array_iterator> first_ , Size count, - array_iterator> result_ -) { - MULTI_MARK_SCOPE("cuda copy_n 5D"); - array_iterator> first ; std::memcpy((void*)&first , (void const*)&first_ , sizeof(first_)); - array_iterator> result; std::memcpy((void*)&result, (void const*)&result_, sizeof(first_)); - static_assert( sizeof(first ) == sizeof(first_ ) ); - static_assert( sizeof(result) == sizeof(result_) ); - assert(first->extensions() == result->extensions()); - ::thrust::for_each( - ::thrust::make_counting_iterator(0L), - ::thrust::make_counting_iterator(count*first->num_elements()), - [first, count, result, x = first->extensions()] __device__ (auto 
n){ - auto const ijk = (count*x).from_linear(n); - result.apply(ijk) = T2(first.apply(ijk)); - } - ); - return result_ + count; -} - - -template* =0> -array_iterator> -copy( - array_iterator> first_ , array_iterator> last_, - array_iterator> result_ -) {return copy_n(first_, last_ - first_, result_);} - -#endif - -template{}>> -auto copy_n(iterator first, Size count, iterator> result) -->decltype(memcpy2D(result.base(), sizeof(T2)*stride(result), first.base(), sizeof(T1)*stride(first ), sizeof(T1), count), result + count){ - MULTI_MARK_SCOPE("cuda copy_n 1D cpu source gpu destination"); - return memcpy2D(base(result), sizeof(T2)*stride(result), base(first), sizeof(T1)*stride(first ), sizeof(T1), count), result + count;} - -template{}>> -auto copy_n(iterator> first, Size count, iterator result) -->decltype(memcpy2D(result.base(), sizeof(T2)*stride(result), first.base(), sizeof(T1)*stride(first), sizeof(T1), count), result+count){ - MULTI_MARK_SCOPE("cuda copy_n 1D gpu source cpu destination"); - return memcpy2D(base(result), sizeof(T2)*stride(result), base(first), sizeof(T1)*stride(first), sizeof(T1), count), result+count;} - -//template{}>> -//auto copy_n(iterator> first, Size count, iterator> result) -//->decltype(memcpy2D(result.base(), sizeof(T2)*stride(result), first.base(), sizeof(T1)*stride(first), sizeof(T1), count), result + count){assert(0); -// return memcpy2D(base(result), sizeof(T2)*stride(result), base(first), sizeof(T1)*stride(first), sizeof(T1), count), result + count;} - -template{}>> -auto copy_n(iterator> first, Size count, iterator> result) -->decltype(memcpy2D(result.base(), sizeof(T2)*stride(result), first.base(), sizeof(T1)*stride(first), sizeof(T1), count), result + count){ - MULTI_MARK_SCOPE("cuda copy_n 1D gpu managed source gpu managed destination"); - return memcpy2D(base(result), sizeof(T2)*stride(result), base(first), sizeof(T1)*stride(first), sizeof(T1), count), result + count;} - -template{}>> -auto copy(iterator> first, iterator> 
last, iterator> result) -->decltype(copy_n(first, last - first, result)){ - return copy_n(first, last - first, result);} - -#endif - -#define ENABLE_IF class=std::enable_if_t - -template{}, int> =0 - ENABLE_IF{}> -// typename = std::enable_if_t{}> -> -auto copy_n(array_iterator> first, Size count, array_iterator> result) -//->decltype(memcpy2D(base(result), sizeof(T2)*stride(result), base(first), sizeof(T1)*stride(first), sizeof(T1), count), result + count) -{ return memcpy2D(base(result), sizeof(T2)*stride(result), base(first), sizeof(T1)*stride(first), sizeof(T1), count), result + count;} - -template -auto copy(array_iterator> first, array_iterator> last, array_iterator> result) -//->decltype(cuda::copy_n(first, last - first, result)) -{ return cuda::copy_n(first, last - first, result);} - -//template -//auto copy(array_iterator> first, array_iterator> last, array_iterator> result){ -// std::cout << boost::stacktrace::stacktrace() << std::endl; -// assert(0); -//} - - -template{}, int> =0> -auto uninitialized_value_construct_n(ptr first, Size n){return uninitialized_fill_n(first, n, T{});} - -namespace managed { - -//template{}>> -//auto copy_n( -// array_iterator> first, Size n, -// array_iterator> result -//) -//->decltype(memcpy2D(base(result), sizeof(T2)*stride(result), base(first), sizeof(T1)*stride(first), sizeof(T1), n), result + n){ -// return memcpy2D(base(result), sizeof(T2)*stride(result), base(first), sizeof(T1)*stride(first), sizeof(T1), n), result + n;} - -//template//, std::enable_if_t{}, int> =0> -//auto copy_n( -// managed::ptr first, Size count, -// managed::ptr result -//) -//->decltype(cuda::copy_n(cuda::ptr(first), count, cuda::ptr(result))){ -//{ return cuda::copy_n(cuda::ptr(first), count, cuda::ptr(result));} - - -template//, std::enable_if_t{}, int> =0> -auto copy_n( - managed::ptr first, Size count, - managed::ptr result -) -//->decltype(cuda::copy_n(cuda::ptr(first), count, cuda::ptr(result))){ -{ return cuda::copy_n(cuda::ptr(first), 
count, cuda::ptr(result));} - -template//, std::enable_if_t{}, int> =0> -auto copy( - managed::ptr first, managed::ptr last, - managed::ptr result -) -//->decltype(cuda::copy(first, last, result)){assert(0); -{ return cuda::copy(cuda::ptr(first), cuda::ptr(last), cuda::ptr(result)), result + (last - first);} - - -//template, T2>{},int> =0> -inline -managed::ptr> copy_n(managed::ptr /*first*/, std::size_t /*count*/, managed::ptr> result) { - assert(0); - return result; -} - - -template -auto copy_n( - array_iterator> first, Size count, - array_iterator> d_first -){ - return cuda::copy_n(array_iterator>(first), count, array_iterator>(d_first)), d_first + count; -} - -template -auto copy_n( - array_iterator> first, Size count, - array_iterator> d_first -){ - return cuda::copy_n(array_iterator>(first), count, array_iterator>(d_first)), d_first + count; -} - -template -auto copy_n( - array_iterator> first, Size count, - array_iterator> d_first -){ - return cuda::copy_n(array_iterator>(first), count, array_iterator>(d_first)), d_first + count; -} - -template -auto copy_n( - array_iterator> first, Size count, - array_iterator> d_first -){ - return cuda::copy_n(array_iterator>(first), count, array_iterator>(d_first)), d_first + count; -} - -template -auto copy_n( - array_iterator> first, Size count, - array_iterator> d_first -){ - return cuda::copy_n(array_iterator>(first), count, array_iterator>(d_first)), d_first + count; -} - - -template -auto copy( - array_iterator> first, - array_iterator> last, - array_iterator> d_first -){ - return managed::copy_n(first, last - first, d_first); -} - -template -auto copy( - array_iterator> first, - array_iterator> last, - array_iterator> d_first -){ - return managed::copy_n(first, last - first, d_first); -} - -template -auto copy( - array_iterator> first, - array_iterator> last, - array_iterator> d_first -){ - return managed::copy_n(first, last - first, d_first); -} - -template -auto copy( - array_iterator> first, - array_iterator> 
last, - array_iterator> d_first -){ - return managed::copy_n(first, last - first, d_first); -} - -template -auto copy( - array_iterator> first, - array_iterator> last, - array_iterator> d_first -){ - return managed::copy_n(first, last - first, d_first); -} - -template//, typename = std::enable_if_t{}>> -auto fill_n(managed::ptr first, Size count, U const& value){ - return fill_n(cuda::ptr(first), count, value), first + count; -} - -template{}, int> =0> -auto uninitialized_fill_n(managed::ptr first, Size n, V const& v){return fill_n(first, n, v);} - -template::value_type>{}, int> = 0> -auto alloc_uninitialized_copy(Alloc&, Ptr first, Ptr last, ForwardIt dest) -->decltype(cuda::copy(first, last, dest)){ - return cuda::copy(first, last, dest);} - -template::value_type>{}, int> = 0> -auto alloc_uninitialized_copy(Alloc&, Ptr first, Size n, ForwardIt dest) -->decltype(cuda::copy_n(first, n, dest)){ - return cuda::copy_n(first, n, dest);} - -template{}, int> =0> -auto uninitialized_default_construct_n(cuda::managed::ptr first, Size n){return first + n;} - -template{}, int> =0> -auto uninitialized_default_construct_n(cuda::managed::ptr, TP> first, Size n){ - return first + n; // TODO remove? 
-} - -template{}, int> =0> -auto destroy_n(cuda::managed::ptr first, Size n){return first + n;} - -} - -}} - -/* -template{}>> -array_iterator> copy_n( - T1* first, Size count, - array_iterator> result -){ - std::cout << "count " << std::endl; - return copy_n, 1, T1*>>(first, count, result); -}*/ - -#if 0 -template -auto copy( - array_iterator> f, array_iterator> l, - array_iterator> d -) -->decltype(copy_n(f, std::distance(f, l), d)){assert(stride(f)==stride(l)); - return copy_n(f, std::distance(f, l), d);} - -template -auto copy( - array_iterator f, array_iterator> l, - array_iterator> d -) -->decltype(copy_n(f, std::distance(f, l), d)){assert(stride(f)==stride(l)); - return copy_n(f, std::distance(f, l), d);} -#endif - -}} - -#define FWD(x) std::forward(x) - -//////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////// - -//#if not __INCLUDE_LEVEL__ // _TEST_MULTI_MEMORY_ADAPTORS_CUDA_ALGORITHM - -//#define BOOST_TEST_MODULE "C++ Unit Tests for Multi initializer_list" -//#define BOOST_TEST_DYN_LINK -//#include - -//#include "../../../array.hpp" -//#include "../../../adaptors/cuda.hpp" - -//#include "../cuda/allocator.hpp" -//#include -//#include - -//namespace multi = boost::multi; -//namespace cuda = multi::memory::cuda; - -//BOOST_AUTO_TEST_CASE(copy_1d){ -// auto const A_cpu = []{ -// multi::array ret(10); -// std::generate( -// ret.data_elements(), ret.data_elements() + ret.num_elements(), std::rand -// ); -// return ret; -// }(); -// std::cout<<"memory size "<< A_cpu.num_elements()*sizeof(decltype(A_cpu)::element)/1e6 <<" MB\n"; -// { -// multi::array B_cpu(size(A_cpu)); -// boost::timer::auto_cpu_timer t{"cpu->cpu %ws wall, CPU (%p%)\n"}; -// B_cpu = A_cpu; -// } -// { -// multi::cuda::array A_gpu(size(A_cpu)); -// multi::cuda::array B_gpu(size(A_cpu)); -// 
boost::timer::auto_cpu_timer t{"gpu->gpu %ws wall, CPU (%p%)\n"}; -// B_gpu = A_gpu; -// cudaDeviceSynchronize(); -// } -// { -// multi::cuda::array B_gpu(size(A_cpu)); -// boost::timer::auto_cpu_timer t{"cpu->gpu %ws wall, CPU (%p%)\n"}; -// B_gpu = A_cpu; -// } -// multi::cuda::array C_cpu(size(A_cpu)); -// { -// multi::cuda::array B_gpu(size(A_cpu)); -// boost::timer::auto_cpu_timer t{"gpu->cpu %ws wall, CPU (%p%)\n"}; -// C_cpu = B_gpu; -// cudaDeviceSynchronize(); -// } -// { -// multi::cuda::managed::array const A_mng(size(A_cpu)); -// multi::cuda::managed::array B_mng(size(A_cpu)); -// { -// boost::timer::auto_cpu_timer t{"cold mng->mng %ws wall, CPU (%p%)\n"}; -// B_mng = A_mng; -// cudaDeviceSynchronize(); -// } -// { -// boost::timer::auto_cpu_timer t{"haut mng->mng %ws wall, CPU (%p%)\n"}; -// B_mng = A_mng; -// cudaDeviceSynchronize(); -// } -// } - -//// multi::array Bcpu(3); -//// Bcpu = B; -//// BOOST_REQUIRE( Bcpu[1] == 3. ); -//} - -//#if 0 -//BOOST_AUTO_TEST_CASE(multi_memory_adaptors_cuda_copy_2D){ -// multi::array A(50, 99.); -// multi::cuda::array B(50); -// BOOST_REQUIRE( size(B) == 50 ); - -//// using std::copy_n; -//// using std::copy; -// using boost::multi::adl::copy_n; -//// copy_n(&A[0], size(A), &B[0]); -// copy_n(begin(A), size(A), begin(B)); - -// multi::cuda::array D(50); -// copy_n(begin(B), size(B), begin(D)); - -// multi::array C(50, 88.); -// copy_n(begin(D), size(D), begin(C)); -//// C = B; - -//// BOOST_REQUIRE( C == A ); -//} - -//BOOST_AUTO_TEST_CASE(multi_cuda_managed_array_initialization_complex){ -// multi::cuda::managed::array B = {1. + 2.*I, 3. + 1.*I, 4. + 5.*I}; -// multi::array Bcpu(3); -// Bcpu = B; -// BOOST_REQUIRE( Bcpu[1] == 3. 
+ 1.*I ); -//} - -//namespace utf = boost::unit_test; - -//#if 0 -//BOOST_AUTO_TEST_CASE(multi_memory_adaptors_cuda_algorithm, *utf::disabled()){ -// BOOST_REQUIRE(false); -// multi::cuda::array const A(10, 99.); -// multi::cuda::array B(10, 88.); -// B = A; - -// B() = A(); - -//// B.assign({1., 2., 3., 4.}); -// B = {1., 2., 3., 4.}; -// BOOST_REQUIRE( size(B) == 4 ); - -// B().assign({11., 22., 33., 44.});//.begin(), il.end()); -//// BOOST_REQUIRE( B[2] == 33. ); -///// B.assign - - -//// multi::cuda::array B({10, 10}, 88.); -//// B = A; -//#if 0 -// { -// cuda::allocator calloc; -// std::size_t n = 2e9/sizeof(double); -// [[maybe_unused]] cuda::ptr p = calloc.allocate(n); -// { -// boost::timer::auto_cpu_timer t; -// // using std::fill_n; fill_n(p, n, 99.); -// } -// // assert( p[0] == 99. ); -// // assert( p[n/2] == 99. ); -// // assert( p[n-1] == 99. ); -// [[maybe_unused]] cuda::ptr q = calloc.allocate(n); -// { -// boost::timer::auto_cpu_timer t; -// // multi::omp_copy_n(static_cast(p), n, static_cast(q)); -// // using std::copy_n; copy_n(p, n, q); -// using std::copy; copy(p, p + n, q); -// } -// { -// boost::timer::auto_cpu_timer t; -// // using std::copy_n; copy_n(p, n, q); -// } -// // assert( q[23] == 99. ); -// // assert( q[99] == 99. ); -// // assert( q[n-1] == 99. 
); -// } -// { -// multi::array const A(100);//, double{99.}); -// multi::array> A_gpu = A; -// #pragma GCC diagnostic push // allow cuda element access -// #pragma GCC diagnostic ignored "-Wdeprecated-declarations" -// // assert( A_gpu[1] == A_gpu[0] ); -// #pragma GCC diagnostic pop -// } -//#endif -// { -// multi::array A({32, 8}, 99.); -// multi::array> A_gpu({32, 8}, 0.);// = A;//({32, 8000});// = A; -// } -// -//} -//#endif -//#endif -//#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/allocator.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/allocator.hpp deleted file mode 100644 index 84e1b3fdbb..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/allocator.hpp +++ /dev/null @@ -1,239 +0,0 @@ -#ifdef COMPILATION// -*- indent-tabs-mode:t;c-basic-offset:4;tab-width:4; -*- -$CXXX $CXXFLAGS $0 -o $0x -lcudart -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2020 - -#ifndef MULTI_MEMORY_ADAPTORS_CUDA_ALLOCATOR_HPP -#define MULTI_MEMORY_ADAPTORS_CUDA_ALLOCATOR_HPP - -#include // cudaMalloc - -#include "../../adaptors/cuda/ptr.hpp" -#include "../../adaptors/cuda/algorithm.hpp" - -#include "../../adaptors/cuda/clib.hpp" // cuda::malloc -#include "../../adaptors/cuda/cstring.hpp" // cuda::memcpy -#include "../../adaptors/cuda/malloc.hpp" - -#include // bad_alloc -#include -#include // debug - -#include - -#include - -namespace boost{namespace multi{ -namespace memory{namespace cuda{ - -struct bad_alloc : std::bad_alloc {}; - -//struct allocation_counter { -// static long n_allocations; -// static long n_deallocations; -// static long bytes_allocated; -// static long bytes_deallocated; -//}; - -//long allocation_counter::n_allocations = 0; -//long allocation_counter::n_deallocations = 0; -//long allocation_counter::bytes_allocated = 0; -//long allocation_counter::bytes_deallocated = 0; - -template -class allocator {//: protected 
allocation_counter { - static_assert(std::is_same>{}, - "allocated type should be a value type, not a reference or decorated type"); - - public: - using value_type = T; - using pointer = ptr; - using const_pointer = ptr; - using void_pointer = ptr; - using const_void_pointer = ptr; - using difference_type = typename pointer::difference_type; - template using rebind = allocator; - using size_type = ::size_t; // as specified by CudaMalloc - - allocator() = default; - template - allocator(allocator const& /*other*/) noexcept {} - - pointer allocate(size_type n, const_void_pointer = 0) {//const void* = 0) { - if(n == 0) return pointer{nullptr}; - auto ret = static_cast(cuda::malloc(n*sizeof(T))); - if(not ret) throw bad_alloc{}; - // ++n_allocations; bytes_allocated+=sizeof(T)*n; - return ret; - } - void deallocate(pointer p, size_type n) { - cuda::free(p); - // ++n_deallocations; bytes_deallocated+=sizeof(T)*n; - } - - std::true_type operator==(allocator const&) const {return {};} - std::false_type operator!=(allocator const&) const {return {};} - - template - [[deprecated("cuda slow")]] - void construct(P p, Args&&... 
args) = delete;/*{ - if(sizeof...(Args) == 0 and std::is_trivially_default_constructible{}) - cuda::memset(p, 0, sizeof(T)); - else{ - char buff[sizeof(T)]; - ::new(buff) T(std::forward(args)...); - cuda::memcpy(p, buff, sizeof(T)); - } - }*/ - template - [[deprecated("cuda slow")]] - void destroy(P p) { - if(not std::is_trivially_destructible{}) { - std::array buff; // char buff[sizeof(T)]; - cuda::memcpy(buff.data(), p, buff.size()); - ((T*)buff)->~T(); - } - } - -#if 0 - template//, typename T1 = typename std::iterator_traits::value_type> - auto alloc_uninitialized_copy_n(InputIt first, Size count, ForwardIt d_first) - DECLRETURN(adl_uninitialized_copy_n(first, count, d_first)) - - template::value_type> - auto alloc_uninitialized_move_n(InputIt first, Size count, ForwardIt d_first) - DECLRETURN(adl_uninitialized_move_n(first, count, d_first)) - - template::value_type> - auto alloc_uninitialized_copy(InputIt first, InputIt last, ForwardIt d_first) - DECLRETURN(adl_uninitialized_copy(first, last, d_first)) - -// DECLRETURN(adl_uninitialized_copy(first, count, d_first)) - template::value_type> - auto alloc_uninitialized_copy(InputIt first, Size count, ForwardIt d_first) - DECLRETURN(uninitialized_copy(first, count, d_first)) - template//, std::enable_if_t > - auto alloc_uninitialized_value_construct_n(Ptr p, Size n) - DECLRETURN(uninitialized_value_construct_n(p, n)) - - template - auto alloc_uninitialized_default_construct_n(Ptr p, Size n) - DECLRETURN(uninitialized_default_construct_n(p, n)) - - template{}, int> =0>// = typename Ptr::element_type> - Ptr alloc_uninitialized_fill_n(Ptr p, Size n, V const& v){ - return uninitialized_fill_n(p, n, v);} - template - static std::true_type is_complex_(std::complex); - static std::false_type is_complex_(...); - template struct is_complex : decltype(is_complex_(TT{})){}; - template< - class Ptr, class Size, class V = typename Ptr::element_type, - std::enable_if_t{} or is_complex{}, int> = 0 - > - Ptr 
alloc_uninitialized_default_construct_n(Ptr const& p, Size n) const{return p + n;} - template - Ptr alloc_destroy_n(Ptr p, Size n){ - if(std::is_trivially_destructible{}) { - } else {assert(0);} - return p + n; - } -#endif -}; - -template<> -class allocator {//: allocation_counter{ - public: - using T = std::max_align_t; - using value_type = T; - using pointer = ptr; - -// using void_pointer = ptr; - using const_void_pointer = ptr; -// using difference_type = typename pointer::difference_type; - - using size_type = ::size_t; // as specified by CudaMalloc - auto allocate(size_type n, const_void_pointer = 0) { // const void* = 0){ - if(n == 0) return pointer{nullptr}; - auto ret = static_cast(cuda::malloc(n*sizeof(T))); - if(not ret) throw bad_alloc{}; - // ++n_allocations; bytes_allocated+=sizeof(T)*n; - return ret; - } - void deallocate(pointer p, size_type n) { - cuda::free(p); - // ++n_deallocations; bytes_deallocated+=sizeof(T)*n; - } - std::true_type operator==(allocator const&) const {return {};} // template explicit for nvcc - std::false_type operator!=(allocator const&) const {return {};} - - template - void construct(/*[[maybe_unused]]*/ P p, Args&&...) 
{(void)p; assert(0);} // TODO investigate who is calling this - template - void destroy(P) {} // TODO(correaa) investigate who is calling this -}; - -}}}} - -namespace std { - -#if __NVCC__ // this solves this error with nvcc error: ‘template using __pointer = typename _Tp::pointer’ is protected within this context -template -class allocator_traits> { - using Alloc = boost::multi::memory::cuda::allocator; - - public: - using allocator_type = Alloc; - using value_type = typename Alloc::value_type; - using pointer = typename Alloc::pointer; - using const_pointer = typename Alloc::const_pointer; - using void_pointer = typename Alloc::void_pointer; - using const_void_pointer = typename Alloc::const_void_pointer; - using difference_type = typename Alloc::difference_type; - using size_type = typename Alloc::size_type; - using propagate_on_container_copy_assignment = std::false_type; - using propagate_on_container_move_assignment = std::false_type; - using propagate_on_container_swap = std::false_type; - template - using rebind_alloc = typename Alloc::template rebind; - - static constexpr Alloc select_on_container_copy_construction(Alloc const& a) {return a;} - - template static auto deallocate(allocator_type& a, As&&... as) {return a.deallocate(std::forward(as)...);} - template static auto allocate(allocator_type& a, As&&... as) {return a. 
allocate(std::forward(as)...);} -}; -#endif - -} // end namespace std - -//#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ -//#define BOOST_TEST_MODULE "C++ Unit Tests for Multi memory allocator" -//#define BOOST_TEST_DYN_LINK -//#include - -//#include -//#include -//#include - -//#include "../../../array.hpp" -//#include "../cuda/algorithm.hpp" - -//namespace multi = boost::multi; -//namespace cuda = multi::memory::cuda; - -//void add_one(double& d){d += 1.;} -//template void add_one(T&& t){std::forward(t) += 1.;} - -//template void what(T&&) = delete; -//using std::cout; - -//BOOST_AUTO_TEST_CASE(multi_memory_allocator){ -// { -// multi::static_array A(32, double{}); A[17] = 3.; -// multi::static_array> A_gpu = A; -// BOOST_REQUIRE( A_gpu[17] == 3 ); -// } -//} -//#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/allocator.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/allocator.hpp deleted file mode 100644 index 2bcba1eb18..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/allocator.hpp +++ /dev/null @@ -1,229 +0,0 @@ -//#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -//$CXXX $CXXFLAGS $0 -o $0x -lcudart&&$0x&&rm $0x;exit -//#endif - -#ifndef MULTI_MEMORY_ADAPTORS_CUDA_CACHED_ALLOCATOR_HPP -#define MULTI_MEMORY_ADAPTORS_CUDA_CACHED_ALLOCATOR_HPP - -#include "../../../adaptors/cuda/allocator.hpp" -#include "../../../adaptors/cuda/cached/ptr.hpp" - -#include "../../../adaptors/cuda/cached/clib.hpp" // cuda::malloc -#include "../../../adaptors/cuda/cached/malloc.hpp" - -#include -#include -#include // debug -#include -#include // bad_alloc -#include -#include - -namespace boost::multi::memory::cuda { - -namespace cached { - -template -class allocator_cache { - - struct block { - size_t size; - PointerType loc; - }; - - static const size_t max_size = 4ul*1024ul*1024ul*1024ul; - static const unsigned 
max_entries = 200; - std::list blocks_; - std::unordered_multimap map_; - size_t mem_used; - - public: - - allocator_cache() - : mem_used(0) {} - - auto put(size_t size, PointerType loc) { - - if(size >= max_size) return false; - - while(size + mem_used > max_size or map_.size() >= max_entries) { - assert(map_.size() > 0); - - cuda::cached::free(blocks_.back().loc); - mem_used -= blocks_.back().size; - auto range = map_.equal_range(blocks_.back().size); - for(auto it = range.first; it != range.second; ++it) { - if(it->second == --blocks_.end()) { - map_.erase(it); - break; - } - }; - blocks_.pop_back(); - } - - blocks_.emplace_front(block{size, loc}); - map_.emplace(size, blocks_.begin()); - assert(map_.size() == blocks_.size()); - mem_used += size; - - return true; - } - - PointerType get(size_t size) { - PointerType loc; - auto pos = map_.find(size); - if(pos != map_.end()) { - auto block_pos = pos->second; - loc = block_pos->loc; - blocks_.erase(block_pos); - map_.erase(pos); - mem_used -= size; - } else { - loc = nullptr; - } - assert(map_.size() == blocks_.size()); - return loc; - } -}; - -auto & cache() { - static allocator_cache> alloc_cache; - return alloc_cache; -} - -struct bad_alloc : std::bad_alloc {}; - -template -class allocator : cuda::allocator { - static_assert( std::is_same>{}, "!" 
); - - public: - using value_type = T; - using pointer = cached::ptr; - using size_type = ::size_t; // as specified by CudaMalloc - using const_void_pointer = cached::ptr; - template using rebind = cached::allocator; - - pointer allocate(typename allocator::size_type n) { - MULTI_MARK_SCOPE("cuda::cached::allocate"); - - if(n == 0) return pointer{nullptr}; - - auto ret = static_cast(cache().get(n*sizeof(T))); - if(ret == pointer{nullptr}) { - ret = static_cast(cuda::cached::malloc(n*sizeof(T))); - if(!ret) throw bad_alloc{}; - // ++allocator::n_allocations; allocator::bytes_allocated+=sizeof(T)*n; - } - if(PrefetchDevice::value != -99) { - auto const code = cudaMemPrefetchAsync(raw_pointer_cast(ret), n*sizeof(T), PrefetchDevice::value); - if(code != cudaSuccess) { - throw std::runtime_error{"cannot prefetch for reason "+std::to_string(code)+" device is "+std::to_string(PrefetchDevice::value)}; - } - } - return ret; - } - - pointer allocate(typename allocator::size_type n, const_void_pointer hint){ - auto const ret = allocate(n); - if(not hint) { - if(cudaMemPrefetchAsync(raw_pointer_cast(ret), n*sizeof(T), /*device*/ 0) != cudaSuccess) {throw std::runtime_error{"cannot prefetch"};} - return ret; - } - - cudaPointerAttributes attr; if(cudaPointerGetAttributes(&attr, raw_pointer_cast(hint))!=cudaSuccess) {throw std::runtime_error{"cannot use attributes for hint"};} - switch(attr.type) { - case cudaMemoryTypeUnregistered: {//std::cout<< n <<" cudaMemoryTypeUnregistered"<< attr.device <<" "<< attr.device <<" cpuid:"<< cudaCpuDeviceId <>(p))) { - cuda::cached::free(static_cast>(p)); - } - } - - template - void construct(P p, Args&&... args) { - ::new(p.rp_) T(std::forward(args)...); - } - - template - void construct(P* p, Args&&... 
args) { - ::new(p) T(std::forward(args)...); - } - - template void destroy(P p) {p.rp_->~T();} - template void destroy(P* p) {p->~T();} - - constexpr bool operator==(allocator const&) const {return true;} - constexpr bool operator!=(allocator const&) const {return false;} - - template - constexpr ForwardIt alloc_uninitialized_copy(InputIt first, InputIt last, ForwardIt d_first) const { - return ForwardIt{adl_uninitialized_copy(first, last, d_first)}; - } - template - constexpr ForwardIt alloc_uninitialized_copy_n(InputIt first, Size count, ForwardIt d_first) const{ - return ForwardIt{adl_uninitialized_copy_n(first, count, d_first)}; - } - template - constexpr ForwardIt alloc_uninitialized_default_construct_n(ForwardIt first, Size n) const{ - return ForwardIt{adl_uninitialized_default_construct_n(first, n)}; - } - template - constexpr ForwardIt alloc_destroy_n(ForwardIt first, Size n) const{return ForwardIt{destroy_n(first, n)};} -}; - -} - -} // end namespace boost::multi::memory::cuda - -//#if not __INCLUDE_LEVEL__ - -//#include -//#include -//#include "../../../../array.hpp" - -//namespace multi = boost::multi; -//namespace cuda = multi::memory::cuda; - -//int main(){ - -// multi::array > A(32); -// A[17] = 3.; -// assert( A[17] == 3. 
); - -//} -//#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/clib.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/clib.hpp deleted file mode 100644 index bd1e9a6771..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/clib.hpp +++ /dev/null @@ -1,46 +0,0 @@ -//#ifdef COMPILATION_INSTRUCTIONS -//(echo '#include "'$0'" '>$0.cpp)&&c++ -std=c++11 -Wall -Wextra -Wpedantic -Wfatal-errors -D_TEST_MULTI_MEMORY_ADAPTOR_CUDA_CACHED_MALLOC $0.cpp -lcudart -o $0x &&$0x&& rm $0x $0.cpp; exit -//#endif - -#ifndef MULTI_MEMORY_ADAPTOR_CUDA_CACHED_CLIB_HPP -#define MULTI_MEMORY_ADAPTOR_CUDA_CACHED_CLIB_HPP - -#include // cudaMallocCached - -#include "../../../adaptors/cuda/clib.hpp" // Cuda::free -#include "../../../adaptors/cuda/error.hpp" - -namespace Cuda { - namespace Cached { - inline error Malloc(void** p, size_t bytes){return static_cast(cudaMallocManaged(p, bytes/*, cudaMemAttachGlobal*/));} - inline void* malloc(size_t bytes){ - void* ret; - switch(auto e = Malloc(&ret, bytes)){ - case success : return ret; - case memory_allocation : return nullptr; - default : - throw std::system_error{e, "cannot allocate "+std::to_string(bytes)+" bytes in '"+__PRETTY_FUNCTION__+"'"}; - } - } - inline void free(void* p){return Cuda::free(p);} - } -} - - -//#ifdef _TEST_MULTI_MEMORY_ADAPTOR_CUDA_CACHED_MALLOC - -//#include "../../cuda/cached/ptr.hpp" - -//#include - -//namespace multi = boost::multi; -//namespace cuda = multi::memory::cuda; - -//using std::cout; - -//int main(){ -// void* p = Cuda::Cached::malloc(100); -// Cuda::Cached::free(p); -//} -//#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/malloc.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/malloc.hpp deleted file mode 100644 index 29edce995b..0000000000 --- 
a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/malloc.hpp +++ /dev/null @@ -1,33 +0,0 @@ -//#ifdef COMPILATION_INSTRUCTIONS -//(echo '#include"'$0'" '>$0.cpp)&& `#nvcc -ccbin=cuda-`c++ -D_TEST_MULTI_MEMORY_ADAPTORS_CUDA_CACHED_MALLOC $0.cpp -o $0x -lcudart &&$0x&&rm $0x; exit -//#endif - -#ifndef MULTI_MEMORY_ADAPTORS_CUDA_CACHED_MALLOC_HPP -#define MULTI_MEMORY_ADAPTORS_CUDA_CACHED_MALLOC_HPP - -#include "../../../adaptors/cuda/cached/clib.hpp" -#include "../../../adaptors/cuda/cached/ptr.hpp" - -namespace boost {namespace multi { -namespace memory { - -namespace cuda { - -namespace cached { - [[nodiscard]] - inline cached::ptr malloc(size_t bytes) { - MULTI_MARK_SCOPE("cuda::cached::malloc"); - return cached::ptr{Cuda::Cached::malloc(bytes)}; - } - inline void free(cached::ptr p) { - MULTI_MARK_SCOPE("cuda::cached::free"); - Cuda::Cached::free(static_cast(p)); - } -} - -} - -} -}} - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/ptr.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/ptr.hpp deleted file mode 100644 index 50f3a728a3..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cached/ptr.hpp +++ /dev/null @@ -1,366 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXXX $CXXFLAGS $0 -o $0.$X `pkg-config --cflags --libs cudart-11.0`&&$0.$X&&rm $0.$X;exit -#endif -// © Alfredo A. 
Correa 2019-2020 - -#ifndef BOOST_MULTI_MEMORY_ADAPTORS_CUDA_CACHED_PTR_HPP -#define BOOST_MULTI_MEMORY_ADAPTORS_CUDA_CACHED_PTR_HPP - -#include // nullptr_t -#include // random_access_iterator_tag - -#include // is_const - -#include "../../cuda/ptr.hpp" - -#include "../../../../detail/memory.hpp" - -#include // cudaDeviceSynchronize - -#ifndef _DISABLE_CUDA_SLOW -#ifdef NDEBUG -#define SLOW deprecated("because it implies a slow element access to GPU memory") -#else -#define SLOW -#endif -#else -#define SLOW -#endif - -#ifndef HD -#ifdef __CUDA_ARCH__ -#define HD __host__ __device__ -#else -#define HD -#endif -#endif - -namespace boost{ -namespace serialization{ - template class array_wrapper; - template const array_wrapper make_array(T* t, S s); -}} - -namespace boost{namespace multi{ -namespace memory{namespace cuda{ - -namespace cached{ - -template struct ptr; - -template -struct ptr : cuda::ptr { - using T = void const; - using raw_pointer = RawPtr; -// raw_pointer rp_; - template friend struct ptr; - template friend ptr const_pointer_cast(ptr const&); - explicit ptr(raw_pointer rp) : cuda::ptr{rp} {} - - public: - ptr() = default; - ptr(ptr const&) = default; - - // cppcheck-suppress noExplicitConstructor ; initialized from nullptr - ptr(std::nullptr_t n) : cuda::ptr{n} {} - - template().rp_})> - // cppcheck-suppress noExplicitConstructor ; any pointer is convertible to void pointer - ptr(Other const& o) : cuda::ptr{o.rp_} {} - - ptr& operator=(ptr const&) = default; - - using pointer = ptr; - using element_type = typename std::pointer_traits::element_type; - using difference_type = void;//typename std::pointer_traits::difference_type; -// explicit operator bool() const{return rp_;} -// explicit operator raw_pointer&()&{return rp_;} - friend constexpr bool operator==(ptr const& self, ptr const& other) {return self.rp_ == other.rp_;} - friend constexpr bool operator!=(ptr const& self, ptr const& other) {return self.rp_ != other.rp_;} - - void operator*() const 
= delete; - template using rebind = ptr::template rebind>; -// friend raw_pointer raw_pointer_cast(ptr const& self) {return self.rp_;} -}; - -template -struct ptr : cuda::ptr { - using pointer = ptr; - using element_type = void; - using difference_type = typename std::pointer_traits::difference_type; - - protected: - using raw_pointer = RawPtr; -// raw_pointer rp_; - - private: - ptr(ptr const& p) : cuda::ptr{const_cast(p.rp_)} {} - template friend ptr const_pointer_cast(ptr const&); - template friend struct ptr; - template friend class allocator; - - public: - template ptr(ptr const& p) : cuda::ptr{p.rp_} {} - explicit ptr(raw_pointer rp) : cuda::ptr{rp} {} - ptr() = default; - ptr(ptr const& p) = default; - - // cppcheck-suppress noExplicitConstructor ; initialized from nullptr - ptr(std::nullptr_t n) : cuda::ptr{n} {} - - template().impl_})> - // cppcheck-suppress noExplicitConstructor ; any pointer is convertible to void pointer - ptr(Other const& o) : cuda::ptr{o.rp_}{} - - ptr& operator=(ptr const&) = default; - - friend constexpr bool operator==(ptr const& self, ptr const& other){return self.rp_==other.rp_;} - friend constexpr bool operator!=(ptr const& self, ptr const& other){return self.rp_!=other.rp_;} - - template using rebind = ptr::template rebind>; - -// explicit operator bool() const {return this->rp_;} - explicit operator raw_pointer&()& {return this->rp_;} - - void operator*() = delete; - friend raw_pointer raw_pointer_cast(ptr const& self){return self.rp_;} -}; - -template > class allocator; - -template -struct ptr : cuda::ptr { - using raw_pointer = RawPtr; -// raw_pointer rp_; - - protected: - friend struct cuda::ptr; // to allow automatic conversions - template friend class allocator; - template friend struct ptr; -// template{}>::type> -// ptr(ptr const& p) : rp_{const_cast(p.impl_)}{} - template friend ptr const_pointer_cast(ptr const&); - - public: - template using rebind = ptr::template rebind>; -// explicit ptr(cuda::ptr const& other) : 
rp_{other.rp_}{} - - template>().rp_)>, raw_pointer>{}>> - // cppcheck-suppress noExplicitConstructor ; propagate implicit of underlying pointer - constexpr /*explicit(false)*/ ptr(ptr const& o) : cuda::ptr{static_cast(o.rp_)} {} - - template>().rp_)>, raw_pointer>{}>, typename = decltype(static_cast(std::declval>().rp_))> - constexpr explicit/*(true)*/ ptr(ptr const& o, void** = 0) : cuda::ptr{static_cast(o.rp_)} {} - - constexpr explicit ptr(void* vp) : cuda::ptr{static_cast(vp)} {} -// template>().rp_)>, raw_pointer>{}>> -// ptr(ptr const& o) HD : rp_{static_cast(o.rp_)}{} -// template>().rp_)>, raw_pointer>{}>> -// explicit ptr(ptr const& o, void** = 0) HD : rp_{static_cast(o.rp_)}{} - explicit ptr(cuda::ptr const& other) : ptr{other.rp_} { - // assert(other.rp_!=nullptr or Cuda::pointer::type(other.rp_) == cudaMemoryTypeCached); - } - constexpr explicit ptr(raw_pointer p) : cuda::ptr{p} {} - ptr() = default; - - // cppcheck-suppress noExplicitConstructor ; bug in cppcheck 2.3 - ptr(ptr const&) = default; - - // cppcheck-suppress noExplicitConstructor ; initialize from nullptr - constexpr ptr(std::nullptr_t n) : cuda::ptr{n} {} - - ptr& operator=(ptr const&) = default; - friend constexpr bool operator==(ptr const& s, ptr const& o) {return s.rp_==o.rp_;} - friend constexpr bool operator!=(ptr const& s, ptr const& o) {return s.rp_!=o.rp_;} - - using element_type = typename std::pointer_traits::element_type; - using difference_type = typename std::pointer_traits::difference_type; - using value_type = T; - using pointer = ptr; - using iterator_category = typename std::iterator_traits::iterator_category; // using iterator_concept = typename std::iterator_traits::iterator_concept; - explicit constexpr operator bool() const {return this->rp_;} -// bool operator not() const{return !rp_;} - constexpr -#ifndef MULTI_ALLOW_IMPLICIT_CPU_CONVERSION - explicit -#endif - operator raw_pointer() const& {return this->rp_;} // do not =delete - constexpr operator ptr() const 
{return ptr{this->rp_};} -// template -// decltype(auto) operator->*(PM pm) const{return *ptr*pm)>, decltype(&(rp_->*pm))>{&(rp_->*pm)};} - explicit constexpr operator typename std::pointer_traits::template rebind() const{return typename std::pointer_traits::template rebind{this->rp_};} - explicit operator typename std::pointer_traits::template rebind() const{return typename std::pointer_traits::template rebind{this->rp_};} - - constexpr ptr& operator++() {++(this->rp_); return *this;} // remove - constexpr ptr& operator--() {--(this->rp_); return *this;} // remove - - ptr operator++(int) {auto tmp = *this; ++(*this); return tmp;} // remove - ptr operator--(int) {auto tmp = *this; --(*this); return tmp;} // remove - - constexpr ptr& operator+=(typename ptr::difference_type n) {(this->rp_)+=n; return *this;} // remove - constexpr ptr& operator-=(typename ptr::difference_type n) HD {(this->rp_)-=n; return *this;} // remove - - constexpr ptr operator+(typename ptr::difference_type n) const {return ptr{(this->rp_) + n};} // remove - constexpr ptr operator-(typename ptr::difference_type n) const {return (*this) + (-n);} // remove - - using reference = typename std::pointer_traits::element_type&;//ref; - constexpr reference operator*() const {return *(this->rp_);} - constexpr reference operator[](difference_type n) const {return *(this->rp_ +n);} - - constexpr typename ptr::difference_type operator-(ptr const& other) const {return (this->rp_)-other.rp_;} - constexpr raw_pointer raw_pointer_cast() const& {return this->rp_;} // remove - friend raw_pointer raw_pointer_cast(ptr const& self) {return self.rp_;} - friend cuda::ptr cuda_pointer_cast(ptr const& self) {return cuda::ptr{self.rp_};} -// constexpr operator cuda::ptr() const{return cuda::ptr{this->rp_};} - friend constexpr allocator> get_allocator(ptr const&) {return {};} // do not =delete - using default_allocator_type = allocator>; - default_allocator_type default_allocator() const {return {};} - - template//, 
std::enable_if_t{}, int> =0> - static auto copy_n( - cached::ptr first, Size count, - cached::ptr result - ) { - return adl_copy_n(cuda::ptr(first), count, cuda::ptr(result)), result + count; - } -public: - friend allocator> default_allocator_of(ptr const&){return {};} - - template //, typename FromElement> - friend constexpr ToPointer - reinterpret_pointer_cast(ptr self) { - using to_element = typename std::pointer_traits::element_type; - return ToPointer(reinterpret_cast(self.raw_pointer_cast())); - } -}; - -template const boost::serialization::array_wrapper make_array(ptr t, S s) { - using boost::serialization::make_array; - return make_array(raw_pointer_cast(t), s); -} - -} - -}} -}} - -#undef SLOW - -#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -#include "../../cuda/cached/clib.hpp" // cuda::malloc -#include "../../cuda/cached/malloc.hpp" - -#include -#include -#include - -namespace multi = boost::multi; -namespace cuda = multi::memory::cuda; - -void add_one(double& d){d += 1.;} -template -void add_one(T&& t){std::forward(t) += 1.;} - -// * Functions with a __global__ qualifier, which run on the device but are called by the host, cannot use pass by reference. 
-//__global__ void set_5(cuda::ptr const& p){ -//__global__ void set_5(cuda::ptr p){*p = 5.;} -//__global__ void check_5(cuda::ptr p){assert(*p == 5.);} - -double const* g(){double* p{nullptr}; return p;} - -cuda::cached::ptr f(){ - return cuda::cached::ptr{nullptr}; -} - -cuda::cached::ptr ff(){ - return cuda::cached::ptr{cuda::ptr{nullptr}}; -} - -std::string full_overload(double*){return "cpu";} -std::string full_overload(cuda::ptr){return "gpu";} -std::string full_overload(cuda::cached::ptr){return "mng";} - -std::string cpugpu_overload(double*){return "cpu";} -std::string cpugpu_overload(cuda::ptr){return "gpu";} - -std::string cpuonly_overload(double*){return "cpu";} - -std::string gpuonly_overload(cuda::ptr){return "gpu";} - -template void what(T&&) = delete; - -int main(){ - - - f(); - using T = double; static_assert( sizeof(cuda::cached::ptr) == sizeof(T*) , "!"); - std::size_t const n = 100; - { - auto p = static_cast>(cuda::cached::malloc(n*sizeof(T))); - // cuda::cached::ptr vp = p; - // T* rp = p; - // void* vrp = p; -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - *p = 99.; - if(*p != 99.) assert(0); - if(*p == 11.) 
assert(0); -#pragma GCC diagnostic pop - cuda::cached::free(p); - } - { - double d = 1.; - assert( full_overload(&d) == "cpu" ); - assert( cpugpu_overload(&d) == "cpu" ); - assert( cpugpu_overload(&d) == "cpu" ); - - cuda::ptr p = nullptr; - assert( full_overload(p) == "gpu" ); - assert( cpugpu_overload(p) == "gpu" ); - assert( gpuonly_overload(p) == "gpu" ); - - cuda::cached::ptr pm = nullptr; - assert( full_overload(pm) == "mng" ); - assert( cpugpu_overload(pm) == "gpu" ); - assert( cpuonly_overload(pm) == "cpu" ); - assert( gpuonly_overload(pm) == "gpu" ); - } - { - auto p = static_cast>(cuda::cached::malloc(n*sizeof(T))); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - double* ppp = p; *ppp = 3.14; - assert( *p == 3.14 ); -#pragma GCC diagnostic pop - // cuda::cached::ptr P = nullptr; - } - { - cuda::cached::ptr p = nullptr; - cuda::cached::ptr pc = nullptr; - assert( p == pc ); - pc = static_cast>(p); - // double* dp = cuda::cached::ptr{nullptr}; - auto f = [](double const*){}; - f(p); - // cuda::ptr pp = p; -// std::reinterpret_pointer_cast(pp); - // cuda::cached::ptr ppp{pp}; - } - { - static_assert(std::is_convertible, double*>{}); - } - { - auto p = static_cast>(cuda::cached::malloc(n*sizeof(T))); - cuda::ptr cp = p; - cuda::cached::ptr mcp{cp}; - } - { - static_assert(std::is_same>::rebind, cuda::cached::ptr>{}, "!"); - } - std::cout << "Finish" << std::endl; -} -#endif -#endif - - diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/clib.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/clib.hpp deleted file mode 100644 index 3eb5559366..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/clib.hpp +++ /dev/null @@ -1,85 +0,0 @@ -//#ifdef COMPILATION_INSTRUCTIONS -//(echo '#include "'$0'"'>$0.cpp)&&nvcc -x cu `#-Wall -Wextra` -D_TEST_MULTI_MEMORY_ADAPTOR_CUDA_CLIB $0.cpp -o $0x -lcudart&&$0x&&rm $0x $0.cpp;exit -//#endif -// © 
Alfredo A. Correa 2019-2020 - -#ifndef MULTI_MEMORY_ADAPTOR_CUDA_CLIB_HPP -#define MULTI_MEMORY_ADAPTOR_CUDA_CLIB_HPP - -#include // cudaMalloc - -#include "../../adaptors/cuda/error.hpp" -#include "../../../config/NODISCARD.hpp" - -namespace Cuda { - using namespace std::string_literals; - - using size_t = ::size_t; - inline error Malloc(void** p, size_t bytes) {return static_cast(cudaMalloc(p, bytes));} - [[nodiscard]] // because it will produce a memory leak - inline void* malloc(size_t bytes) { - void* ret; - switch(auto e = Malloc(&ret, bytes)){ - case success : return ret; - case memory_allocation : return nullptr; - default : - throw std::system_error{e, "cannot allocate "+std::to_string(bytes)+" bytes in '"+__PRETTY_FUNCTION__+"'"}; - } - } - inline error Free(void* p){return static_cast(cudaFree(p));} - inline void free(void* p){ - auto e = Free(p); - // probably will terminate if called from noexcept functon - if(Cuda::success!=e) throw std::system_error{e, "cannot "s +__PRETTY_FUNCTION__}; - } - - namespace pointer { - using attributes_t = cudaPointerAttributes; - inline error GetAttributes(attributes_t* ret, void* p){return static_cast(cudaPointerGetAttributes(ret, p));} - /* attributes_t attributes(void* p){ - attributes_t ret; - auto e = GetAttributes(&ret, p); - if(e!=success) throw std::system_error{e, "cannot "s+__PRETTY_FUNCTION__}; - return ret; - }*/ - inline bool is_device(void* p) { - attributes_t ret; - auto e = GetAttributes(&ret, p); - if(e!=success) throw std::system_error{e, "cannot "s+__PRETTY_FUNCTION__}; - return ret.devicePointer or p==nullptr; - // return attributes(p).devicePointer or p==nullptr; - } - inline auto type(void* p) { - attributes_t ret; - auto e = GetAttributes(&ret, p); - if(e!=success) throw std::system_error{e, "cannot "s+__PRETTY_FUNCTION__}; - return ret.type; - } - } -} - -//#ifdef _TEST_MULTI_MEMORY_ADAPTOR_CUDA_CLIB - -//#include "../cuda/ptr.hpp" -//#include "../cuda/cstring.hpp" - -//#include - -//namespace 
multi = boost::multi; -//namespace cuda = multi::memory::cuda; - -//using std::cout; - -//int main(){ -// { -// void* p = Cuda::malloc(100); -// Cuda::free(p); -// } -// { -// char* p = (char*)Cuda::malloc(1ul<<40); -// assert(!p); -// Cuda::free(p); -// } -//} -//#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cstring.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cstring.hpp deleted file mode 100644 index 3e7e107150..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/cstring.hpp +++ /dev/null @@ -1,117 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -$CXXX $CXXFLAGS $0 -o $0x -lcudart -lboost_unit_test_framework -lboost_timer&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2019-2020 - -#ifndef BOOST_MULTI_MEMORY_ADAPTORS_CUDA_CSTRING_HPP -#define BOOST_MULTI_MEMORY_ADAPTORS_CUDA_CSTRING_HPP - -#include "../../adaptors/cuda/ptr.hpp" -#include "../../adaptors/cuda/managed/ptr.hpp" - -#include // cudaMemcpy/cudaMemset - -#include - -namespace boost{ -namespace multi{ -namespace memory{ -namespace cuda{ -#if (__cpp_nontype_template_parameter_auto>=201606) or defined(__NVCC__) -template // requires c++17 -void call(Args&&... args){ - auto s = static_cast(CudaFunction(args...)); - if( s != Cuda::error::success ) throw std::system_error{make_error_code(s), "cannot call cuda function "}; -} -#endif - -template -auto call_static(std::string const& name){ - return [=](auto... 
args)->decltype(CublasFunction(args...), void()){ - std::cerr << "Calling function " << name << std::endl; - Cuda::error s = CublasFunction(args...); - if( s != Cuda::error::success ) throw std::system_error{make_error_code(s), "cannot call cuda function "}; - }; -} - -//template -//auto call_static(){return call_static(std::string{});} - -#define CUDA(FunctionPostfix) ::boost::multi::memory::cuda::call_static(#FunctionPostfix) - -namespace memcpy_{ -//https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g18fa99055ee694244a270e4d5101e95b - enum class kind : std::underlying_type_t{ - host_to_host=cudaMemcpyHostToHost, host_to_device=cudaMemcpyHostToDevice, - device_to_host=cudaMemcpyDeviceToHost, device_to_device=cudaMemcpyDeviceToDevice, - inferred = cudaMemcpyDefault, default_ = cudaMemcpyDefault - }; - template constexpr kind type(T1* , T2* ){return kind::host_to_host ;} - template constexpr kind type(ptr, T2* ){return kind::host_to_device ;} - template constexpr kind type(T1* , ptr){return kind::device_to_host ;} - template constexpr kind type(ptr, ptr){return kind::device_to_device;} - template constexpr kind type(managed::ptr, P2){return kind::inferred;} - template constexpr kind type(P1, managed::ptr){return kind::inferred;} - template constexpr kind type(managed::ptr, managed::ptr){return kind::inferred;} - [[deprecated]] constexpr kind type(...) 
{return kind::inferred; } -} - -template -Dest memcpy(Dest dest, Src src, std::size_t byte_count){ - cuda::call(static_cast(dest), static_cast(src), byte_count, static_cast(memcpy_::type(dest, src))); - return dest; -} - -inline ptr memset(ptr dest, int ch, std::size_t byte_count){ - cuda::call(static_cast(dest), ch, byte_count); - return dest; -} - -template -auto memcpy2D(VoidPDst dst, std::size_t dpitch, VoidPCSrc src, std::size_t spitch, std::size_t width, std::size_t height) -->decltype(cuda::call(static_cast(dst), dpitch, static_cast(src), spitch, width, height, static_cast(memcpy_::type(dst, src)))){ - return cuda::call(static_cast(dst), dpitch, static_cast(src), spitch, width, height, static_cast(memcpy_::type(dst, src)));} - -}}}} - -//#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -//#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA cstring" -//#define BOOST_TEST_DYN_LINK -//#include - -//#include "../../adaptors/cuda/allocator.hpp" - -//#include - -//#include - -//namespace multi = boost::multi; -//namespace cuda = multi::memory::cuda; - -//BOOST_AUTO_TEST_CASE(multi_memory_cuda_cstring){ - -// std::size_t const n = 2e9/sizeof(double); -// cuda::ptr p = cuda::allocator{}.allocate(n); -// { -// boost::timer::auto_cpu_timer t; -// memset(p, 0, n*sizeof(double)); -// } -// BOOST_REQUIRE( p[n/2]==0 ); -// CUDA_SLOW ( -// p[n/2] = 99.; -// ) -// cuda::ptr q = cuda::allocator{}.allocate(n); -// { -// boost::timer::auto_cpu_timer t; -// memcpy(q, p, n*sizeof(double)); -// } -// BOOST_REQUIRE( p[n/2] == 99. ); -// BOOST_REQUIRE( q[n/2] == 99. 
); - -// double a = 5.; -// BOOST_REQUIRE(a == 5.); - -//} -//#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/error.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/error.hpp deleted file mode 100644 index 47f05ea07a..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/error.hpp +++ /dev/null @@ -1,99 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS//-*-indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4;-*- -$CXXX $CXXFLAGS $0 -o $0x `pkg-config --cflags --libs cudart-11.1`&&$0x&&rm $0x;exit -#endif -// © Alfredo A. Correa 2019-2020 - -#ifndef MULTI_MEMORY_ADAPTOR_CUDA_DETAIL_ERROR_HPP -#define MULTI_MEMORY_ADAPTOR_CUDA_DETAIL_ERROR_HPP - -#include // cudaError_t -#include // cudaGetErrorString - -#include -#include // underlying_type - -namespace Cuda{ - -enum /*class*/ error : std::underlying_type::type{ - success = cudaSuccess, // = 0 The API call returned with no errors. In the case of query calls, this also means that the operation being queried is complete (see cudaEventQuery() and cudaStreamQuery()). - missing_configuration = cudaErrorMissingConfiguration, -// invalid_value /*invalid_argument*/ = cudaErrorInvalidValue, // = 1, This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values. - memory_allocation = cudaErrorMemoryAllocation, // = 2 // The API call failed because it was unable to allocate enough memory to perform the requested operation. 
- initialization_error = cudaErrorInitializationError, - lauch_failure = cudaErrorLaunchFailure, - lauch_timeout = cudaErrorLaunchTimeout, - lauch_out_of_resources = cudaErrorLaunchOutOfResources, - invalid_device_function = cudaErrorInvalidDeviceFunction, - invalid_configuration = cudaErrorInvalidConfiguration, - invalid_device = cudaErrorInvalidDevice, - invalid_value = cudaErrorInvalidValue, ///*invalid_argument*/ = cudaErrorInvalidValue, // = 1 This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values. - invalid_pitch_value = cudaErrorInvalidPitchValue, - invalid_symbol = cudaErrorInvalidSymbol, - unmap_buffer_object_failed = cudaErrorUnmapBufferObjectFailed, - invalid_device_pointer = cudaErrorInvalidDevicePointer, - invalid_texture = cudaErrorInvalidTexture, - invalid_texture_binding = cudaErrorInvalidTextureBinding, - invalid_channel_descriptor = cudaErrorInvalidChannelDescriptor, - invalid_memcpy_direction = cudaErrorInvalidMemcpyDirection, - invalud_filter_setting = cudaErrorInvalidFilterSetting, - invalid_norm_setting = cudaErrorInvalidNormSetting, - unknown = cudaErrorUnknown, - invalid_resource_handle = cudaErrorInvalidResourceHandle, - insuffient_driver = cudaErrorInsufficientDriver, - no_device = cudaErrorNoDevice, - set_on_active_process = cudaErrorSetOnActiveProcess, - startup_failure = cudaErrorStartupFailure, - invalid_ptx = cudaErrorInvalidPtx, - no_kernel_image_for_device = cudaErrorNoKernelImageForDevice, - jit_compiler_not_found = cudaErrorJitCompilerNotFound -}; - -inline std::string string(enum error e){return cudaGetErrorString(static_cast(e));} - -struct error_category : std::error_category{ - char const* name() const noexcept override{return "cuda wrapper";} - std::string message(int e) const override{return string(static_cast(e));} - static error_category& instance(){ - static error_category instance; - return instance; - } -}; - -inline std::error_code make_error_code(error err) 
noexcept{ - return {int(err), error_category::instance()}; -} - -} - -namespace std{template<> struct is_error_code_enum : true_type{};} - -#if not __INCLUDE_LEVEL__ - -#include - -using std::cout; - -int main(){ - - { - std::error_code ec = Cuda::error::memory_allocation; (void)ec; - } - try{ - auto e = Cuda::error::memory_allocation; // return from a cudaFunction - throw std::system_error{e, "I cannot do allocation"}; - }catch(std::system_error const& e){ - cout - <<"catched...\n" - <<"code: " << e.code() <<'\n' - <<"message: "<< e.code().message() <<'\n' - <<"what: " << e.what() <<'\n' - ; - } - -// auto e = Cuda::error::memory_allocation; // return from a cudaFunction -// throw std::system_error{e, "because"}; - -} -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/malloc.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/malloc.hpp deleted file mode 100644 index f28c360bd2..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/malloc.hpp +++ /dev/null @@ -1,42 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS -(echo '#include"'$0'"'>$0.cpp)&&nvcc -D_TEST_MULTI_MEMORY_ADAPTORS_CUDA_MALLOC $0.cpp -o $0x &&$0x&&rm $0x; exit -#endif - -#ifndef MULTI_MEMORY_ADAPTORS_CUDA_MALLOC -#define MULTI_MEMORY_ADAPTORS_CUDA_MALLOC - -#include "../../adaptors/cuda/clib.hpp" -#include "../../adaptors/cuda/ptr.hpp" - -namespace boost{namespace multi{ -namespace memory{ - -namespace cuda{ - using size_t = Cuda::size_t; -#if __cplusplus >= 201703L -#if __has_cpp_attribute(nodiscard) >= 201603L - [[nodiscard]] -#endif -#endif - inline auto malloc(size_t bytes) -> ptr{return ptr{Cuda::malloc(bytes)};} - inline void free(ptr p){Cuda::free(p);} -} - -} -}} - -#ifdef _TEST_MULTI_MEMORY_ADAPTORS_CUDA_MALLOC - -namespace multi = boost::multi; -namespace cuda = multi::memory::cuda; - -int main(){ - using cuda::ptr; - ptr p = static_cast>(cuda::malloc(100*sizeof(double))); - p[10] = 99.; - 
cuda::free(p); -} - -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/allocator.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/allocator.hpp deleted file mode 100644 index e18d2ab492..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/allocator.hpp +++ /dev/null @@ -1,126 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXXX $CXXFLAGS $0 -o $0x -lcudart&&$0x&&rm $0x;exit -#endif - -#ifndef MULTI_MEMORY_ADAPTORS_CUDA_MANAGED_ALLOCATOR_HPP -#define MULTI_MEMORY_ADAPTORS_CUDA_MANAGED_ALLOCATOR_HPP - -#include "../../../adaptors/cuda/allocator.hpp" -#include "../../../adaptors/cuda/managed/ptr.hpp" - -#include "../../../adaptors/cuda/managed/clib.hpp" // cuda::malloc -#include "../../../adaptors/cuda/managed/malloc.hpp" - -#include -#include -#include // debug -#include -#include // bad_alloc - -namespace boost{namespace multi{ -namespace memory{namespace cuda{ - -namespace managed{ - struct bad_alloc : std::bad_alloc{}; - - template - class allocator : cuda::allocator{ - static_assert( std::is_same>{}, "!" 
); - public: - using value_type = T; - using pointer = managed::ptr; - using size_type = ::size_t; // as specified by CudaMalloc - using const_void_pointer = managed::ptr; - template using rebind = managed::allocator; - pointer allocate(typename allocator::size_type n){ - if(n == 0) return pointer{nullptr}; - auto ret = static_cast(cuda::managed::malloc(n*sizeof(T))); - if(!ret) throw bad_alloc{}; - if(PrefetchDevice::value != -99) - if(cudaMemPrefetchAsync(raw_pointer_cast(ret), n*sizeof(T), PrefetchDevice::value) != cudaSuccess) throw std::runtime_error{"cannot prefetch for some reason"}; - // ++allocator::n_allocations; allocator::bytes_allocated+=sizeof(T)*n; - return ret; - } - pointer allocate(typename allocator::size_type n, const_void_pointer hint){ - MULTI_MARK_SCOPE("cuda::managed::allocate"); - - auto const ret = allocate(n); - if(not hint){ - if(cudaMemPrefetchAsync(raw_pointer_cast(ret), n*sizeof(T), /*device*/ 0) != cudaSuccess) throw std::runtime_error{"cannot prefetch"}; - return ret; - } - cudaPointerAttributes attr; if(cudaPointerGetAttributes(&attr, raw_pointer_cast(hint))!=cudaSuccess) throw std::runtime_error{"cannot use attributes for hint"}; - switch(attr.type){ - case cudaMemoryTypeUnregistered:{//std::cout<< n <<" cudaMemoryTypeUnregistered"<< attr.device <<" "<< attr.device <<" cpuid:"<< cudaCpuDeviceId <>(p)); - } - template - void construct(P p, Args&&... args){ - ::new(p.rp_) T(std::forward(args)...); - } - template - void construct(P* p, Args&&... 
args){ - ::new(p) T(std::forward(args)...); - } - template void destroy(P p){p.rp_->~T();} - template void destroy(P* p){p->~T();} - constexpr bool operator==(allocator const&) const{return true;} - constexpr bool operator!=(allocator const&) const{return false;} - template - constexpr ForwardIt alloc_uninitialized_copy(InputIt first, InputIt last, ForwardIt d_first) const{ - return ForwardIt{adl_uninitialized_copy(first, last, d_first)}; - } - template - constexpr ForwardIt alloc_uninitialized_copy_n(InputIt first, Size count, ForwardIt d_first) const{ - return ForwardIt{adl_uninitialized_copy_n(first, count, d_first)}; - } - template - constexpr ForwardIt alloc_uninitialized_default_construct_n(ForwardIt first, Size n) const{ - return ForwardIt{adl_uninitialized_default_construct_n(first, n)}; - } - template - constexpr ForwardIt alloc_destroy_n(ForwardIt first, Size n) const{return ForwardIt{destroy_n(first, n)};} - }; -} - -}}}} - -#if not __INCLUDE_LEVEL__ - -#include -#include -#include "../../../../array.hpp" - -namespace multi = boost::multi; -namespace cuda = multi::memory::cuda; - -int main(){ - - multi::array > A(32); - A[17] = 3.; - assert( A[17] == 3. 
); - -} -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/clib.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/clib.hpp deleted file mode 100644 index cf8249a89b..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/clib.hpp +++ /dev/null @@ -1,44 +0,0 @@ -//#ifdef COMPILATION_INSTRUCTIONS -//(echo '#include "'$0'"'>$0.cpp)&&c++ -std=c++11 -Wall -Wextra -Wpedantic -Wfatal-errors -D_TEST_MULTI_MEMORY_ADAPTOR_CUDA_MANAGED_MALLOC $0.cpp -lcudart -o $0x &&$0x&& rm $0x $0.cpp; exit -//#endif -#ifndef MULTI_MEMORY_ADAPTOR_CUDA_MANAGED_CLIB_HPP -#define MULTI_MEMORY_ADAPTOR_CUDA_MANAGED_CLIB_HPP - -#include // cudaMallocManaged - -#include "../../../adaptors/cuda/clib.hpp" // Cuda::free -#include "../../../adaptors/cuda/error.hpp" - -namespace Cuda { - namespace Managed { - inline error Malloc(void** p, size_t bytes) {return static_cast(cudaMallocManaged(p, bytes/*, cudaMemAttachGlobal*/));} - inline void* malloc(size_t bytes) { - void* ret; - switch(auto e = Malloc(&ret, bytes)) { - case success : return ret; - case memory_allocation : return nullptr; - default : - throw std::system_error{e, "cannot allocate "+std::to_string(bytes)+" bytes in '"+__PRETTY_FUNCTION__+"'"}; - } - } - inline void free(void* p) {return Cuda::free(p);} - } -} - -//#ifdef _TEST_MULTI_MEMORY_ADAPTOR_CUDA_MANAGED_MALLOC - -//#include "../../cuda/managed/ptr.hpp" - -//#include - -//namespace multi = boost::multi; -//namespace cuda = multi::memory::cuda; - -//using std::cout; - -//int main(){ -// void* p = Cuda::Managed::malloc(100); -// Cuda::Managed::free(p); -//} -//#endif -#endif diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/malloc.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/malloc.hpp deleted file mode 100644 index 721cff5fd6..0000000000 --- 
a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/malloc.hpp +++ /dev/null @@ -1,33 +0,0 @@ -//#ifdef COMPILATION_INSTRUCTIONS -//(echo '#include"'$0'" '>$0.cpp)&& `#nvcc -ccbin=cuda-`c++ -D_TEST_MULTI_MEMORY_ADAPTORS_CUDA_MANAGED_MALLOC $0.cpp -o $0x -lcudart &&$0x&&rm $0x; exit -//#endif - -#ifndef MULTI_MEMORY_ADAPTORS_CUDA_MANAGED_MALLOC_HPP -#define MULTI_MEMORY_ADAPTORS_CUDA_MANAGED_MALLOC_HPP - -#include "../../../adaptors/cuda/managed/clib.hpp" -#include "../../../adaptors/cuda/managed/ptr.hpp" - -namespace boost {namespace multi { -namespace memory { - -namespace cuda { - -namespace managed { - [[nodiscard]] - inline managed::ptr malloc(size_t bytes) { - MULTI_MARK_SCOPE("cuda::managed::malloc"); - return managed::ptr{Cuda::Managed::malloc(bytes)}; - } - inline void free(managed::ptr p) { - MULTI_MARK_SCOPE("cuda::managed::free"); - Cuda::Managed::free(static_cast(p)); - } -} - -} - -} -}} - -#endif diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/ptr.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/ptr.hpp deleted file mode 100644 index 32b89d54f3..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/managed/ptr.hpp +++ /dev/null @@ -1,351 +0,0 @@ -#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -$CXXX $CXXFLAGS $0 -o $0.$X `pkg-config --cflags --libs cudart-11.0`&&$0.$X&&rm $0.$X;exit -#endif -// © Alfredo A. 
Correa 2019-2020 - -#ifndef BOOST_MULTI_MEMORY_ADAPTORS_CUDA_MANAGED_PTR_HPP -#define BOOST_MULTI_MEMORY_ADAPTORS_CUDA_MANAGED_PTR_HPP - -#include // nullptr_t -#include // random_access_iterator_tag - -#include // is_const - -#include "../../cuda/ptr.hpp" - -#include "../../../../detail/memory.hpp" - -#include // cudaDeviceSynchronize - -#ifndef _DISABLE_CUDA_SLOW -#ifdef NDEBUG -#define SLOW deprecated("because it implies a slow element access to GPU memory") -#else -#define SLOW -#endif -#else -#define SLOW -#endif - -#ifndef HD -#ifdef __CUDA_ARCH__ -#define HD __host__ __device__ -#else -#define HD -#endif -#endif - -namespace boost { -namespace serialization { - template class array_wrapper; - template const array_wrapper make_array(T* t, S s); -}} - -namespace boost {namespace multi { -namespace memory {namespace cuda { - -namespace managed{ - -template struct ptr; - -template -struct ptr : cuda::ptr { - using T = void const; - using raw_pointer = RawPtr; -// raw_pointer rp_; - template friend struct ptr; - template friend ptr const_pointer_cast(ptr const&); - explicit ptr(raw_pointer rp) : cuda::ptr{rp} {} - - public: - ptr() = default; - ptr(ptr const&) = default; - - // cppcheck-suppress noExplicitConstructor ; initialized from nullptr - ptr(std::nullptr_t n) : cuda::ptr{n} {} - - template().rp_})> - // cppcheck-suppress noExplicitConstructor ; any pointer can be converted to void pointer - ptr(Other const& o) : cuda::ptr{o} {} - ptr& operator=(ptr const&) = default; - - using pointer = ptr; - using element_type = typename std::pointer_traits::element_type; - using difference_type = void; -// explicit operator bool() const{return rp_;} -// explicit operator raw_pointer&()&{return rp_;} - friend constexpr bool operator==(ptr const& self, ptr const& o) {return self.rp_ == o.rp_;} - friend constexpr bool operator!=(ptr const& self, ptr const& o) {return self.rp_ != o.rp_;} - - void operator*() const = delete; - template using rebind = ptr::template rebind>; 
-}; - -template -struct ptr : cuda::ptr { - using pointer = ptr; - using element_type = void; - using difference_type = typename std::pointer_traits::difference_type; - - protected: - using raw_pointer = RawPtr; -// raw_pointer rp_; - - private: - ptr(ptr const& p) : cuda::ptr{const_cast(p.rp_)} {} - template friend ptr const_pointer_cast(ptr const&); - template friend struct ptr; - template friend class allocator; - - public: - template ptr(ptr const& p) : cuda::ptr{p.rp_} {} - explicit ptr(raw_pointer rp) : cuda::ptr{rp} {} - ptr() = default; - ptr(ptr const& p) = default; - - // cppcheck-suppress noExplicitConstructor ; initialized from nullptr - ptr(std::nullptr_t n) : cuda::ptr{n} {} - - template().impl_})> - // cppcheck-suppress noExplicitConstructor ; any pointer is convertible to void pointer - ptr(Other const& o) : cuda::ptr{o.rp_}{} - - ptr& operator=(ptr const&) = default; - - friend constexpr bool operator==(ptr const& self, ptr const& other) {return self.rp_==other.rp_;} - friend constexpr bool operator!=(ptr const& self, ptr const& other) {return self.rp_!=other.rp_;} - - template using rebind = ptr::template rebind>; - - explicit operator raw_pointer&()& {return this->rp_;} - - void operator*() = delete; - friend raw_pointer raw_pointer_cast(ptr const& self) {return self.rp_;} -}; - -template > class allocator; - -template -struct ptr : cuda::ptr { - using raw_pointer = RawPtr; -// raw_pointer rp_; - - protected: - friend struct cuda::ptr; // to allow automatic conversions - template friend class allocator; - template friend struct ptr; -// template{}>::type> -// ptr(ptr const& p) : rp_{const_cast(p.impl_)}{} - template friend ptr const_pointer_cast(ptr const&); - - public: - template using rebind = ptr::template rebind>; -// explicit ptr(cuda::ptr const& other) : rp_{other.rp_}{} - - template>().rp_)>, raw_pointer>{}>> - // cppcheck-suppress noExplicitConstructor ; propagate implicit of underlying pointer - /*explicit(false)*/ constexpr ptr(ptr 
const& o) : cuda::ptr{static_cast(o.rp_)} {} - - template>().rp_)>, raw_pointer>{}>, typename = decltype(static_cast(std::declval>().rp_))> - explicit/*(true)*/ constexpr ptr(ptr const& o, void** = 0) : cuda::ptr{static_cast(o.rp_)} {} - - constexpr explicit ptr(void* vp) : cuda::ptr{static_cast(vp)}{} -// template>().rp_)>, raw_pointer>{}>> -// ptr(ptr const& o) HD : rp_{static_cast(o.rp_)}{} -// template>().rp_)>, raw_pointer>{}>> -// explicit ptr(ptr const& o, void** = 0) HD : rp_{static_cast(o.rp_)}{} - constexpr explicit ptr(cuda::ptr const& other) : ptr{other.rp_}{ - // assert(other.rp_!=nullptr or Cuda::pointer::type(other.rp_) == cudaMemoryTypeManaged); - } - constexpr explicit ptr(raw_pointer p) : cuda::ptr{p} {}//Cuda::pointer::is_device(p);} - ptr() = default; - - // cppcheck-suppress noExplicitConstructor ; bug in cppcheck 2.3 - ptr(ptr const&) = default; - - // cppcheck-suppress noExplicitConstructor ; initialize from nullptr - constexpr ptr(std::nullptr_t n) : cuda::ptr{n}{} - - ptr& operator=(ptr const&) = default; - friend constexpr bool operator==(ptr const& s, ptr const& o) {return s.rp_==o.rp_;} - friend constexpr bool operator!=(ptr const& s, ptr const& o) {return s.rp_!=o.rp_;} - - using element_type = typename std::pointer_traits::element_type; - using difference_type = typename std::pointer_traits::difference_type; - using value_type = T; - using pointer = ptr; - using iterator_category = typename std::iterator_traits::iterator_category; // using iterator_concept = typename std::iterator_traits::iterator_concept; - constexpr explicit operator bool() const {return this->rp_;} -// bool operator not() const{return !rp_;} - constexpr -#ifndef MULTI_ALLOW_IMPLICIT_CPU_CONVERSION - explicit -#endif - operator raw_pointer()const&{return this->rp_;} // do not =delete - constexpr operator ptr() const{return ptr{this->rp_};} -// template -// decltype(auto) operator->*(PM pm) const{return *ptr*pm)>, decltype(&(rp_->*pm))>{&(rp_->*pm)};} - explicit 
constexpr operator typename std::pointer_traits::template rebind() const {return typename std::pointer_traits::template rebind{this->rp_};} - explicit operator typename std::pointer_traits::template rebind() const {return typename std::pointer_traits::template rebind{this->rp_};} - constexpr ptr& operator++() {++(this->rp_); return *this;} // remove - constexpr ptr& operator--() {--(this->rp_); return *this;} // remove - ptr operator++(int) {auto tmp = *this; ++(*this); return tmp;} // remove - ptr operator--(int) {auto tmp = *this; --(*this); return tmp;} // remove - constexpr ptr& operator+=(typename ptr::difference_type n) {(this->rp_)+=n; return *this;} // remove - constexpr ptr& operator-=(typename ptr::difference_type n) HD {(this->rp_)-=n; return *this;} // remove - constexpr ptr operator+(typename ptr::difference_type n) const {return ptr{(this->rp_) + n};} // remove - constexpr ptr operator-(typename ptr::difference_type n) const {return (*this) + (-n);} // remove - using reference = typename std::pointer_traits::element_type&;//ref; - constexpr reference operator*() const {return *(this->rp_);} - constexpr reference operator[](difference_type n) const {return *((*this)+n);} - constexpr typename ptr::difference_type operator-(ptr const& other) const {return (this->rp_)-other.rp_;} - - constexpr raw_pointer raw_pointer_cast() const& {return this->rp_;} // remove - friend constexpr raw_pointer raw_pointer_cast(ptr const& self) {return self.rp_;} - - friend cuda::ptr cuda_pointer_cast(ptr const& self) {return cuda::ptr{self.rp_};} -// constexpr operator cuda::ptr() const{return cuda::ptr{this->rp_};} - friend constexpr allocator> get_allocator(ptr const&) {return {};} // do not =delete - using default_allocator_type = allocator>; - default_allocator_type default_allocator() const {return {};} - - template//, std::enable_if_t{}, int> =0> - static auto copy_n( - managed::ptr first, Size count, - managed::ptr result - ){ - return adl_copy_n(cuda::ptr(first), 
count, cuda::ptr(result)), result + count; - } -public: - friend allocator> default_allocator_of(ptr const&){return {};} -}; - -template const boost::serialization::array_wrapper make_array(ptr t, S s){ - using boost::serialization::make_array; - return make_array(raw_pointer_cast(t), s); -} - -} - -}} -}} - -#undef SLOW - -#if defined(__INCLUDE_LEVEL__) and not __INCLUDE_LEVEL__ - -#include "../../cuda/managed/clib.hpp" // cuda::malloc -#include "../../cuda/managed/malloc.hpp" - -#include -#include -#include - -namespace multi = boost::multi; -namespace cuda = multi::memory::cuda; - -void add_one(double& d){d += 1.;} -template -void add_one(T&& t){std::forward(t) += 1.;} - -// * Functions with a __global__ qualifier, which run on the device but are called by the host, cannot use pass by reference. -//__global__ void set_5(cuda::ptr const& p){ -//__global__ void set_5(cuda::ptr p){*p = 5.;} -//__global__ void check_5(cuda::ptr p){assert(*p == 5.);} - -double const* g(){double* p{nullptr}; return p;} - -cuda::managed::ptr f(){ - return cuda::managed::ptr{nullptr}; -} - -cuda::managed::ptr ff(){ - return cuda::managed::ptr{cuda::ptr{nullptr}}; -} - -std::string full_overload(double*){return "cpu";} -std::string full_overload(cuda::ptr){return "gpu";} -std::string full_overload(cuda::managed::ptr){return "mng";} - -std::string cpugpu_overload(double*){return "cpu";} -std::string cpugpu_overload(cuda::ptr){return "gpu";} - -std::string cpuonly_overload(double*){return "cpu";} - -std::string gpuonly_overload(cuda::ptr){return "gpu";} - -template void what(T&&) = delete; - -int main(){ - - - f(); - using T = double; static_assert( sizeof(cuda::managed::ptr) == sizeof(T*) , "!"); - std::size_t const n = 100; - { - auto p = static_cast>(cuda::managed::malloc(n*sizeof(T))); - // cuda::managed::ptr vp = p; - // T* rp = p; - // void* vrp = p; -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - *p = 99.; - if(*p != 99.) 
assert(0); - if(*p == 11.) assert(0); -#pragma GCC diagnostic pop - cuda::managed::free(p); - } - { - double d = 1.; - assert( full_overload(&d) == "cpu" ); - assert( cpugpu_overload(&d) == "cpu" ); - assert( cpugpu_overload(&d) == "cpu" ); - - cuda::ptr p = nullptr; - assert( full_overload(p) == "gpu" ); - assert( cpugpu_overload(p) == "gpu" ); - assert( gpuonly_overload(p) == "gpu" ); - - cuda::managed::ptr pm = nullptr; - assert( full_overload(pm) == "mng" ); - assert( cpugpu_overload(pm) == "gpu" ); - assert( cpuonly_overload(pm) == "cpu" ); - assert( gpuonly_overload(pm) == "gpu" ); - } - { - auto p = static_cast>(cuda::managed::malloc(n*sizeof(T))); -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - double* ppp = p; *ppp = 3.14; - assert( *p == 3.14 ); -#pragma GCC diagnostic pop - // cuda::managed::ptr P = nullptr; - } - { - cuda::managed::ptr p = nullptr; - cuda::managed::ptr pc = nullptr; - assert( p == pc ); - pc = static_cast>(p); - // double* dp = cuda::managed::ptr{nullptr}; - auto f = [](double const*){}; - f(p); - // cuda::ptr pp = p; -// std::reinterpret_pointer_cast(pp); - // cuda::managed::ptr ppp{pp}; - } - { - static_assert(std::is_convertible, double*>{}); - } - { - auto p = static_cast>(cuda::managed::malloc(n*sizeof(T))); - cuda::ptr cp = p; - cuda::managed::ptr mcp{cp}; - } - { - static_assert(std::is_same>::rebind, cuda::managed::ptr>{}, "!"); - } - std::cout << "Finish" << std::endl; -} -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/ptr.hpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/ptr.hpp deleted file mode 100644 index 561c57c572..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/ptr.hpp +++ /dev/null @@ -1,956 +0,0 @@ -//#ifdef COMPILATION// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*- -//$CXXX $CXXFLAGS $0 -o $0.$X `pkg-config --cflags --libs cudart-11.0` 
-lboost_unit_test_framework&&$0.$X&&rm $0.$X;exit -//#endif - -#ifndef BOOST_MULTI_MEMORY_ADAPTORS_CUDA_PTR_HPP -#define BOOST_MULTI_MEMORY_ADAPTORS_CUDA_PTR_HPP - -#include "../../adaptors/cuda/clib.hpp" -#include "../../adaptors/cuda/error.hpp" -#include "../../../array_ref.hpp" -#include "../../../complex.hpp" // adl_conj - -#include "../../../config/DEPRECATED.hpp" - -#include // debug -#include // exchange - -#include - -#ifndef _DISABLE_CUDA_SLOW - #ifdef NDEBUG - #define SLOW DEPRECATED("WARNING: slow memory operation") - #else - #define SLOW - #endif -#else - #define SLOW -#endif - -#define CUDA_SLOW(ExpR) NO_DEPRECATED(ExpR) - -namespace boost {namespace multi { -namespace memory {namespace cuda { - -template struct ref; - -template struct ptr; - -namespace managed{template struct ptr;} - -template -struct ptr { - using pointer = ptr; - using element_type = void const; -// using difference_type = void;//typename std::pointer_traits::difference_type; - - operator ::thrust::cuda::pointer() const {return ::thrust::cuda::pointer{rp_};} - - protected: - using raw_pointer = RawPtr; - template friend struct managed::ptr; - raw_pointer rp_; - template friend struct ptr; - template friend ptr const_pointer_cast(ptr const&); - explicit ptr(raw_pointer rp) : rp_{rp} {} - - public: - ptr() = default; - ptr(ptr const&) = default; - - // cppcheck-suppress noExplicitConstructor ; initialize from nullptr - ptr(std::nullptr_t n) : rp_{n} {} - - template().rp_})> - // cppcheck-suppress noExplicitConstructor ; any other pointer can be converted to void const pointer - ptr(Other const& o) : rp_{o.rp_} {} - ptr& operator=(ptr const&) = default; - - explicit operator bool() const {return rp_;} - - friend constexpr bool operator==(ptr const& s, ptr const& o) {return s.rp_==o.rp_;} - friend constexpr bool operator!=(ptr const& s, ptr const& o) {return s.rp_!=o.rp_;} - - friend constexpr raw_pointer to_address(ptr const& self) {return self.rp_;} - friend raw_pointer 
raw_pointer_cast(ptr const& self) {return self.rp_;} -}; - -template class allocator; - -template -struct ptr { - operator ::thrust::cuda_cub::pointer() const {return ::thrust::cuda_cub::pointer{rp_};} - - protected: - using T = void; - using raw_pointer = RawPtr; - using raw_pointer_traits = std::pointer_traits; - static_assert(std::is_same{}, "!"); - raw_pointer rp_; - friend ptr malloc(size_t); - friend void free(); - friend ptr memset(ptr dest, int ch, std::size_t byte_count); - template friend struct managed::ptr; - - protected: - template friend ptr const_pointer_cast(ptr const&); - template friend struct ptr; - explicit ptr(raw_pointer rp) : rp_{rp} {} - operator raw_pointer() const{return rp_;} - friend ptr malloc(std::size_t); - friend void free(ptr); - - public: - ptr() = default; - ptr(ptr const& other) : rp_{other.rp_}{}//= default; - - // cppcheck-suppress noExplicitConstructor ; initialize from nullptr - ptr(std::nullptr_t n) : rp_{n}{} - - template().rp_})> - // cppcheck-suppress noExplicitConstructor ; any pointer can be converted to void pointer - ptr(Other const& o) : rp_{o.rp_}{} - - ptr& operator=(ptr const&) = default; - friend constexpr bool operator==(ptr const& s, ptr const& o){return s.rp_==o.rp_;} - friend constexpr bool operator!=(ptr const& s, ptr const& o){return s.rp_!=o.rp_;} - using pointer = ptr; - using element_type = typename std::pointer_traits::element_type; - using difference_type = typename std::pointer_traits::difference_type; - template using rebind = ptr::template rebind>; -// using default_allocator_type = typename cuda::allocator::value_type>; - explicit operator bool() const{return rp_;} -// explicit operator raw_pointer&()&{return impl_;} - - friend constexpr raw_pointer to_address(ptr const& p) {return p.rp_;} -}; - -template -struct ptr { - operator ::thrust::cuda_cub::pointer() const {return ::thrust::cuda_cub::pointer{rp_};} - - using raw_pointer = RawPtr; - using default_allocator_type = typename cuda::allocator>; 
- raw_pointer rp_ = {}; - - static_assert( not std::is_same{} , "!"); - - protected: - using raw_pointer_traits = typename std::pointer_traits; - template friend class allocator; - - template friend struct ptr; - template friend struct ref; - - template friend ptr const_pointer_cast(ptr const&); - friend struct managed::ptr; - - public: - template using rebind = ptr::template rebind>; - - template>().rp_)>, raw_pointer>{} and not std::is_same{} >> - // cppcheck-suppress noExplicitConstructor ; - HD constexpr /*explicit(false)*/ ptr(ptr const& o) : rp_{static_cast(o.rp_)} {} - template>().rp_)>, raw_pointer>{} and not std::is_same{}>, typename = decltype(static_cast(std::declval>().rp_))> - HD constexpr explicit/*(true)*/ ptr(ptr const& o, void** = 0) : rp_{static_cast(o.rp_)} {} - HD constexpr explicit ptr(raw_pointer rp) : rp_{rp} {} - - template friend auto reinterpret_pointer_cast(ptr p) - ->decltype(ptr{reinterpret_cast(std::declval())}){ - return ptr{reinterpret_cast(p.rp_)};} - - template(std::declval().rp_))> - HD constexpr explicit ptr(Other const& o) : rp_{static_cast(o.rp_)}{} - ptr() = default; - - // cppcheck-suppress noExplicitConstructor ; bug in cppcheck 2.3 - ptr(ptr const&) = default; - - // cppcheck-suppress noExplicitConstructor ; initialize from nullptr - constexpr ptr(std::nullptr_t nu) : rp_{nu} {} - - ptr& operator=(ptr const&) = default; - - friend constexpr bool operator==(ptr const& s, ptr const& o) {return s.rp_==o.rp_;} - friend constexpr bool operator!=(ptr const& s, ptr const& o) {return s.rp_!=o.rp_;} - - using element_type = typename raw_pointer_traits::element_type; - using difference_type = typename raw_pointer_traits::difference_type; - using size_type = difference_type; - using value_type = T; - - using pointer = ptr; - using iterator_category = typename std::iterator_traits::iterator_category; - explicit constexpr operator bool() const {return rp_;} - explicit constexpr operator void const*() const {return rp_;} - 
template(raw_pointer{}))> - explicit constexpr operator TT*() const {return static_cast(rp_);} - ptr& operator++() { - static_assert(not std::is_same{}, "!"); - ++rp_; - return *this; - } - ptr& operator--() {--rp_; return *this;} - - ptr operator++(int) {auto tmp = *this; ++(*this); return tmp;} - ptr operator--(int) {auto tmp = *this; --(*this); return tmp;} - - constexpr ptr& operator+=(difference_type n) {rp_+=n; return *this;} - constexpr ptr& operator-=(difference_type n) {rp_-=n; return *this;} - - constexpr ptr operator+(difference_type n) const { - // static_cast(not std::is_same{} , "!"); - return ptr{rp_ + n}; - } - constexpr ptr operator-(difference_type n) const {return ptr{rp_ - n};} - - using reference = ref; - - [[deprecated("slow")]] constexpr auto operator*() const {return reference{*this};} - constexpr auto operator[](difference_type n) const {return reference{*((*this)+n)};} - - constexpr difference_type operator-(ptr const& o) const {return rp_-o.rp_;} - operator ptr() {return ptr{rp_};} - HD auto get() const {return rp_;} - - friend constexpr raw_pointer to_address(ptr const& p) {return p.rp_;} // TODO(correaa) consider returning T* , from https://en.cppreference.com/w/cpp/memory/to_address - explicit constexpr operator raw_pointer() const {return rp_;} - constexpr raw_pointer raw_pointer_cast() const {return this->rp_;} - friend constexpr raw_pointer raw_pointer_cast(ptr const& self) {return self.rp_;} - - template - constexpr auto operator->*(PM&& pm) const - ->decltype(ref*std::forward(pm))>>{ptr*std::forward(pm))>>{&(rp_->*std::forward(pm))}}) { - return ref*std::forward(pm))>>{ptr*std::forward(pm))>>{&(rp_->*std::forward(pm))}}; } - - public: - friend allocator> get_allocator(ptr const&) {return {};} - friend allocator> default_allocator_of(ptr const&) {return {};} -}; - -template -DEPRECATED("experimental function, it might be removed soon https://gitlab.com/correaa/boost-multi/-/issues/91") -T* raw_pointer_cast(T* p) {return p;} - 
-template allocator get_allocator(ptr const&){return {};} - -template< - class InputIt, class Size, class... T, class ForwardIt = ptr, - typename InputV = typename std::pointer_traits::element_type, - typename ForwardV = typename std::pointer_traits::element_type, - std::enable_if_t{}, int> =0 -> -ForwardIt uninitialized_copy_n(InputIt f, Size n, ptr d) { - memcpy(d, f, n*sizeof(ForwardV)); - return d + n; -} - -template::value_type, typename = std::enable_if_t{}>> -auto uninitialized_copy_n(It first, Size count, boost::multi::iterator> result) -//->decltype(memcpy2D(base(result), sizeof(T2)*stride(result), first, sizeof(T)*stride(first), sizeof(T), count), result + count){ -{ return memcpy2D(base(result), sizeof(T2)*stride(result), base(first), sizeof(T)*stride(first), sizeof(T), count), result + count;} - -template::value_type, typename = std::enable_if_t{}>> -auto uninitialized_move_n(It first, Size count, boost::multi::iterator> result) -//->decltype(memcpy2D(base(result), sizeof(T2)*stride(result), first, sizeof(T)*stride(first), sizeof(T), count), result + count){ -{ return memcpy2D(base(result), sizeof(T2)*stride(result), base(first), sizeof(T)*stride(first), sizeof(T), count), result + count;} - -template>::element_type> -auto uninitialized_move_n(ptr first, Size n, ptr dest) { - assert(( std::is_trivially_constructible{} )); - memcpy(dest, first, n*sizeof(Element)); - return dest + n; -} - - -// copy_n - -template -auto copy_n( - boost::multi::elements_iterator_t< Q1*, L1> first, Size count, - boost::multi::elements_iterator_t, L2> d_first -)-> boost::multi::elements_iterator_t, L2> { - copy_n( - first, count, - static_cast, L2>>(d_first) - ); - return d_first + count; -} - -template -auto copy_n( - boost::multi::elements_iterator_t, L1> first, Size count, - boost::multi::elements_iterator_t< Q2*, L2> d_first -)-> boost::multi::elements_iterator_t< Q2*, L2> { - copy_n( - static_cast, L1>>( first), count, - d_first - ); - return d_first + count; -} - 
-template -auto copy( - boost::multi::elements_iterator_t< Q1*, L1> first, - boost::multi::elements_iterator_t< Q1*, L1> last , - boost::multi::elements_iterator_t, L2> d_first -)-> boost::multi::elements_iterator_t, L2> { - return copy_n(first, last - first, d_first); -} - -template -auto copy( - boost::multi::elements_iterator_t, L1> first, - boost::multi::elements_iterator_t, L1> last , - boost::multi::elements_iterator_t< Q2*, L2> d_first -)-> boost::multi::elements_iterator_t< Q2*, L2> { - return copy_n(first, last - first, d_first); -} - - -//template< -// multi::dimensionality_type D, -// class T1, class Q1, -// class Size, -// class T2, class Q2 -//> -//auto copy_n( -// boost::multi::array_iterator> first , Size count, -// boost::multi::array_iterator d_first -//)-> boost::multi::array_iterator { -// copy_n( -// static_cast>>( first), count, -// d_first -// ); -// return d_first + count; -//} - -//template< -// multi::dimensionality_type D, -// class T1, class Q1, -// class Size, -// class T2, class Q2 -//> -//auto copy_n( -// boost::multi::array_iterator> first , Size count, -// boost::multi::array_iterator> d_first -//)-> boost::multi::array_iterator> { -// copy_n( -// static_cast>>( first), count, -// static_cast>>(d_first) -// ); -// return d_first + count; -//} - -// copy - -template< - multi::dimensionality_type D, - class T1, class Q1, - class T2, class Q2 -> -auto copy( - boost::multi::array_iterator first, - boost::multi::array_iterator last , - boost::multi::array_iterator> d_first -)-> boost::multi::array_iterator> { - copy_n(first, last - first, static_cast>>(d_first)); - return d_first + (last - first); -} - -template< - multi::dimensionality_type D, - class T1, class Q1, - class T2, class Q2 -> -auto copy( - boost::multi::array_iterator> first, - boost::multi::array_iterator> last , - boost::multi::array_iterator d_first -)-> boost::multi::array_iterator { - return copy_n(first, last - first, d_first); -} - -template< - 
multi::dimensionality_type D, - class T1, class Q1, - class T2, class Q2 -> -auto copy( - boost::multi::array_iterator> first, - boost::multi::array_iterator> last , - boost::multi::array_iterator> d_first -)-> boost::multi::array_iterator> { - return copy_n(first, last - first, d_first); -} - -// uninitialized_copy_n - -template< - multi::dimensionality_type D, - class T1, class Q1, - class Size, - class T2, class Q2 -> -auto uninitialized_copy_n( - boost::multi::array_iterator first , Size count, - boost::multi::array_iterator> d_first -)-> boost::multi::array_iterator> { - uninitialized_copy_n( - first , count, - static_cast>>(d_first) - ); - return d_first + count; -} - -template< - multi::dimensionality_type D, - class T1, class Q1, - class Size, - class T2, class Q2 -> -auto uninitialized_copy_n( - boost::multi::array_iterator> first , Size count, - boost::multi::array_iterator d_first -)-> boost::multi::array_iterator { - uninitialized_copy_n( - static_cast>>( first), count, - d_first - ); - return d_first + count; -} - -template< - multi::dimensionality_type D, - class T1, class Q1, - class Size, - class T2, class Q2 -> -auto uninitialized_copy_n( - boost::multi::array_iterator> first , Size count, - boost::multi::array_iterator> d_first -)-> boost::multi::array_iterator> { - uninitialized_copy_n( - static_cast>>( first), count, - static_cast>>(d_first) - ); - return d_first + count; -} - -// uninitalized copy - -template< - multi::dimensionality_type D, - class T1, class Q1, - class T2, class Q2 -> -auto uninitialized_copy( - boost::multi::array_iterator first, - boost::multi::array_iterator last , - boost::multi::array_iterator> d_first -)-> boost::multi::array_iterator> { - return uninitialized_copy_n(first, last - first, static_cast>>(d_first)); -} - -template< - multi::dimensionality_type D, - class T1, class Q1, - class T2, class Q2 -> -auto uninitialized_copy( - boost::multi::array_iterator> first, - boost::multi::array_iterator> last , - 
boost::multi::array_iterator d_first -)-> boost::multi::array_iterator { - return uninitialized_copy_n(first, last - first, d_first); -} - -template< - multi::dimensionality_type D, - class T1, class Q1, - class T2, class Q2 -> -auto uninitialized_copy( - boost::multi::array_iterator> first, - boost::multi::array_iterator> last , - boost::multi::array_iterator> d_first -)-> boost::multi::array_iterator> { - return uninitialized_copy_n(first, last - first, d_first); -} - -template< - class Alloc, class InputIt, class Size, class... T, class ForwardIt = ptr, - typename InputV = typename std::pointer_traits::element_type, - typename ForwardV = typename std::pointer_traits::element_type -// , typename = std::enable_if_t{}> -> -ForwardIt alloc_uninitialized_copy_n(Alloc&, InputIt f, Size n, ptr d){ - if(std::is_trivially_constructible{}) { - memcpy(d, f, n*sizeof(ForwardV)); // TODO, this is not correct whe InputIt is not a pointer - return d + n; - } else {assert(0);} - return d; -} - -template> -ForwardIt alloc_uninitialized_move_n(Alloc& a, InputIt f, Size n, ptr d) { - return alloc_uninitialized_copy_n(a, f, n, d); -} - -template -ptr const_pointer_cast(ptr const& p){return ptr{p.impl_};} - -template -static std::true_type is_ref_aux(ref const&); -std::false_type is_ref_aux(...); - -template struct is_ref : decltype(is_ref_aux(std::declval())){}; - -template -struct ref { - using value_type = T; - using reference = value_type&; - using pointer = ptr; - using raw_reference = value_type&; - - private: - pointer pimpl_; - constexpr explicit ref(pointer const& p) : pimpl_{p}{} - template friend struct ref; - - public: - constexpr explicit ref(T& t) : pimpl_{&t} {} - template(std::declval>().pimpl_))> - /*explicit(false)*/ constexpr ref(ref&& o) /*HD*/ : pimpl_{multi::implicit_cast(std::move(o).pimpl_)} {} - template>())>, pointer>{}>> - explicit/*(true) */ constexpr ref(ref const& o, void** = 0) /*HD*/ : pimpl_{static_cast(o)} {} - template friend struct ptr; - - 
pointer operator&() & __host__ __device__ {return pimpl_;} - pointer operator&() const& __host__ __device__ {return pimpl_;} - pointer operator&() && __host__ __device__ {return pimpl_;} - - struct skeleton_t { - std::array buff; T* p_; - SLOW - explicit skeleton_t(T* p) /*HD*/ : p_{p} { - #if __CUDA_ARCH__ - #else - {cudaError_t s = cudaMemcpy(buff.data(), p_, buff.size(), cudaMemcpyDeviceToHost); (void)s; assert(s == cudaSuccess);} - #endif - } - operator T&()&& /*HD*/{return reinterpret_cast(buff);} - void conditional_copyback_if_not(std::false_type) const /*HD*/{ - #if __CUDA_ARCH__ - // *p_ = reinterpret_cast( - #else - {cudaError_t s = cudaMemcpy(p_, buff.data(), buff.size(), cudaMemcpyHostToDevice); (void)s; assert(s == cudaSuccess);} - #endif - } - void conditional_copyback_if_not(std::true_type) const /*HD*/{ - #if __CUDA_ARCH__ - // *p_ = reinterpret_cast( - #else - // [[maybe_unused]] - cudaError_t s = cudaMemcpy(p_, buff.data(), buff.size(), cudaMemcpyHostToDevice); - (void)s; assert(s == cudaSuccess); - #endif - } - ~skeleton_t() /*HD*/{conditional_copyback_if_not(std::is_const{});} - }; - skeleton_t skeleton()&& /*HD*/{return skeleton_t{raw_pointer_cast(pimpl_.rp_)};} - - public: - constexpr ref(ref&& r) : pimpl_{std::move(r.pimpl_).rp_} {} - - private: - ref& move_assign(ref&& other, std::true_type) & { - cudaError_t s = cudaMemcpy(pimpl_.rp_, other.rp_, sizeof(T), cudaMemcpyDeviceToDevice); (void)s; assert(s == cudaSuccess); - return *this; - } - ref& move_assign(ref&& other, std::false_type) & { - cudaError_t s = cudaMemcpy(pimpl_.rp_, other.rp_, sizeof(T), cudaMemcpyDeviceToDevice); (void)s; assert(s == cudaSuccess); - return *this; - } - - public: - template{}, int> =0> - [[deprecated]] - ref&& operator=(ref const& other) && { - cudaError_t s = cudaMemcpy(pimpl_.rp_, other.pimpl_.rp_, sizeof(T), cudaMemcpyDeviceToDevice); assert(s==cudaSuccess); (void)s; - return std::move(*this); - } -#if __CUDA__ -#ifdef __NVCC__ - #ifndef __CUDA_ARCH__ - 
template - [[deprecated]] - __host__ auto operator=(TT&& t) && - ->decltype(*pimpl_.rp_ = std::forward(t), std::move(*this)){ - assert(0); return std::move(*this);} - #else - template - __device__ auto operator=(TT&& t) && - ->decltype(*pimpl_.rp_ = std::forward(t), std::move(*this)){ - return *pimpl_.rp_ = std::forward(t), std::move(*this);} - #endif -#else - template - [[deprecated("because it implies slow memory access, suround code with CUDA_SLOW")]] - __host__ auto operator=(TT&& t) && - ->decltype(*pimpl_.rp_ = std::forward(t), std::move(*this)){ // assert(0); - static_assert(std::is_trivially_assignable{}, "!"); - cudaError_t s=cudaMemcpy(pimpl_.rp_, std::addressof(t), sizeof(T), cudaMemcpyHostToDevice);assert(s==cudaSuccess);(void)s; - return std::move(*this); - } - template - __device__ ref&& operator=(TT&& t) &&{*pimpl_.rp_ = std::forward(t); return std::move(*this);} -#endif -#else - template{}> > - SLOW - ref&& operator=(TT const& t) &&{ - static_assert(std::is_trivially_assignable{}); - cudaError_t s=cudaMemcpy(pimpl_.rp_, std::addressof(t), sizeof(T), cudaMemcpyHostToDevice);assert(s==cudaSuccess);(void)s; - return std::move(*this); - } -#endif - -#if defined(__clang__) -#if defined(__CUDA__) //&& !defined(__CUDA_ARCH__) - operator T()&& __device__{return *(pimpl_.rp_);} - operator T()&& __host__ {static_assert( std::is_trivially_copyable>{}, "!" ); - typename std::aligned_storage::type ret; - {cudaError_t s=cudaMemcpy((void*)&ret, pimpl_.rp_, sizeof(T), cudaMemcpyDeviceToHost); assert(s == cudaSuccess); (void)s;} - return *reinterpret_cast(&ret); - } - operator T() const& __host__ {static_assert( std::is_trivially_copyable>{}, "!" 
); - typename std::aligned_storage::type ret; - {cudaError_t s=cudaMemcpy((void*)&ret, pimpl_.rp_, sizeof(T), cudaMemcpyDeviceToHost); assert(s == cudaSuccess); (void)s;} - return *reinterpret_cast(&ret); - } -#else - SLOW operator T() && { - std::array buff; - cudaError_t s = cudaMemcpy(buff.data(), pimpl_.rp_, buff.size(), cudaMemcpyDeviceToHost); - switch(s) { - case cudaSuccess : break; - case cudaErrorInvalidValue : throw std::runtime_error{"cudaErrorInvalidValue"}; - case cudaErrorInvalidMemcpyDirection: throw std::runtime_error{"cudaErrorInvalidMemcpyDirection"}; - default : throw std::runtime_error{"unknown error"}; - } - return std::move(reinterpret_cast(buff)); - } - SLOW operator T() const&{ - std::array buff; // char buff[sizeof(T)]; - cudaError_t s = cudaMemcpy(buff.data(), pimpl_.rp_, buff.size(), cudaMemcpyDeviceToHost); - switch(s) { - case cudaSuccess : break; - case cudaErrorInvalidValue : throw std::runtime_error{"cudaErrorInvalidValue"}; - case cudaErrorInvalidMemcpyDirection: throw std::runtime_error{"cudaErrorInvalidMemcpyDirection"}; - default : throw std::runtime_error{"unknown error"}; - } - return reinterpret_cast(buff); - } -#endif -#else // no clang -#if __CUDA_ARCH__ - operator T()&& __device__{return *(pimpl_.rp_);} -#else - SLOW - operator T() && __host__ { - std::array buff; // char buff[sizeof(T)]; - {cudaError_t s = cudaMemcpy(buff.data(), pimpl_.rp_, buff.size(), cudaMemcpyDeviceToHost); assert(s == cudaSuccess); (void)s;} - return std::move(reinterpret_cast(buff)); - } -#endif -#if defined(__clang__) - [[SLOW]] operator T() const& __host__{ - std::array buff; // char buff[sizeof(T)]; - { - // cudaError_t s = cudaMemcpy(buff, this->rp_, sizeof(T), cudaMemcpyDeviceToHost); - auto e = static_cast(cudaMemcpy(buff.data(), pimpl_.rp_, buff.size(), cudaMemcpyDeviceToHost)); - if(e != Cuda::error::success) {throw std::system_error(e, " when trying to memcpy for element access");} - } - return std::move(reinterpret_cast(buff)); - } - 
operator T() const& __device__{return *(pimpl_.rp_);} -#else //no clang -#if __CUDA_ARCH__ - operator T() const& __device__{return *(pimpl_.rp_);} -#else - SLOW - operator T() const& __host__{ - std::array buff; // char buff[sizeof(T)]; - { - auto e = static_cast(cudaMemcpy(buff.data(), pimpl_.rp_, buff.size(), cudaMemcpyDeviceToHost)); - if(e != Cuda::error::success) {throw std::system_error(e, " when trying to memcpy for element access");} - } - return std::move(reinterpret_cast(buff)); - } -#endif -#endif - #endif - -#ifndef _MULTI_MEMORY_CUDA_DISABLE_ELEMENT_ACCESS - bool operator!=(ref const& other) const&{return not(*this == other);} - template - bool operator!=(ref&& other)&&{ - std::array buff1; // char buff1[sizeof(T)]; - {cudaError_t s1 = cudaMemcpy(buff1.data(), this->impl_, buff1.size(), cudaMemcpyDeviceToHost); assert(s1 == cudaSuccess); (void)s1;} - std::array buff2; - {cudaError_t s2 = cudaMemcpy(buff2.data(), other.impl_, buff2.size(), cudaMemcpyDeviceToHost); assert(s2 == cudaSuccess); (void)s2;} - return reinterpret_cast(buff1) != reinterpret_cast(buff2); - } -#else -// bool operator==(ref const& other) const = delete; -#endif -#if 1 - -#if defined(__clang__) -#if defined(__CUDA__) && defined(__CUDA_ARCH__) - template>{}> > - friend auto operator==(ref&& self, Other&& other) __host__{ -//#if __CUDA_ARCH__ -// return std::forward(other)==*(this->rp_); -// return *(self->rp_) == std::forward(other); -//#else - return std::move(self).operator T() == std::forward(other); - // return static_cast(std::move(self)) == std::forward(other); -//#endif - } - template{}> > - friend auto operator==(ref&& self, Other&& other) __device__ { - return *(self->rp_) == std::forward(other); - } -#else - template{}> > - friend auto operator==(ref&& self, Other&& other) __host__ { - return std::move(self).operator T() == std::forward(other); - } -#endif -#else // no clang - template{}> > - friend auto operator==(ref&& self, Other&& other) /*HD*/{ -// #if __CUDA_ARCH__ 
-// return *(self.pimpl_.rp_) == std::forward(other); -// #else - return static_cast(std::move(self)) == std::forward(other); -// #endif - } -#endif - - friend __host__ __device__ decltype(auto) raw_reference_cast(ref&& r) {return *raw_pointer_cast(&r);} - friend __host__ __device__ auto raw_value_cast(ref&& r) {return std::move(r).operator T();} - auto raw_value_cast() && {return std::move(*this).operator T();} - - template{}> > - friend constexpr bool operator==(Other&& other, ref const& self) { -#if __CUDA_ARCH__ -// return std::forward(other)==*(this->rp_); - return std::forward(other)==*(self.pimpl_); -#else - return std::forward(other)== self.operator T();//static_cast(std::move(self)); -#endif - } - template{}> > - SLOW - bool operator==(ref&& other) && { - std::array buff1; // char buff1[sizeof(T)]; - // cuda::memcpy(buff1, ref::rp_, sizeof(T)); - {cudaError_t s1 = cudaMemcpy(buff1.data(), pimpl_.rp_, buff1.size(), cudaMemcpyDeviceToHost); assert(s1 == cudaSuccess); (void)s1;} - std::array buff2; // char buff2[sizeof(Other)]; - {cudaError_t s2 = cudaMemcpy(buff2.data(), raw_pointer_cast(&other), buff2.size(), cudaMemcpyDeviceToHost); assert(s2 == cudaSuccess); (void)s2;} - return reinterpret_cast(buff1) == reinterpret_cast(buff2); - } -#if 1 - SLOW - bool operator==(ref const& other) &&{ - std::array buff1; // char buff1[sizeof(T)]; - {cudaError_t s1 = cudaMemcpy(buff1.data(), pimpl_.rp_, buff1.size(), cudaMemcpyDeviceToHost); assert(s1 == cudaSuccess); (void)s1;} - std::array buff2; // char buff2[sizeof(T)]; - {cudaError_t s2 = cudaMemcpy(buff2.data(), other.pimpl_.rp_, buff2.size(), cudaMemcpyDeviceToHost); assert(s2 == cudaSuccess); (void)s2;} - return reinterpret_cast(buff1) == reinterpret_cast(buff2); - } -#endif -#endif - -#if __CUDA_ARCH__ - template() += std::declval())> __device__ ref& operator+=(O&& o) && {*(pimpl_.rp_) += std::forward(o); return std::move(*this);} - template() -= std::declval())> __device__ ref& operator-=(O&& o) && 
{*(pimpl_.rp_) -= std::forward(o); return std::move(*this);} -#else - template() += std::declval())> __host__ SLOW ref&& operator+=(O&& o) && { - std::array buff; - {cudaError_t s = cudaMemcpy(buff.data(), pimpl_.rp_, buff.size(), cudaMemcpyDeviceToHost); assert(s == cudaSuccess); (void)s;} - reinterpret_cast(buff) += std::forward(o); - {cudaError_t s = cudaMemcpy(pimpl_.rp_, buff.data(), buff.size(), cudaMemcpyHostToDevice); assert(s == cudaSuccess); (void)s;} - return std::move(*this); - } - template() -= std::declval())> __host__ SLOW ref&& operator-=(O&& o) && { - std::array buff; - {cudaError_t s = cudaMemcpy(buff.data(), pimpl_.rp_, buff.size(), cudaMemcpyDeviceToHost); assert(s == cudaSuccess); (void)s;} - reinterpret_cast(buff) -= std::forward(o); - {cudaError_t s = cudaMemcpy(pimpl_.rp_, buff.data(), buff.size(), cudaMemcpyHostToDevice); assert(s == cudaSuccess); (void)s;} - return std::move(*this); - } -#endif - - private: - template - void swap(Ref&& b) &&{ - T tmp = std::move(*this); - BEGIN_CUDA_SLOW - *this = std::forward(b); - b = std::move(tmp); - END_CUDA_SLOW - } - - public: - template -#if __NVCC__ - __attribute__((deprecated)) -#else - [[deprecated("WARNING: slow cuda memory operation")]] -#endif - friend void swap(ref&& a, Ref&& b){a.swap(std::forward(b));} - template DEPRECATED("WARNING: slow cuda memory operation") - friend void swap(Ref&& a, ref&& b){std::move(b).swap(a);} - DEPRECATED("WARNING: slow cuda memory operation") - friend void swap(ref&& a, ref&& b){std::move(a).swap(std::move(b));} - ref&& operator++()&&{++(std::move(*this).skeleton()); return std::move(*this);} - ref&& operator--()&&{--(std::move(*this).skeleton()); return std::move(*this);} -// template{}>> -// friend auto conj(Self&& self, ref* = 0){ -// using std::conj; -// return conj(std::forward(self).skeleton()); -// } - template{}, int> =0> - friend /*std::decay_t*/ auto conj(RRef const& self){ - return adl_conj(self.operator T()); - } - template{}, int> =0> - friend 
auto /*std::decay_t*/ imag(RRef const& self){ - return adl_imag(self.operator T()); - } - template{}, int> =0> - friend auto /*std::decay_t*/ real(RRef const& self){ - return adl_real(self.operator T()); - } -}; - -}}}} - -namespace thrust { -template P raw_pointer_cast(boost::multi::memory::cuda::ptr const& p) __host__ __device__ { - return p.raw_pointer_cast(); -} -} -#undef SLOW - -//namespace boost { -//namespace multi { - -//template< -// class T1, class Q1, -// class Size, -// class T2, class P2//, class E2 = typename std::pointer_traits::element_type //, typename TP2 = decltype(ptr{std::declval()}) -//> struct adl_custom_copy< -// boost::multi::array_iterator, boost::multi::array_iterator, -// boost::multi::array_iterator -//> { -// auto copy( -// boost::multi::array_iterator first , boost::multi::array_iterator last, -// boost::multi::array_iterator d_first -// )-> boost::multi::array_iterator { -// return copy_n(first, last - first, d_first); -// } -//} - -//}} - -#if not __INCLUDE_LEVEL__ // def _TEST_MULTI_MEMORY_ADAPTORS_CUDA_PTR - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA pointers" -#define BOOST_TEST_DYN_LINK -#include - -#include "../cuda/malloc.hpp" - -#include "../../../adaptors/blas/numeric.hpp" - -namespace multi = boost::multi; -namespace cuda = multi::memory::cuda; - -template __device__ void WHAT(T&&) = delete; - -#if __CUDA_ARCH__ -__device__ void f(cuda::ptr){ -// printf("%f", *p); -// printf("%f", static_cast(*p)); -} -#endif - -BOOST_AUTO_TEST_CASE(multi_memory_cuda_ptr){ - -// static_assert( not std::is_convertible*, multi::memory::cuda::ptr>>{}, "!" ); -// static_assert( not std::is_convertible>, std::complex*>{}, "!" 
); - - multi::memory::cuda::ptr> xxx = nullptr; - std::complex* ppp = raw_pointer_cast(xxx); (void)ppp; - { - auto ppp2 = static_cast>>(cuda::malloc(1*sizeof(std::complex))); - std::complex const dd{*ppp2}; - assert( dd == std::complex{0} ); - } - using T = double; - static_assert( sizeof(cuda::ptr) == sizeof(T*), "!"); - std::size_t const n = 100; - { - using cuda::ptr; - auto p = static_cast>(cuda::malloc(n*sizeof(T))); -CUDA_SLOW( - *p = 99.; -) - { - ptr pc = p; - BOOST_REQUIRE( *p == *pc ); - } - BOOST_REQUIRE( CUDA_SLOW( *p == 99. ) ); - BOOST_REQUIRE( *p != 11. ); - cuda::free(p); - - cuda::ptr P = nullptr; - BOOST_REQUIRE( P == nullptr ); - ptr pv = p; (void)pv; - } -// what::rebind>(); -// what>::rebind>(); - static_assert( std::is_same>::rebind, cuda::ptr>{} , "!"); -} - -BOOST_AUTO_TEST_CASE(ptr_conversion){ - cuda::ptr p = nullptr; - cuda::ptr pc = p; (void)pc; - static_assert(not std::is_convertible, double*>{}); -} - -template struct Complex_{T real; T imag;}; - -BOOST_AUTO_TEST_CASE(multi_memory_cuda_ptr_member_pointer){ - - Complex_ c{10.,20.}; -// double Complex_::* - Complex_* p = &c; - auto pm = &Complex_::imag; - BOOST_REQUIRE( p->*pm == 20. ); - BOOST_REQUIRE( *p.*pm == 20. 
); - -// cuda::ptr> pcu; -// pcu->*pm; -} - - - -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/test/array.cpp b/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/test/array.cpp deleted file mode 100644 index 126214b231..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/adaptors/cuda/test/array.cpp +++ /dev/null @@ -1,176 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS -${CUDACXX:-nvcc} -std=c++17 -x cu -O3 $0 -o $0x --extended-lambda --expt-relaxed-constexpr --Werror=cross-execution-space-call -lboost_unit_test_framework -lboost_timer -Xcudafe=--display_error_number -D_DISABLE_CUDA_SLOW &&$0x&&rm $0x; exit -#endif - -#define _DISABLE_CUDA_SLOW - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi CUDA array" -#define BOOST_TEST_DYN_LINK -#include - -//#include "../../cuda/allocator.hpp" -//#include "../../../../array.hpp" - -#include "../../../../adaptors/cuda.hpp" - -#include - -namespace multi = boost::multi; -namespace cuda = multi::memory::cuda; - -template T what() = delete; -template T what(T&&) = delete; - -BOOST_AUTO_TEST_CASE(cuda_allocators){ - - multi::array > A1(200, 0.); - - BOOST_REQUIRE( size(A1) == 200 ); - BEGIN_CUDA_SLOW; - A1[100] = 1.; - END_CUDA_SLOW; -// what(A1.data()); - - multi::array> const B1(200, 2.); -// what(B1.data()); - BEGIN_CUDA_SLOW; - BOOST_REQUIRE( B1[10] == 2. ); - END_CUDA_SLOW; - -// what(A1[10]); -// what(B1[10]); -BEGIN_CUDA_SLOW; - A1[10] = B1[10]; -END_CUDA_SLOW; -// BOOST_REQUIRE( A1[10] == 2. ); - -// multi::array> C1(200, 0.); - -// B1[100] = 2.; -// C1[100] = 3.; - -} - -BOOST_AUTO_TEST_CASE(cuda_copy_timing){ - multi::array::extensions_type const x = {10000, 10000}; - std::cout<<"double 2D\nsize "<< x.num_elements()*sizeof(double)/1e6 <<" MBs"< const A(x, 999.); - multi::array B(x, 888.); - boost::timer::cpu_timer timer; - ~B() = ~A(); - BOOST_REQUIRE( B[1000][1000] == 999. 
); - return timer.elapsed(); - }(); - std::cout<<"cpu "<< cpu_times.wall/1e9 <<" sec"<> const A(x, 999.); - multi::array> B(x, 888.); - boost::timer::cpu_timer timer; - ~B() = ~A(); - BOOST_REQUIRE( B[1000][1000] == 999. ); - return timer.elapsed(); - }(); - std::cout<<"gpu "<< gpu_times.wall/1e9 <<" sec"<> const A(x, 999.); - multi::array> B(x, 888.); - B() = A(); - boost::timer::cpu_timer timer; - ~B() = ~A(); - BOOST_REQUIRE( B[1000][1000] == 999. ); - return timer.elapsed(); - }(); - std::cout<<"mng "<< mng_times.wall/1e9 <<" sec"<; - multi::array::extensions_type const x = {10000, 10000}; - std::cout<<"complex 2D\nsize "<< x.num_elements()*sizeof(complex)/1e6 <<" MBs"<>{} ); - - auto const cpu_times = [&]{ - multi::array const A(x, 999.); - multi::array B(x, 888.); - boost::timer::cpu_timer timer; - B() = A(); - BOOST_REQUIRE( B[1000][1000] == 999. ); - return timer.elapsed(); - }(); - std::cout<<"cpu "<< cpu_times.wall/1e9 <<" sec"<< std::endl; - - auto const gpu_times = [&]{ - multi::array> const A(x, 999.); - multi::array> B(x, 888.); - boost::timer::cpu_timer timer; - ~B() = ~A(); - BOOST_REQUIRE( B[1000][1000] == 999. ); - return timer.elapsed(); - }(); - std::cout<<"gpu "<< gpu_times.wall/1e9 <<" sec"<< std::endl; - - auto const mng_times = [&]{ - multi::array> const A(x, 999.); - multi::array> B(x, 888.); - B() = A(); - boost::timer::cpu_timer timer; - ~B() = ~A(); - BOOST_REQUIRE( B[1e3][1e3] == 999. 
); - return timer.elapsed(); - }(); - std::cout<<"mng "<< mng_times.wall/1e9 <<" sec"<< std::endl; -} - -BOOST_AUTO_TEST_CASE(cuda_managed_empty){ - using complex = std::complex; - multi::array> A; - multi::array> B = A; - BOOST_REQUIRE( A.is_empty() ); - BOOST_REQUIRE( B.is_empty() ); - BOOST_REQUIRE( A == B ); -} - -BOOST_AUTO_TEST_CASE(cuda_copy_complex_timing_4d){ - using complex = std::complex; - multi::array::extensions_type const x = {100, 100, 100, 100}; - std::cout<<"complex 4D\nsize "<< x.num_elements()*sizeof(complex)/1e6 <<" MBs"<>{} ); - - auto const cpu_times = [&]{ - multi::array const A(x, 999.); - multi::array B(x, 888.); - boost::timer::cpu_timer timer; - B() = A(); - BOOST_REQUIRE( B[10][10][10][10]== 999. ); - return timer.elapsed(); - }(); - std::cout<<"cpu "<< cpu_times.wall/1e9 <<" sec"<< std::endl; - - auto const gpu_times = [&]{ - multi::array> const A(x, 999.); - multi::array> B(x, 888.); - boost::timer::cpu_timer timer; - ~B() = ~A(); - BOOST_REQUIRE( B[10][10][10][10]== 999. ); - return timer.elapsed(); - }(); - std::cout<<"gpu "<< gpu_times.wall/1e9 <<" sec"<< std::endl; - - auto const mng_times = [&]{ - multi::array> const A(x, 999.); - multi::array> B(x, 888.); - B() = A(); - boost::timer::cpu_timer timer; - ~B() = ~A(); - BOOST_REQUIRE( B[10][10][10][10]== 999. 
); - return timer.elapsed(); - }(); - std::cout<<"mng "<< mng_times.wall/1e9 <<" sec"<< std::endl; -} - diff --git a/external_codes/boost_multi/multi/include/multi/memory/instrumented.hpp b/external_codes/boost_multi/multi/include/multi/memory/instrumented.hpp deleted file mode 100644 index 0f3b314c2f..0000000000 --- a/external_codes/boost_multi/multi/include/multi/memory/instrumented.hpp +++ /dev/null @@ -1,178 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS -(echo "#include\""$0"\"" > $0.cpp) && c++ -std=c++17 -Wall -Wextra -Wfatal-errors -D_TEST_BOOST_MULTI_MEMORY_INSTRUMENTED $0.cpp -o $0x && valgrind $0x && rm $0x $0.cpp; exit -#endif -#ifndef BOOST_MULTI_MEMORY_INSTRUMENTED_HPP -#define BOOST_MULTI_MEMORY_INSTRUMENTED_HPP - -#include // max_align_t -#include -#include -#include -#include -#include - -#include - -//#include "../memory/block.hpp" -//#include "../memory/allocator.hpp" - -#include // accumulate -#include -namespace boost{ -namespace multi{ -namespace memory{ - -template< - class MemoryResource = std::pmr::memory_resource, - typename SizeType = std::size_t, - typename VoidPointer = void* -> -class instrumented{ - MemoryResource* back_ = std::pmr::get_default_resource(); - using void_pointer = VoidPointer; - using size_type = SizeType; - std::map blocks_; -public: - instrumented() = default; - instrumented(instrumented const&) = delete; - std::map const& blocks() const{return blocks_;} - auto leak(){ - return std::accumulate( - blocks_.begin(), blocks_.end(), size_type{}, [](auto a, auto&& e){return a+e.second;} - ); - } - typename instrumented::void_pointer - allocate(size_type required_bytes, size_type align = alignof(std::max_align_t)){ - std::cout << "allocating " << required_bytes << std::endl; - auto ret = back_->allocate(required_bytes, align); - blocks_[ret] += required_bytes; - return ret; - } - void deallocate(typename instrumented::void_pointer p, typename instrumented::size_type discarded_bytes, size_type align = alignof(std::max_align_t)){ - 
std::cout << "deallocating " << discarded_bytes << std::endl; - back_->deallocate(p, discarded_bytes, align); - blocks_[p] -= discarded_bytes; - } -}; -}}} - -#include "../../multi/memory/allocator.hpp" - -namespace boost{ -namespace multi{ -namespace memory{ - -template -using instrumented_allocator = multi::memory::allocator>; - -}}} - -#if _TEST_BOOST_MULTI_MEMORY_INSTRUMENTED - -#include "../../multi/array.hpp" -#include "../memory/monotonic.hpp" - -#include -#include -#include - -namespace multi = boost::multi; -using std::cout; - -int main(){ -{ - multi::memory::instrumented<> im; - auto p1 = im.allocate(1*sizeof(double), alignof(double)); - auto p2 = im.allocate(255*sizeof(double), alignof(double)); - im.deallocate(p2, 255*sizeof(double)); - im.deallocate(p1, 1*sizeof(double)); - assert( im.blocks().size() == 2 ); - assert( not im.leak() ); - { - multi::memory::instrumented<> im; - multi::memory::allocator > A(&im); - double* p = A.allocate(1); - A.construct(p, 8.); - assert( *p == 8. ); - double* arr = A.allocate(255); - A.construct(arr, 81.); - assert( *arr == 81. 
); - A.deallocate(arr, 255); - A.deallocate(p, 1); - assert( not im.leak() ); - } - { - multi::memory::instrumented<> im; - multi::memory::allocator > A(&im); - { - std::vector v(A); - v.push_back(99); - v.push_back(10); - v.push_back(12); - v.resize(1); - v.push_back(10); - } - assert( not im.leak() ); - } - { - multi::memory::instrumented<> im; - using alloc = multi::memory::instrumented_allocator; - alloc A(&im); - alloc B(A); - { - multi::static_array arr1({10}, 99., A); - multi::static_array arr2({10, 20}, 99., A); - multi::static_array arr3({10, 20, 30}, 99., B); - multi::static_array brr3(arr3); - } - assert( not im.leak() ); - } - { - multi::memory::instrumented<> im; - multi::memory::allocator > A(&im); - { - multi::array arr1({10}, 99., A); - multi::array arr2({10, 20}, 99., A); - multi::array arr3({10, 20, 30}, 99., A); - } - assert( not im.leak() ); - } - { - multi::memory::instrumented<> im; - { - using alloc = multi::memory::instrumented_allocator; - multi::array A({10, 20, 30}, 99., &im); - multi::array B({10, 20, 30}, 11., &im); - B = std::move(A); assert( empty(A) ); - } - assert( not im.leak() ); - } - std::cout << "-------------------" << std::endl; - { - multi::memory::instrumented<> im; - { - using alloc = multi::memory::instrumented_allocator; - // using alloc = std::allocator; - multi::array arr1({10}, 99., &im); - multi::array arr2({10, 20}, 99., &im); - multi::array arr3({10, 20, 30}, 99., &im); - arr1.reextent({20}); - arr2.reextent({200, 10}); - arr3.reextent({201, 10, 100}); - arr2.clear(); - multi::array brr1 = arr1; - multi::array brr2(arr2); - assert( arr3.num_elements() == 201*10*100 ); - multi::array brr3(std::move(arr3)); - assert( arr3.num_elements() == 0 ); - assert( brr3.num_elements() == 201*10*100 ); - } - assert( not im.leak() ); - } - return 0; -// return 0; -} -} -#endif -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/utility/const_iterator.hpp 
b/external_codes/boost_multi/multi/include/multi/utility/const_iterator.hpp deleted file mode 100644 index 56e63dde53..0000000000 --- a/external_codes/boost_multi/multi/include/multi/utility/const_iterator.hpp +++ /dev/null @@ -1,131 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS// -*-indent-tabs-mode:t;tab-width:4;c-basic-offset:4;truncate-lines:1-*- -$CXX $0 -o $0x -DBOOST_TEST_DYN_LINK -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif - -#include // iterator_traits - -namespace boost{ -namespace multi{ - -template -struct reference_traits{ - using reference = Reference; - using rebind_const = std::add_const_t; -}; - -template -struct reference_traits{ - using reference = T&; - using rebind_const = std::add_const_t; -}; - -template -class const_iterator : Iter{ -// see notes in https://en.cppreference.com/w/cpp/iterator/move_iterator - public: - using base_type = Iter; - using iterator_type = Iter; - using iterator_category = typename std::iterator_traits::iterator_category; - using iterator_concept = std::input_iterator_tag; - using value_type = typename std::iterator_traits::value_type; - using difference_type = typename std::iterator_traits::difference_type; - using pointer = Iter; - using reference = typename multi::reference_traits::reference>::rebind_const; - - public: - constexpr const_iterator() = default; // (1) - constexpr explicit const_iterator(iterator_type x) : Iter{x} {} // (2) - template // (3) - constexpr explicit const_iterator(const_iterator const& o) : Iter{o.base()} {} - template - constexpr const_iterator& operator=(const_iterator const& o) { - static_cast(*this)=o.base(); - return *this; - } - constexpr base_type base() const {return static_cast(*this);} -// https://en.cppreference.com/w/cpp/iterator/move_iterator/operator* - reference operator*() const {return *static_cast(*this);} - constexpr pointer operator->() const {return &*static_cast(*this);} -// https://en.cppreference.com/w/cpp/iterator/move_iterator/operator_at - constexpr 
reference operator[](difference_type n) const {return static_cast(*this)[n];} -// https://en.cppreference.com/w/cpp/iterator/move_iterator/operator_arith - constexpr const_iterator& operator++(){return ++static_cast(*this), *this;} //(1) - constexpr const_iterator& operator--(){return --static_cast(*this), *this;} //(2) - constexpr const_iterator operator++(int){return const_iterator{static_cast(*this)++};}//(3) - constexpr const_iterator operator--(int){return const_iterator{static_cast(*this)--};}//(4) - constexpr const_iterator operator+(difference_type n) const{ //(5) - return const_iterator{static_cast(*this)+n}; - } - constexpr const_iterator operator-(difference_type n) const{ //(6) - return const_iterator{static_cast(*this)-n}; - } - constexpr const_iterator& operator+=(difference_type n){ //(7) - return static_cast(*this)+=n, *this; - } - constexpr const_iterator& operator-=(difference_type n){ //(8) - return static_cast(*this)-=n, *this; - } - template::value_type, typename std::iterator_traits::value_type>{} and - std::is_assignable{} and - not std::is_assignable::reference, typename std::iterator_traits::value_type>{}, int - > =0> - operator Other() const{return base();} - using rebind_const = const_iterator; -}; - -template constexpr bool operator==(const_iterator const& lhs, const_iterator const& rhs){return lhs.base()==rhs.base();} //(1) -template constexpr bool operator!=(const_iterator const& lhs, const_iterator const& rhs){return lhs.base()!=rhs.base();} //(2) -template constexpr bool operator< (const_iterator const& lhs, const_iterator const& rhs){return lhs.base()< rhs.base();} //(3) -template constexpr bool operator<=(const_iterator const& lhs, const_iterator const& rhs){return lhs.base()<=rhs.base();} //(4) -template constexpr bool operator> (const_iterator const& lhs, const_iterator const& rhs){return lhs.base()> rhs.base();} //(5) -template constexpr bool operator>=(const_iterator const& lhs, const_iterator const& rhs){return 
lhs.base()>=rhs.base();} //(6) -// TODO three way comparison for C++20 - -template -const_iterator make_const_iterator(It it){return const_iterator{it};} - -template ,typename Enable = void> -struct iterator_traits : Base{ - using rebind_const = multi::const_iterator; -}; - -template -struct iterator_traits : Base{ - using rebind_const = typename Iterator::rebind_const; -}; - -template -struct iterator_traits : std::iterator_traits{ - using rebind_const = T const*; -}; - -}} - -#if not __INCLUDE_LEVEL__ // TEST BELOW - -#define BOOST_TEST_MODULE test const_iterator -#ifdef BOOST_TEST_DYN_LINK -#include -#else -#include -#endif - -#include - -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(sematics) { - std::vector v(5, 9); - std::vector::iterator it = v.begin(); - *it += 1; - BOOST_REQUIRE(v[0] == 10 ); - - static_assert( std::is_same::rebind_const, double const*>{}, "" ); - static_assert( std::is_same::iterator>::rebind_const, multi::const_iterator::iterator> >{}, "" ); - - std::vector::const_iterator cit = multi::make_const_iterator(v.begin()); (void)cit; -} - -#endif - diff --git a/external_codes/boost_multi/multi/include/multi/utility/const_iterator_.hpp b/external_codes/boost_multi/multi/include/multi/utility/const_iterator_.hpp deleted file mode 100644 index c3dd52f66d..0000000000 --- a/external_codes/boost_multi/multi/include/multi/utility/const_iterator_.hpp +++ /dev/null @@ -1,133 +0,0 @@ -#ifdef COMPILATION_INSTRUCTIONS// -*-indent-tabs-mode:t;tab-width:4;c-basic-offset:4;truncate-lines:1-*- -$CXX $0 -o $0x -DBOOST_TEST_DYN_LINK -lboost_unit_test_framework&&$0x&&rm $0x;exit -#endif - -dsadsadsadsa - -#include // iterator_traits - -namespace boost{ -namespace multi{ - -template -struct reference_traits{ - using reference = Reference; - using rebind_const = std::add_const_t; -}; - -template -struct reference_traits{ - using reference = T&; - using rebind_const = std::add_const_t; -}; - -template -class const_iterator : Iter{ -// see notes in 
https://en.cppreference.com/w/cpp/iterator/move_iterator - public: - using base_type = Iter; - using iterator_type = Iter; - using iterator_category = typename std::iterator_traits::iterator_category; - using iterator_concept = std::input_iterator_tag; - using value_type = typename std::iterator_traits::value_type; - using difference_type = typename std::iterator_traits::difference_type; - using pointer = Iter; - using reference = typename multi::reference_traits::reference>::rebind_const; - - public: - constexpr const_iterator() = default; // (1) - constexpr explicit const_iterator(iterator_type x) : Iter{x} {} // (2) - template // (3) - constexpr explicit const_iterator(const_iterator const& o) : Iter{o.base()} {} - template - constexpr const_iterator& operator=(const_iterator const& o) { - static_cast(*this)=o.base(); - return *this; - } - constexpr base_type base() const {return static_cast(*this);} -// https://en.cppreference.com/w/cpp/iterator/move_iterator/operator* - reference operator*() const {return *static_cast(*this);} - constexpr pointer operator->() const {return &*static_cast(*this);} -// https://en.cppreference.com/w/cpp/iterator/move_iterator/operator_at - constexpr reference operator[](difference_type n) const {return static_cast(*this)[n];} -// https://en.cppreference.com/w/cpp/iterator/move_iterator/operator_arith - constexpr const_iterator& operator++(){return ++static_cast(*this), *this;} //(1) - constexpr const_iterator& operator--(){return --static_cast(*this), *this;} //(2) - constexpr const_iterator operator++(int){return const_iterator{static_cast(*this)++};}//(3) - constexpr const_iterator operator--(int){return const_iterator{static_cast(*this)--};}//(4) - constexpr const_iterator operator+(difference_type n) const{ //(5) - return const_iterator{static_cast(*this)+n}; - } - constexpr const_iterator operator-(difference_type n) const{ //(6) - return const_iterator{static_cast(*this)-n}; - } - constexpr const_iterator& 
operator+=(difference_type n){ //(7) - return static_cast(*this)+=n, *this; - } - constexpr const_iterator& operator-=(difference_type n){ //(8) - return static_cast(*this)-=n, *this; - } - template::value_type, typename std::iterator_traits::value_type>{} and - std::is_assignable{} and - not std::is_assignable::reference, typename std::iterator_traits::value_type>{}, int - > =0> - operator Other() const{return base();} - using rebind_const = const_iterator; -}; - -template constexpr bool operator==(const_iterator const& lhs, const_iterator const& rhs){return lhs.base()==rhs.base();} //(1) -template constexpr bool operator!=(const_iterator const& lhs, const_iterator const& rhs){return lhs.base()!=rhs.base();} //(2) -template constexpr bool operator< (const_iterator const& lhs, const_iterator const& rhs){return lhs.base()< rhs.base();} //(3) -template constexpr bool operator<=(const_iterator const& lhs, const_iterator const& rhs){return lhs.base()<=rhs.base();} //(4) -template constexpr bool operator> (const_iterator const& lhs, const_iterator const& rhs){return lhs.base()> rhs.base();} //(5) -template constexpr bool operator>=(const_iterator const& lhs, const_iterator const& rhs){return lhs.base()>=rhs.base();} //(6) -// TODO three way comparison for C++20 - -template -const_iterator make_const_iterator(It it){return const_iterator{it};} - -template ,typename Enable = void> -struct iterator_traits : Base{ - using rebind_const = multi::const_iterator; -}; - -template -struct iterator_traits : Base{ - using rebind_const = typename Iterator::rebind_const; -}; - -template -struct iterator_traits : std::iterator_traits{ - using rebind_const = T const*; -}; - -}} - -#if not __INCLUDE_LEVEL__ // TEST BELOW - -#define BOOST_TEST_MODULE test const_iterator -#ifdef BOOST_TEST_DYN_LINK -#include -#else -#include -#endif - -#include - -namespace multi = boost::multi; - -BOOST_AUTO_TEST_CASE(sematics) { - std::vector v(5, 9); - std::vector::iterator it = v.begin(); - *it += 1; 
- BOOST_REQUIRE(v[0] == 10 ); - - static_assert( std::is_same::rebind_const, double const*>{}, "" ); - static_assert( std::is_same::iterator>::rebind_const, multi::const_iterator::iterator> >{}, "" ); - - std::vector::const_iterator cit = multi::make_const_iterator(v.begin()); (void)cit; -} - -#endif - diff --git a/external_codes/boost_multi/multi/meta/libraries.json b/external_codes/boost_multi/multi/meta/libraries.json new file mode 100644 index 0000000000..42e5c258e3 --- /dev/null +++ b/external_codes/boost_multi/multi/meta/libraries.json @@ -0,0 +1,16 @@ +{ + "key": "multi", + "name": "Multi", + "authors": [ + "Alfredo Correa" + ], + "maintainers": [ + "Alfredo Correa " + ], + "description": "Multi is a modern C++ library that provides access and manipulation of data in multidimensional arrays, for both CPU and GPU memory.", + "category": [ + "Containers", + "Math" + ], + "cxxstd": "17" +} diff --git a/external_codes/boost_multi/multi/pre-push b/external_codes/boost_multi/multi/pre-push index 06049c5a3d..a8ac2d3273 100755 --- a/external_codes/boost_multi/multi/pre-push +++ b/external_codes/boost_multi/multi/pre-push @@ -1,30 +1,38 @@ #!/bin/bash # -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -#sudo apt install clang++ clang-tidy cmake g++ libblas-dev libfftw3-dev libboost-test-dev libboost-timer-dev make +# sudo apt install ccache clang clang-tidy cmake cppcheck g++ git lcov libblas-dev pkg-config libfftw3-dev libboost-test-dev libboost-timer-dev make ninja-build valgrind +# sudo dnf install boost-devel blas-devel ccache clang clang-tools-extra cmake cppcheck fftw-devel git lcov libasan liblas-devel libubsan ninja-build valgrind +# install circle # mkdir -p $HOME/bin && wget https://www.circle-lang.org/linux/build_latest.tgz -P $HOME/tmp/ && tar -zxvf $HOME/tmp/build_latest.tgz --directory $HOME/bin/ && $HOME/bin/circle --version +# install nvc++ # ($ echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | 
sudo tee /etc/apt/sources.list.d/nvhpc.list) && sudo apt-get updatesudo apt-get update && sudo apt-get install nvhpc-22-7 +# # sudo yum-config-manager --add-repo https://developer.download.nvidia.com/hpc-sdk/rhel/nvhpc.repo && sudo yum install -y nvhpc-cuda-multi-23.1 - (mkdir -p .build.g++.asan && cd .build.g++.asan && CXX=g++ CXXFLAGS="-fsanitize=address,pointer-compare,pointer-subtract,leak -fno-sanitize-recover=all" cmake .. -DCMAKE_BUILD_TYPE=Debug && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.circle_169 && cd .build.circle_169 && CXX=/home/correaa/circle_169/circle cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_CIRCLE=1 -DBoost_USE_STATIC_LIBS=OFF -DBOOST_LIBRARYDIR=/usr/lib/x86_64-linux-gnu/ && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.circle_170 && cd .build.circle_170 && CXX=/home/correaa/circle_170/circle cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_CIRCLE=1 -DBoost_USE_STATIC_LIBS=OFF -DBOOST_LIBRARYDIR=/usr/lib/x86_64-linux-gnu/ && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.clang++.debug && cd .build.clang++.debug && CXX=clang++ cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_CLANG_TIDY="clang-tidy" && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.cppcheck && cd .build.cppcheck && CXX=g++ cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_CPPCHECK="cppcheck;--enable=all;--suppress=missingIncludeSystem;--inline-suppr;--std=c++17;--check-config;--error-exitcode=1" && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.nvcc && cd .build.nvcc && CUDACXX=nvcc CMAKE_CUDA_HOST_COMPILER=g++ cmake .. 
-DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_FLAGS="-ccbin=g++-9" -DENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=61 && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -T Test -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.icpc && cd .build.icpc && CXX=/home/correaa/intel/oneapi/compiler/latest/linux/bin/intel64/icpc cmake .. -DCMAKE_BUILD_TYPE=Release && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -T Test -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.icpx && cd .build.icpx && CXX=/home/correaa/intel/oneapi/compiler/latest/linux/bin/icpx cmake .. -DCMAKE_BUILD_TYPE=Release && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -T Test -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.g++-9 && cd .build.g++-9 && CXX=g++-9 cmake .. && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit - (mkdir -p .build.g++-10 && cd .build.g++-10 && CXX=g++-10 cmake .. && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit - (mkdir -p .build && cd .build && cmake .. -DCMAKE_BUILD_TYPE=Release && make -j $(($(nproc) - 2)) VERBOSE=1 && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.clang++.asan && cd .build.clang++.asan && CXX=clang++ CXXFLAGS="-fsanitize=undefined -fsanitize=address" cmake .. -DCMAKE_BUILD_TYPE=Debug && make -j $(($(nproc) - 2)) VERBOSE=1 && ASAN_OPTIONS="halt_on_error=1 detect_leaks=1" ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.memchk && cd .build.memchk && cmake .. && make -j 12 && ctest --overwrite MemoryCheckCommandOptions="-q --tool=memcheck --leak-check=yes --num-callers=51 --trace-children=yes --leak-check=full --track-origins=yes --gen-suppressions=all" -T memcheck -j 12 --output-on-failure) || exit - (mkdir -p .build.clang++std20 && cd .build.clang++std20 && CXX=clang++ cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_STANDARD=20 && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.clang++-11 && cd .build.clang++-11 && CXX=clang++-11 cmake .. -DCMAKE_BUILD_TYPE=Debug && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.clang++-12 && cd .build.clang++-12 && CXX=clang++-12 cmake .. -DCMAKE_BUILD_TYPE=Debug && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.clang++-13 && cd .build.clang++-13 && CXX=clang++-13 cmake .. -DCMAKE_BUILD_TYPE=Debug && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.debug && cd .build.debug && cmake .. -DCMAKE_BUILD_TYPE=Debug && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.g++.udef && cd .build.g++.udef && CXX=g++ CXXFLAGS="-fsanitize=undefined,shift,integer-divide-by-zero,unreachable,null,return,signed-integer-overflow -fno-sanitize-recover=all" cmake .. -DCMAKE_BUILD_TYPE=Release && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.g++.thread && cd .build.g++.thread && CXX=g++ CXXFLAGS="-fsanitize=thread -fno-sanitize-recover=all" cmake .. -DCMAKE_BUILD_TYPE=Debug && (make -j $(($(nproc) - 2)) || make VERBOSE=1) && ctest -j 12 --output-on-failure) || exit 666 - (mkdir -p .build.cpplint && cd .build.cpplint && cmake .. -DCMAKE_CXX_CPPLINT="cpplint;--filter=-whitespace/tab,-whitespace/parens,-whitespace/operators,-whitespace/braces,-readability/alt_tokens,-build/include_order,-whitespace/line_length,-whitespace/semicolon,-legal/copyright,-whitespace/comments,-readability/nolint" && (make $(($(nproc) - 2)) || make VERBOSE=1 ) && ctest -j 12 --output-on-failure) || exit 666 -exit +#CXX=g++-12 cmake --fresh .. 
-DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=nvcc -DCMAKE_CUDA_HOST_COMPILER=g++-12 -DCMAKE_CUDA_ARCHITECTURES=61 -#(mkdir -p .build.clang++.msan && cd .build.clang++.msan && CXX=clang++ CXXFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-omit-frame-pointer -g -O0 " MSAN_OPTIONS=symbolize=1 cmake .. -DCMAKE_BUILD_TYPE=Debug && make -j 12 && MSAN_OPTIONS=symbolize=1 MSAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer ctest -j 1 --output-on-failure) || exit - (mkdir -p .build.clang++.iwyu && cd .build.clang++.iwyu && CXX=clang++ cmake .. -DCMAKE_CXX_INCLUDE_WHAT_YOU_USE="iwyu" && make -j 10 && ctest -j 12 --output-on-failure) || exit - (find . -name '*.hpp' -exec cppcheck --enable=all --inline-suppr --suppress=unmatchedSuppression:{} --suppress=syntaxError --suppress=missingInclude --suppress=missingIncludeSystem --suppress=preprocessorErrorDirective --suppress=syntaxError --suppress=unusedFunction --suppress=arithOperationsOnVoidPointer --suppress=sizeofDereferencedVoidPointer -D__align__ -DCUDARTAPI --language=c++ --std=c++17 --error-exitcode=666 --suppress=unmatchedSuppression {} \;) || exit +export VALGRIND_EXE="valgrind --leak-check=full --track-origins=yes --show-leak-kinds=all --suppressions=.valgrind_suppressions --gen-suppressions=all --error-exitcode=1 " +export CMAKE_GENERATOR=Ninja +export CMAKE_CXX_COMPILER_LAUNCHER="ccache" + + (mkdir -p .build.g++.std23 && cd .build.g++.std23 && CXX=g++ cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_STANDARD=23 && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.clang++ && cd .build.clang++ && CXX=clang++ cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-Wfatal-errors" && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.circle && cd .build.circle && CXX="$HOME/bin/circle" cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug -DENABLE_CIRCLE=1 -DCMAKE_CXX_COMPILER_LAUNCHER="ccache" && cmake --build . 
&& ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.clang++.tidy && cd .build.clang++.tidy && CXX=clang++ cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_CLANG_TIDY="clang-tidy" -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DCMAKE_CXX_FLAGS="-D_GLIBCXX_DEBUG=0 -D_LIBCPP_DEBUG=0 -Wfatal-errors" && cmake --build . && ASAN_OPTIONS="new_delete_type_mismatch=0" ctest -j 12 --output-on-failure) || exit 666 +#(mkdir -p .build.g++.m32 && cd .build.g++.m32 && CXX=g++ CXXFLAGS="-m32" cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release && cmake --build . --verbose && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.clang++.m32 && cd .build.clang++.m32 && CXX=clang++ CXXFLAGS="-m32" cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release && cmake --build . --verbose && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.clang++-17 && cd .build.clang++-17 && CXX=clang++-17 cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-Wfatal-errors" -DCMAKE_CXX_COMPILER_LAUNCHER="ccache" && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 +#(mkdir -p .build.icpc && cd .build.icpc && CXX=/home/correaa/intel/oneapi/compiler/latest/linux/bin/intel64/icpc cmake .. -GNinja DCMAKE_BUILD_TYPE=Release && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.icpx && cd .build.icpx && CXX=/opt/intel/oneapi/compiler/latest/bin/icpx cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER_LAUNCHER="ccache" && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.nvc++ && cd .build.nvc++ && CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvc++ cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-stdpar=multicore" -DCMAKE_CXX_COMPILER_LAUNCHER="ccache" && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 +#(mkdir -p .build.cunvc++ && cd .build.cunvc++ && CXX=/opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvc++ cmake .. 
-GNinja -DCMAKE_BUILD_TYPE=Debug -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/opt/nvidia/hpc_sdk/Linux_x86_64/2024/compilers/bin/nvc++ -DCMAKE_CUDA_ARCHITECTURES=75 && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.nvcc && cd .build.nvcc && CXX=g++ cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=nvcc -DCMAKE_CUDA_HOST_COMPILER=g++-12 -DCMAKE_CXX_COMPILER_LAUNCHER="ccache" -DCMAKE_CUDA_COMPILER_LAUNCHER="ccache" && cmake --build . --parallel 12 --verbose && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.culang && cd .build.culang && cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=clang++-17 -DCMAKE_CXX_COMPILER=clang++-17 -DCMAKE_CUDA_ARCHITECTURES=75 && cmake --build . --verbose --parallel 4 && ctest -j 1 --output-on-failure) || exit 666 + (mkdir -p .build.clang++.std20 && cd .build.clang++.std20 && CXX=clang++ cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_STANDARD=20 -DCMAKE_CXX_FLAGS="-Wfatal-errors" -DCMAKE_CXX_COMPILER_LAUNCHER="ccache" && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.g++.anlys-std23-memchk && cd .build.g++.anlys-std23-memchk && CXX=g++ cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_STANDARD=23 -DBLA_VENDOR=OpenBLAS `#-DCMAKE_CXX_INCLUDE_WHAT_YOU_USE="iwyu"` -DCMAKE_CXX_FLAGS="-fanalyzer -Wno-analyzer-null-dereference -Wno-analyzer-possible-null-dereference -Wno-analyzer-malloc-leak -Wno-analyzer-use-of-uninitialized-value -Wno-analyzer-use-after-free" -DCMAKE_CXX_COMPILER_LAUNCHER="ccache" && cmake --build . && ctest -j 12 --output-on-failure -T memcheck) || exit 666 +#(mkdir -p .build.clang++.tidy && cd .build.clang++.tidy && CXX=clang++ CXXFLAGS="-D_LIBCPP_ENABLE_DEBUG_MODE=1 -stdlib=libc++" cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_CLANG_TIDY="clang-tidy" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_DEBUG=1 -D_LIBCPP_DEBUG=1 -Wfatal-errors" && cmake --build . 
&& ASAN_OPTIONS="new_delete_type_mismatch=0" ctest -j 12 --output-on-failure) || exit 666 +#(mkdir -p .build.hip && cd .build.hip && cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DBOOST_MULTI_STANDALONE=1 -DBUILD_TESTING=0 -DENABLE_HIP=1 -DCMAKE_HIP_ARCHITECTURES=gfx90a && cmake --build . ) || exit 666 + (mkdir -p .build.g++-.check-cov && cd .build.g++-.check-cov && CXX=g++ cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_CPPCHECK="cppcheck;--enable=all;--suppress=missingIncludeSystem;--inline-suppr;--std=c++17;--check-config;--error-exitcode=1" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_DEBUG=1 -Wfatal-errors --coverage -lgcov -fno-inline -fno-inline-small-functions -fno-default-inline" -DCMAKE_EXE_LINKER_FLAGS="-lgcov --coverage" && cmake --build . && ASAN_OPTIONS="new_delete_type_mismatch=0" ctest -j 12 --output-on-failure -T Test `# && lcov --directory . --capture --output-file coverage.info && lcov --remove coverage.info '/usr/*' --output-file coverage.info && lcov --list coverage.info && genhtml coverage.info`) || exit 666 + (mkdir -p .build.g++-release && cd .build.g++-release && CXX=g++ cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER_LAUNCHER="ccache" && cmake --build . && ctest -j 1 --output-on-failure -T Test) || exit 666 + +#(mkdir -p .build.clang++.iwyu && cd .build.clang++.iwyu && CXX=clang++ cmake .. -DCMAKE_CXX_INCLUDE_WHAT_YOU_USE="iwyu" && make -j 10 && ctest -j 12 --output-on-failure) || exit +# cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_INCLUDE_WHAT_YOU_USE="include-what-you-use;-Xiwyu;--mapping_file=/Users/correatedesco1/boost-multi/boost-test.imp;-Xiwyu;--mapping_file=/opt/homebrew/Cellar/include-what-you-use/0.22/libexec/share/include-what-you-use/boost-all.imp" -DCMAKE_VERIFY_INTERFACE_HEADER_SETS=ON +# TODO(correaa) make cppcheck work for all the code +#(find . 
-name '*.hpp' -exec cppcheck --enable=all --inline-suppr --suppress=unmatchedSuppression:{} --suppress=syntaxError --suppress=missingInclude --suppress=missingIncludeSystem --suppress=preprocessorErrorDirective --suppress=syntaxError --suppress=unusedFunction --suppress=arithOperationsOnVoidPointer --suppress=sizeofDereferencedVoidPointer -D__align__ -DCUDARTAPI --language=c++ --std=c++17 --error-exitcode=666 --suppress=unmatchedSuppression {} \;) || exit diff --git a/external_codes/boost_multi/multi/pre-push.macos b/external_codes/boost_multi/multi/pre-push.macos new file mode 100755 index 0000000000..745de0d8b7 --- /dev/null +++ b/external_codes/boost_multi/multi/pre-push.macos @@ -0,0 +1,22 @@ +#!/bin/bash +# -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- +# sudo apt install ccache clang clang-tidy cmake cppcheck g++ git lcov libblas-dev pkg-config libfftw3-dev libboost-test-dev libboost-timer-dev make ninja-build valgrind +# sudo dnf install boost-devel blas-devel ccache clang clang-tools-extra cmake cppcheck fftw-devel git lcov libasan liblas-devel libubsan ninja-build valgrind +# install circle # mkdir -p $HOME/bin && wget https://www.circle-lang.org/linux/build_latest.tgz -P $HOME/tmp/ && tar -zxvf $HOME/tmp/build_latest.tgz --directory $HOME/bin/ && $HOME/bin/circle --version +# install nvc++ # ($ echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list) && sudo apt-get updatesudo apt-get update && sudo apt-get install nvhpc-22-7 +# # sudo yum-config-manager --add-repo https://developer.download.nvidia.com/hpc-sdk/rhel/nvhpc.repo && sudo yum install -y nvhpc-cuda-multi-23.1 + +#CXX=g++-12 cmake --fresh .. -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=nvcc -DCMAKE_CUDA_HOST_COMPILER=g++-12 -DCMAKE_CUDA_ARCHITECTURES=61 + +export CMAKE_GENERATOR=Ninja +export CMAKE_CXX_COMPILER_LAUNCHER="ccache" + + (mkdir -p .build.c++.std23 && cd .build.c++.std23 && CXX=c++ cmake .. 
-DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_STANDARD=23 && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.c++.asan && cd .build.c++.asan && CXX=c++ cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="-D_LIBCPP_DEBUG=1 -fsanitize=address -fno-omit-frame-pointer" && cmake --build . && ASAN_OPTIONS="new_delete_type_mismatch=1" ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.c++.m32 && cd .build.c++.m32 && CXX=c++ cmake .. -DCMAKE_BUILD_TYPE=Release && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.z++ && cd .build.z++ && CXX="zig c++" cmake .. -DCMAKE_BUILD_TYPE=Release && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 + (mkdir -p .build.c++.tidy && cd .build.c++.tidy && CXX=c++ cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_CLANG_TIDY="clang-tidy" -DCMAKE_CXX_CPPLINT="cpplint;--quiet" -DCMAKE_CXX_CPPCHECK="cppcheck;--enable=all;--suppress=missingIncludeSystem;--suppress=checkersReport;--inline-suppr;--std=c++17;--error-exitcode=1" && cmake --build . && ctest -j 12 --output-on-failure) || exit 666 + +#(mkdir -p .build.clang++.iwyu && cd .build.clang++.iwyu && CXX=clang++ cmake .. -DCMAKE_CXX_INCLUDE_WHAT_YOU_USE="iwyu" && make -j 10 && ctest -j 12 --output-on-failure) || exit +# TODO(correaa) make cppcheck work for all the code +#(find . 
-name '*.hpp' -exec cppcheck --enable=all --inline-suppr --suppress=unmatchedSuppression:{} --suppress=syntaxError --suppress=missingInclude --suppress=missingIncludeSystem --suppress=preprocessorErrorDirective --suppress=syntaxError --suppress=unusedFunction --suppress=arithOperationsOnVoidPointer --suppress=sizeofDereferencedVoidPointer -D__align__ -DCUDARTAPI --language=c++ --std=c++17 --error-exitcode=666 --suppress=unmatchedSuppression {} \;) || exit diff --git a/external_codes/boost_multi/multi/sonar-project.properties b/external_codes/boost_multi/multi/sonar-project.properties new file mode 100644 index 0000000000..5d5d87894d --- /dev/null +++ b/external_codes/boost_multi/multi/sonar-project.properties @@ -0,0 +1,13 @@ +sonar.projectKey=correaa_boost-multi +sonar.organization=correaa + +# This is the name and version displayed in the SonarCloud UI. +#sonar.projectName=boost-multi +#sonar.projectVersion=1.0 + + +# Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows. +#sonar.sources=. + +# Encoding of the source code. Default is default system encoding +#sonar.sourceEncoding=UTF-8 diff --git a/external_codes/boost_multi/multi/test/.gitignore b/external_codes/boost_multi/multi/test/.gitignore deleted file mode 100644 index 10f3c3d80d..0000000000 --- a/external_codes/boost_multi/multi/test/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/build* - diff --git a/external_codes/boost_multi/multi/test/CMakeLists.txt b/external_codes/boost_multi/multi/test/CMakeLists.txt index 708367814f..57bd1a7ef0 100644 --- a/external_codes/boost_multi/multi/test/CMakeLists.txt +++ b/external_codes/boost_multi/multi/test/CMakeLists.txt @@ -1,912 +1,910 @@ -#[=[Multi Test suite can be run like this: -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- - mkdir -p build && cd build - cmake .. 
[-DENABLE_CUDA=1] - make -j - ctest -j --output-on-error [-T memcheck] - exit -#]=] -cmake_minimum_required(VERSION 3.11) +# Copyright 2018-2024 Alfredo A. Correa +# Copyright 2024 Matt Borland +# Distributed under the Boost Software License, Version 1.0. +# See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt -if(EXISTS "CMakeLists.txt") - message(FATAL_ERROR "You probably don't want to run CMake in a directory with CMakeLists.txt") -endif() +cmake_minimum_required(VERSION 3.16) -#project( -# boost-multi-test -# VERSION 0.1 -# LANGUAGES CXX -#) +if (DEFINED BOOST_SUPERPROJECT_VERSION AND NOT DEFINED BOOST_MULTI_STANDALONE AND BUILD_TESTING) -find_package(Boost COMPONENTS unit_test_framework REQUIRED) # tests require Boost.Test `sudo apt install libboost-test-dev` + include(BoostTestJamfile OPTIONAL RESULT_VARIABLE HAVE_BOOST_TEST) -if((ENABLE_CUDA OR DEFINED CXXCUDA) AND NOT DEFINED FORCED_ARCH) - enable_language(CUDA) + if(HAVE_BOOST_TEST) -# include(FindCUDA/select_compute_arch) -# cuda_detect_installed_gpus(INSTALLED_GPU_CCS_1) -# string(STRIP "${INSTALLED_GPU_CCS_1}" INSTALLED_GPU_CCS_2) -# string( -# REPLACE " " -# ";" -# INSTALLED_GPU_CCS_3 -# "${INSTALLED_GPU_CCS_2}" -# ) -# string( -# REPLACE "." 
-# "" -# CUDA_ARCH_LIST -# "${INSTALLED_GPU_CCS_3}" -# ) -# set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST}) -endif() + boost_test_jamfile(FILE Jamfile LINK_LIBRARIES Boost::multi Boost::unit_test_framework Boost::multi_array Boost::iterator) -enable_testing() -include(CTest) + endif() -file( - GLOB TEST_SRCS - RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} - *.cpp -) +else() -foreach(TEST_FILE ${TEST_SRCS}) - set(TEST_EXE "${TEST_FILE}.x") - add_executable(${TEST_EXE} ${TEST_FILE}) + enable_testing() - if((ENABLE_CUDA OR DEFINED CXXCUDA) AND NOT DEFINED FORCED_ARCH) -# set_property(TARGET ${TEST_EXE} PROPERTY "${CUDA_ARCH_LIST}") - endif() + set(CMAKE_CXX_STANDARD_REQUIRED ON) + set(CMAKE_CXX_EXTENSIONS OFF) + + find_package(Boost COMPONENTS unit_test_framework) # tests require Boost.Test `sudo apt install libboost-test-dev` - if(ENABLE_CUDA OR DEFINED CXXCUDA) - set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) + if(ENABLE_CUDA) + enable_language(CUDA) + set(CMAKE_CUDA_EXTENSIONS OFF) + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES native) + endif() endif() - target_link_libraries(${TEST_EXE} PRIVATE multi) + find_package(TBB) + + enable_testing() + include(CTest) + + set(WARNS + $<$: + -Xcompiler=-Werror,-Wall,-Wextra,-Wcast-align,-Wcast-qual,-Wdouble-promotion,-Wduplicated-branches,-Wduplicated-cond,-Wformat-truncation,-Wformat=2,-Wlogical-op,-Wmisleading-indentation,-Wno-missing-include-dirs,-Wnon-virtual-dtor,-Wno-missing-declarations,-Wnon-virtual-dtor,-Wnull-dereference,-Woverloaded-virtual,-Wpointer-arith,-Wno-redundant-decls,-Wno-shadow,-Wno-switch-enum,-Wno-unknown-pragmas,-Wtrampolines,-Wuninitialized,-Wunused,-Wunused-but-set-variable,-Wunused-result,-Wno-zero-as-null-pointer-constant + #--expt-relaxed-constexpr --extended-lambda + --Werror=cross-execution-space-call -Xcudafe=--display_error_number -Xcudafe=--diag_error=incompatible_assignment_operands -Xcudafe=--diag_error=returning_ptr_to_local_variable 
-Xcudafe=--diag_error=subscript_out_of_range -Xcudafe=--diag_error=used_before_set -Xcudafe=--diag_error=undefined_preproc_id -Xcudafe=--diag_error=implicit_func_decl -Xcudafe=--diag_error=implicit_return_from_non_void_function -Xcudafe=--diag_error=missing_type_specifier + > + $<$,$>,$>>: -Werror # -Wno-unknown-warning (activates -Wunknown-pragmas) + # -WNSObject-attribute # (gcc 12, not in 11) + # -Wabi=13 -Wabi-tag # (maybe important when linking with very old libraries) + -Waddress + #-Waddress-of-packed-member # (gcc 11, not in gcc 8) + -Wno-aggregate-return # (disallow return classes or structs, seems a C-compatibility warning) + -Waggressive-loop-optimizations + -Waligned-new=all # (gcc 12, not in 11) + # -Walloc-size-larger-than= # (gcc 12, not in 11) + -Walloc-zero # -Walloc-size-larger-than= + -Walloca # -Walloca-larger-than= + # -Warith-conversion # (gcc 11, not in gcc 9) + -Warray-bounds # -Warray-bounds=<0,2> + # -Warray-compare # (gcc 12, not in gcc 9) + # -Warray-parameter #=<0,2> # (gcc 11, not in gcc 10) + #-Wattribute-alias #=<0,2> # (gcc 12, not in 11, not in 7) + #-Wattribute-warning # (gcc 9, not in 8) + -Wattributes + -Wbool-compare -Wbool-operation + # -Wbidi-chars -Wbidi-chars=any # (gcc 12, not in 11) + -Wbuiltin-declaration-mismatch -Wbuiltin-macro-redefined + #-Wc++-compat + -Wc++0x-compat -Wc++11-compat -Wc++14-compat -Wc++17-compat + # -Wc++17-extensions # (gcc 12, not in 9) + -Wc++1z-compat + # -Wc++20-compat -Wc++20-extensions -Wc++23-extensions -Wc++2a-compat # (gcc 11, not in gcc 9) + #-Wcannot-profile # (gcc 9, not in gcc 8) + #-Wcast-align=strict -Wcast-function-type # (gcc 8, not in gcc 7) + -Wcast-qual + #-Wcatch-value #=<0, 3> # (gcc 8, not in gcc 7) + -Wchar-subscripts + #-Wclass-conversion # (gcc 11, not in 8) + #-Wclass-memaccess # (gcc 12, not in 11, not in 7) + -Wclobbered + # -Wcomma-subscript # (gcc 12, not in 11) + -Wcomment # (same as -Wcomments) + -Wconditionally-supported + -Wconversion -Wconversion-null + 
-Wcoverage-mismatch -Wcpp + # -Wctad-maybe-unsupported # (gcc 12, not in 9) + -Wctor-dtor-privacy + -Wdangling-else + # -Wdangling-pointer # (gcc 12, not in 11) + # -Wdangling-reference # (gcc 13, not in 12) + -Wdate-time + -Wdelete-incomplete -Wdelete-non-virtual-dtor + -Wdeprecated + #-Wdeprecated-copy -Wdeprecated-copy-dtor # (gcc 11, not in gcc 8) + -Wdeprecated-declarations + # -Wdeprecated-enum-enum-conversion -Wdeprecated-enum-float-conversion # (gcc 11, not in gcc 10) + -Wdisabled-optimization + -Wdiv-by-zero -Wdouble-promotion + -Wduplicated-branches -Wduplicated-cond + # -Weffc++ # (doesn't allow some advanced techniques, such as CRTP) + -Wempty-body -Wendif-labels + -Wenum-compare + # -Wenum-conversion # (gcc 11, not in 10) + -Wexpansion-to-defined + # -Wexceptions # (gcc 11, not in 10) + # -Wextra-semi # (gcc 8, not in 7) + -Wfloat-conversion -Wfloat-equal + -Wformat=2 + -Wformat-contains-nul # (gcc 12, not in 11) + # -Wformat-diag # (gcc 10, not in 9) + -Wformat-extra-args -Wformat-nonliteral + -Wformat-overflow=1 + -Wformat-security -Wformat-signedness -Wformat-truncation -Wformat-y2k -Wformat-zero-length + -Wframe-address # -Wframe-larger-than= + -Wfree-nonheap-object -Whsa + # -Wif-not-aligned # (gcc 8, not in 7) + -Wignored-attributes # -Wignored-qualifiers + -Wno-ignored-qualifiers + -Wimplicit-fallthrough #=3 # -Wimplicit-fallthrough=<0,5> + # -Winaccessible-base # (gcc 12, not in 11) + # -Winfinite-recursion # (gcc 12, not in 9) + -Winherited-variadic-ctor + #-Winit-list-lifetime # (gcc 12, not in 11) + -Winit-self + -Wno-inline # not all inline attempts can be successful + -Wint-in-bool-context -Wint-to-pointer-cast + # -Winterference-size # (gcc 12, not in 11) + # -Winvalid-imported-macros # (gcc 11, not in gcc 10) + -Winvalid-memory-model -Winvalid-offsetof -Winvalid-pch + # -Wlarger-than= # (disallow large objects types? 
in executable) + -Wliteral-suffix + -Wlogical-not-parentheses -Wlogical-op + # -Wlong-long # (C++98 warning) + -Wlto-type-mismatch -Wmain -Wmaybe-uninitialized + -Wmemset-elt-size -Wmemset-transposed-args + -Wmisleading-indentation + # -Wmismatched-dealloc -Wmismatched-new-delete # (gcc 11, not in gcc 10) + # -Wmismatched-tags # (gcc 11, not in 9) + # -Wmissing-attributes # (gcc 8, not in 8) + -Wmissing-braces -Wmissing-declarations -Wmissing-field-initializers -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-noreturn + #-Wmissing-profile # (gcc 11, not in gcc 8) + # -Wmissing-requires -Wmissing-template-keyword # (gcc 12, not in 11) + -Wmultichar + -Wno-multiple-inheritance # (disallows composition by inheritance) + # -Wmultistatement-macros # (gcc 8, not in 7) + -Wno-namespaces # (disallows use of namespaces, seems a C-tool) + -Wnarrowing + # -Wno-alloc-size-larger-than= -Wframe-larger-than= -Wno-larger-than -Wstack-usage= # (gcc 112, not in 11) + -Wnoexcept -Wnoexcept-type + -Wnon-template-friend -Wnon-virtual-dtor + -Wnonnull -Wnonnull-compare + -Wnormalized #=nfc -Wnormalized=[none|id|nfc|nfkc] + -Wnull-dereference + -Wodr + -Wold-style-cast # TODO(correaa) fix this + # -Wopenacc-parallelism # (gcc 12, not in 11) + -Wopenmp-simd -Woverflow + -Woverlength-strings -Woverloaded-virtual + -Wpacked -Wpacked-bitfield-compat + # -Wpacked-not-aligned # (gcc 8, not 7) + # -Wpadded # (disallows structs that need padding for alignment) + -Wparentheses + #-Wpessimizing-move # (gcc 11, not in gcc 8) + -Wplacement-new #=1 -Wplacement-new=<0,2> + -Wpmf-conversions + -Wpointer-arith -Wpointer-compare + -Wpragmas + #-Wprio-ctor-dtor # (gcc 11, not in gcc 8) + -Wpsabi + # -Wrange-loop-construct # (gcc 11, not in gcc 10) + -Wredundant-decls + #-Wredundant-move # (gcc 11, not in gcc 8) + # -Wredundant-tags # (gcc 11, not in gcc 9) + -Wregister + -Wreorder # (gcc 12, not in 11) + -Wreturn-local-addr -Wreturn-type + -Wrestrict -Wreorder + -Wscalar-storage-order 
-Wsequence-point + -Wshadow -Wshadow-compatible-local -Wshadow-local -Wshadow=compatible-local -Wshadow=local + -Wshift-count-negative -Wshift-count-overflow -Wshift-negative-value -Wshift-overflow #=1 -Wshift-overflow=<0,2> + -Wsign-compare -Wsign-conversion -Wsign-promo + -Wsized-deallocation + -Wsizeof-array-argument + # -Wsizeof-array-div # (gcc 11, not in 10) + -Wsizeof-pointer-memaccess + -Wstack-protector # -Wstack-usage= + -Wstrict-aliasing #=3 # -Wstrict-aliasing=<0,3> + -Wstrict-null-sentinel #=1 -Wstrict-overflow=<0,5> + -Wstrict-overflow=2 + # -Wstrict-overflow=3 # not compatible with Boost.Test + # -Wstrict-overflow=4 # not compatible with stl heap + # -Wstrict-overflow=5 # not compatible with Boost.Test + # -Wstring-compare # (gcc 11, not in 9) + -Wstringop-overflow #=2 -Wstringop-overflow=<0,4> + # -Wstringop-overread # (gcc 11, not in 10) + # -Wstringop-truncation # (gcc 8, not in 7) + -Wsubobject-linkage + # -Wsuggest-attribute=cold # (gcc 12, not in 11, not in 7) + -Wsuggest-attribute=const -Wsuggest-attribute=format + # -Wsuggest-attribute=malloc # (gcc 8, not in 7) + -Wsuggest-attribute=noreturn # -Wsuggest-attribute=pure # (false positive in gcc 8.5.0 RedHat) + -Wsuggest-final-methods -Wsuggest-final-types + -Wsuggest-override # (gcc 12, not in gcc 11) + -Wswitch -Wswitch-bool -Wswitch-default -Wswitch-enum + # -Wswitch-outside-range # (gcc 11, not in gcc 9) + -Wswitch-unreachable + -Wsync-nand -Wsynth + -Wno-system-headers # (expects system headers to be warning-compliant which they are not) + -Wtautological-compare + -Wno-templates # (disallows templates, C-tool) + -Wterminate # (gcc 12, not in 11) + -Wtrampolines -Wtrigraphs + # -Wtrivial-auto-var-init # (gcc 12, not in 11) + # -Wtsan # (gcc 11, not in 10) + -Wtype-limits -Wundef -Wuninitialized + -Wno-unknown-pragmas # (see above) -Wunknown-pragmas (other compilers need their own pragmas for their warnings) + -Wunreachable-code -Wunsafe-loop-optimizations + -Wunused 
-Wunused-but-set-parameter -Wunused-but-set-variable + -Wunused-const-variable #=2 TODO(correaa) add [[maybe_unused]] to niebloids + -Wunused-function -Wunused-label -Wunused-local-typedefs -Wunused-macros -Wunused-parameter -Wunused-result -Wunused-value -Wunused-variable + # -Wuse-after-free # =<0,3> # (gcc 12, not in 11) + -Wuseless-cast + -Wvarargs -Wvariadic-macros -Wvector-operation-performance + # -Wvexing-parse # (gcc 11, not in gcc 10) + -Wvirtual-inheritance -Wvirtual-move-assign + -Wvla # -Wvla-larger-than= # (gcc 12, not in 11) + # -Wvla-parameter # (gcc 11, not in gcc 10) + # -Wvolatile # (gcc 11, not in gcc 9) + -Wvolatile-register-var + -Wwrite-strings + -Wzero-as-null-pointer-constant + # -Wzero-length-bounds # (gcc 12, not in 11) + > + $<$,$,$>:-Werror -Wall -Wpedantic -Wextra + -Wno-error=\#warnings # -Wno-\#warnings disable this working for a bug in rocm ~5.6 + -WCFString-literal + -WCL4 + -WIndependentClass-attribute + -WNSObject-attribute + -Wabi + -Wabsolute-value + -Wabstract-final-class -Wabstract-vbase-init + -Waddress -Waddress-of-packed-member -Waddress-of-temporary + -Waggregate-return + # -Waix-compat (clang 13, not in clang 11) + # -Walign-mismatch (clang 13, not in clang 11) + # -Walloca (clang 11, not in clang 9) + -Walloca-with-align-alignof + # -Walways-inline-coroutine (clang 15, not in clang 13) + -Wambiguous-delete -Wambiguous-ellipsis -Wambiguous-macro -Wambiguous-member-template + # -Wambiguous-reversed-operator (clang 11, not in clang 9) + -Wanalyzer-incompatible-plugin + # -Wanon-enum-enum-conversion (clang 11, not in clang 9) + -Wanonymous-pack-parens + -Warc -Warc-bridge-casts-disallowed-in-nonarc -Warc-maybe-repeated-use-of-weak -Warc-non-pod-memaccess -Warc-performSelector-leaks -Warc-repeated-use-of-weak -Warc-retain-cycles -Warc-unsafe-retained-assign + # -Wargument-outside-range (clang 15, not in clang 13) + # -Wargument-undefined-behaviour (clang 15, not in clang 13) + -Warray-bounds -Warray-bounds-pointer-arithmetic 
+ -Wasm -Wasm-operand-widths + -Wassign-enum + -Wassume + -Wat-protocol + -Watimport-in-framework-header + # -Watomic-access (clang 15, not in clang 13) + -Watomic-alignment + # -Watomic-implicit-seq-cst (not in clang 7) + -Watomic-memory-ordering -Watomic-properties -Watomic-property-with-user-defined-accessor + -Wattribute-packed-for-bitfield + # -Wattribute-warning (clang 15, not in clang 13) + -Wattributes + -Wauto-disable-vptr-sanitizer -Wauto-import -Wauto-storage-class -Wauto-var-id + -Wavailability + # -Wavr-rtlib-linking-quirks (not in clang 7) + -Wbackend-plugin + -Wbackslash-newline-escape + -Wbad-function-cast + -Wbinary-literal + -Wbind-to-temporary-copy + -Wbinding-in-condition + # -Wbit-int-extension (clang 15, not in clang 13) + -Wbitfield-constant-conversion -Wbitfield-enum-conversion -Wbitfield-width + # -Wbitwise-conditional-parentheses (clang 11, not in clang 9) + # -Wbitwise-instead-of-logical (clang 15, not in clang 13) + -Wbitwise-op-parentheses + -Wblock-capture-autoreleasing + -Wbool-conversion -Wbool-conversions + # -Wbool-operation (clang 11, not in clang 9) + -Wbraced-scalar-init + # -Wbranch-protection (clang 15, not in clang 13) + -Wbridge-cast + # -Wbuiltin-assume-aligned-alignment (clang 11, not in clang 9) + -Wbuiltin-macro-redefined -Wbuiltin-memcpy-chk-size -Wbuiltin-requires-header + # -Wc++-compat + # -Wc++0x-compat -Wc++0x-extensions -Wc++0x-narrowing + # -Wc++11-compat -Wc++11-compat-deprecated-writable-strings -Wc++11-compat-pedantic -Wc++11-compat-reserved-user-defined-literal -Wc++11-extensions -Wc++11-extra-semi -Wc++11-inline-namespace -Wc++11-long-long -Wc++11-narrowing + # -Wc++14-attribute-extensions (clang 15, not in clang 13) + -Wc++14-binary-literal + # -Wc++14-compat -Wc++14-compat-pedantic -Wc++14-extensions + # -Wc++17-attribute-extensions (clang 15, not in clang 13) + # -Wc++17-compat -Wc++17-compat-mangling -Wc++17-compat-pedantic -Wc++17-extensions + # -Wc++1y-extensions + # -Wc++1z-compat 
-Wc++1z-compat-mangling -Wc++1z-extensions + # -Wc++20-attribute-extensions (clang 15, not in clang 13) + # -Wc++20-compat -Wc++20-compat-pedantic + # -Wc++20-designator -Wc++20-extensions + # -Wc++2a-compat -Wc++2a-compat-pedantic + # -Wc++2a-extensions + # -Wc++2b-extensions + # -Wc++98-c++11-c++14-c++17-compat -Wc++98-c++11-c++14-c++17-compat-pedantic -Wc++98-c++11-c++14-compat -Wc++98-c++11-c++14-compat-pedantic + # -Wc++98-c++11-compat -Wc++98-c++11-compat-binary-literal -Wc++98-c++11-compat-pedantic + # -Wc++98-compat + -Wno-c++98-compat-bind-to-temporary-copy + #-Wc++98-compat-extra-semi -Wc++98-compat-local-type-template-args -Wc++98-compat-pedantic -Wc++98-compat-unnamed-type-template-args + -Wno-c++98-compat-unnamed-type-template-args + # -Wc11-extensions + # -Wc2x-extensions + # -Wc99-compat + # -Wc99-designator -Wc99-extensions + -Wcast-align -Wcast-qual + # -Wcall-to-pure-virtual-from-ctor-dtor (not in clang 7) + # -Wcalled-once-parameter (clang 13, not in clang 11) + -Wcast-align -Wcast-calling-convention + # -Wcast-function-type (clang 13, not in clang 11) + -Wcast-of-sel-type -Wcast-qual -Wcast-qual-unrelated + -Wchar-align -Wchar-subscripts + -Wclang-cl-pch + # -Wclass-conversion (clang 11, not in clang 9) + -Wclass-varargs + # -Wcmse-union-leak (clang 11, not in clang 9) + -Wcomma + -Wcomment -Wcomments + -Wcompare-distinct-pointer-types + # -Wcompletion-handler (clang 13, not in clang 11) + -Wcomplex-component-init + # -Wcompound-token-split -Wcompound-token-split-by-macro -Wcompound-token-split-by-space (clang 13, not in clang 11) + -Wconversion + #-Wconcepts-ts-compat (not working in clang++ 16) + -Wconditional-type-mismatch -Wconditional-uninitialized + -Wconfig-macros + -Wconstant-conversion + # -Wconstant-evaluated (clang 11, not in clang 9) + -Wconstant-logical-operand + -Wconstexpr-not-const + -Wconsumed + -Wconversion -Wconversion-null + -Wcoroutine -Wcoroutine-missing-unhandled-exception + -Wcovered-switch-default + -Wcpp + 
-Wcstring-format-directive + -Wctor-dtor-privacy + # -Wctu # (not in clang 7) + -Wcuda-compat + -Wcustom-atomic-properties + # -Wcxx-attribute-extension (clang 13, not in clang 11) + -Wdangling -Wdangling-else -Wdangling-field + # -Wdangling-gsl (clang 13, not in clang 9) + -Wdangling-initializer-list + # -Wdarwin-sdk-settings (not in clang 7) + -Wdate-time + -Wdealloc-in-category + -Wdebug-compression-unavailable + -Wdeclaration-after-statement + # -Wdefaulted-function-deleted (not in clang 7) + -Wdelegating-ctor-cycles + # -Wdelete-abstract-non-virtual-dtor (not in clang 7) + -Wdelete-incomplete + # -Wdelete-non-abstract-non-virtual-dtor (not in clang 7) + # -Wdelete-non-virtual-dtor (not in clang 7) + # -Wdelimited-escape-sequence-extension (clang 15, not in clang 13) + -Wdeprecated + # -Wdeprecated-altivec-src-compat (clang 13, not in clang 11) + # -Wdeprecated-anon-enum-enum-conversion -Wdeprecated-array-compare (clang 11, not in clang 9) + -Wdeprecated-attributes + # -Wdeprecated-comma-subscript (clang 11, not in clang 9) + # -Wdeprecated-copy -Wdeprecated-copy-dtor (clang 11, not in clang 9) + # -Wdeprecated-copy-with-dtor (clang 13, not in clang 11) + # -Wdeprecated-copy-with-user-provided-copy (clang 13, not in clang 11) + # -Wdeprecated-copy-with-user-provided-dtor (clang 13, not in clang 11) + # -Wdeprecated-coroutine (clang 15, not in clang 13) + -Wdeprecated-declarations -Wdeprecated-dynamic-exception-spec + # -Wdeprecated-enum-compare -Wdeprecated-enum-compare-conditional -Wdeprecated-enum-enum-conversion (clang 11, not in clang 9) + # -Wdeprecated-enum-float-conversion (clang 11, not in clang 9) + # -Wdeprecated-experimental-coroutine (clang 15, not in clang 13) + -Wdeprecated-implementations -Wdeprecated-increment-bool + # -Wdeprecated-non-prototype (clang 15, not in clang 13) + -Wdeprecated-objc-isa-usage -Wdeprecated-objc-pointer-introspection -Wdeprecated-objc-pointer-introspection-performSelector + # -Wdeprecated-pragma (clang 15, not in clang 
13) + -Wdeprecated-register -Wdeprecated-this-capture + # -Wdeprecated-type (clang 15, not in clang 13) + # -Wdeprecated-volatile (clang 11, not in clang 9) + -Wdeprecated-writable-strings + -Wdirect-ivar-access + # -Wdisabled-macro-expansion (Boost.Test) + -Wdisabled-optimization + -Wdiscard-qual + -Wdistributed-object-modifiers + -Wdiv-by-zero + -Wdivision-by-zero + -Wdll-attribute-on-redeclaration -Wdllexport-explicit-instantiation-decl -Wdllimport-static-field-def + -Wdocumentation -Wdocumentation-deprecated-sync -Wdocumentation-html -Wdocumentation-pedantic -Wdocumentation-unknown-command + -Wdollar-in-identifier-extension + -Wdouble-promotion + # -Wdtor-name -Wdtor-typedef (clang 11, not in clang 9) + -Wduplicate-decl-specifier -Wduplicate-enum -Wduplicate-method-arg -Wduplicate-method-match -Wduplicate-protocol + -Wdynamic-class-memaccess -Wdynamic-exception-spec + # -Weffc++ + # -Welaborated-enum-base -Welaborated-enum-class (clang 11, not in clang 9) + -Wembedded-directive + -Wempty-body -Wempty-decomposition + # -Wempty-init-stmt (not in clang 7) + -Wempty-translation-unit + -Wencode-type + -Wendif-labels + -Wenum-compare + # -Wenum-compare-conditional (clang 11, not in clang 9) + -Wenum-compare-switch -Wenum-conversion + # -Wenum-enum-conversion -Wenum-float-conversion (clang 11, not in clang 9) + -Wenum-too-large + -Wexceptions + # -Wexcess-initializers (clang 11, not in clang 9) + -Wexit-time-destructors + -Wexpansion-to-defined + -Wexplicit-initialize-call -Wexplicit-ownership-type + # -Wexport-unnamed (not in clang 7) + # -Wexport-using-directive (not in clang 7) + -Wextern-c-compat -Wextern-initializer + #-Wextra + -Wextra-qualification + # -Wextra-semi (clang 8, not in 7) + # -Wextra-semi-stmt (not in clang 7) + -Wextra-tokens + # -Wfinal-dtor-non-final-class (clang 11, not in clang 9) + # -Wfinal-macro (clang 15, not in clang 13) + # -Wfixed-enum-extension (not in clang 7) + # -Wfixed-point-overflow (clang 11, not in clang 9) + -Wflag-enum + 
-Wflexible-array-extensions + -Wfloat-conversion -Wfloat-equal + -Wfloat-overflow-conversion -Wfloat-zero-conversion + -Wfor-loop-analysis + -Wformat -Wformat-extra-args + # -Wformat-insufficient-args (clang 13, not in clang 11) + -Wformat-invalid-specifier -Wformat-non-iso -Wformat-nonliteral -Wformat-pedantic -Wformat-security + # -Wformat-type-confusion (clang 11, not in clang 9) + -Wformat-y2k -Wformat-zero-length -Wformat=2 + # -Wfortify-source (not in clang 7) + -Wfour-char-constants + # -Wframe-address (clang 11, not in clang 9) + # -Wframe-larger-than # -Wframe-larger-than= + -Wframework-include-private-from-public + # -Wfree-nonheap-object (clang 13, not in clang 11) + -Wfunction-def-in-objc-container -Wfunction-multiversion + # -Wfuse-ld-path (clang 13, not in clang 11) + # -Wfuture-attribute-extensions (clang 15, not in clang 13) + # -Wfuture-compat + # -Wgcc-compat + # -Wglobal-constructors (Boost.Test) + # -Wglobal-isel (clang 11, not in clang 9) + -Wgnu -Wgnu-alignof-expression -Wgnu-anonymous-struct -Wgnu-array-member-paren-init -Wgnu-auto-type -Wgnu-binary-literal -Wgnu-case-range -Wgnu-complex-integer -Wgnu-compound-literal-initializer -Wgnu-conditional-omitted-operand -Wgnu-designator -Wgnu-empty-initializer -Wgnu-empty-struct -Wgnu-flexible-array-initializer -Wgnu-flexible-array-union-member -Wgnu-folding-constant -Wgnu-imaginary-constant -Wgnu-include-next + # -Wgnu-inline-cpp-without-extern (clang 11, not in clang 9) + -Wgnu-label-as-value + # -Wgnu-null-pointer-arithmetic + # -Wgnu-pointer-arith (clang 15, not in clang 13) + -Wgnu-redeclared-enum -Wgnu-statement-expression -Wgnu-static-float-init -Wgnu-string-literal-operator-template -Wgnu-union-cast -Wgnu-variable-sized-type-not-at-end -Wgnu-zero-line-directive -Wgnu-zero-variadic-macro-arguments + # -Wgpu-maybe-wrong-side (clang 15, not in clang 13) + -Wheader-guard -Wheader-hygiene + # -Whip-only (clang 11, not in clang 9) + -Widiomatic-parentheses + -Wignored-attributes + # 
-Wignored-availability-without-sdk-settings (clang 13, not in clang 11) + -Wignored-optimization-argument + #-Wignored-pragma-intrinsic + #-Wignored-pragma-optimize + #-Wignored-pragmas (other compilers need pragmas) + -Wno-ignored-qualifiers # turns out that returning const types is not really ignored by the compiler and prevents certain misleading syntaxes + # -Wignored-reference-qualifiers (clang 15, not in clang 13) + -Wimplicit -Wimplicit-atomic-properties + # -Wimplicit-const-int-float-conversion (clang 11, not in clang 9) + -Wimplicit-conversion-floating-point-to-bool -Wimplicit-exception-spec-mismatch -Wimplicit-fallthrough -Wimplicit-fallthrough-per-function + # -Wimplicit-fixed-point-conversion -Wimplicit-float-conversion (not in clang 7) + -Wimplicit-function-declaration -Wimplicit-int + # -Wimplicit-int-conversion (not in clang 7) + # -Wimplicit-int-float-conversion (clang 11, not in clang 9) + -Wimplicit-retain-self -Wimplicitly-unsigned-literal + -Wimport -Wimport-preprocessor-directive-pedantic + -Winaccessible-base + -Winclude-next-absolute-path -Winclude-next-outside-header + -Wincompatible-exception-spec -Wincompatible-function-pointer-types -Wincompatible-library-redeclaration -Wincompatible-ms-struct -Wincompatible-pointer-types -Wincompatible-pointer-types-discards-qualifiers -Wincompatible-property-type -Wincompatible-sysroot -Wincomplete-framework-module-declaration -Wincomplete-implementation -Wincomplete-module + # -Wincomplete-setjmp-declaration (not in clang 7) + -Wincomplete-umbrella + -Winconsistent-dllimport -Winconsistent-missing-destructor-override -Winconsistent-missing-override + -Wincrement-bool + -Winfinite-recursion + -Winit-self + -Winitializer-overrides + -Winjected-class-name + -Winline -Winline-asm + # -Winline-namespace-reopened-noninline (clang 11, not in clang 9) + -Winline-new-delete + -Winstantiation-after-specialization + -Wint-conversion -Wint-conversions + # -Wint-in-bool-context (clang 11, not in clang 9) + 
-Wint-to-pointer-cast -Wint-to-void-pointer-cast + -Winteger-overflow + # -Winterrupt-service-routine (clang 13, not in clang 11) + -Winvalid-command-line-argument -Winvalid-constexpr -Winvalid-iboutlet -Winvalid-initializer-from-system-header -Winvalid-ios-deployment-target + # -Winvalid-no-builtin-names (clang 11, not in clang 9) + -Winvalid-noreturn -Winvalid-offsetof -Winvalid-or-nonexistent-directory -Winvalid-partial-specialization -Winvalid-pch -Winvalid-pp-token -Winvalid-source-encoding -Winvalid-token-paste + -Wjump-seh-finally + -Wkeyword-compat -Wkeyword-macro + -Wknr-promoted-parameter + -Wlanguage-extension-token + -Wlarge-by-value-copy + -Wliblto + # -Wlinker-warnings (clang 15, not in clang 13) + -Wliteral-conversion -Wliteral-range + # -Wlocal-type-template-args + -Wlogical-not-parentheses -Wlogical-op-parentheses + #-Wlong-long (C++98 warning) + -Wloop-analysis + -Wmacro-redefined + -Wmain -Wmain-return-type + -Wmalformed-warning-check + -Wmany-braces-around-scalar-init + # -Wmax-tokens (clang 11, not in clang 9) + -Wmax-unsigned-zero + -Wmemset-transposed-args -Wmemsize-comparison + -Wmethod-signatures + -Wmicrosoft + # -Wmicrosoft-abstract (clang 13, not in clang 11) + -Wmicrosoft-anon-tag -Wmicrosoft-cast -Wmicrosoft-charize -Wmicrosoft-comment-paste -Wmicrosoft-const-init -Wmicrosoft-cpp-macro -Wmicrosoft-default-arg-redefinition + # -Wmicrosoft-drectve-section (not in clang 7) + -Wmicrosoft-end-of-file -Wmicrosoft-enum-forward-reference -Wmicrosoft-enum-value -Wmicrosoft-exception-spec -Wmicrosoft-exists -Wmicrosoft-explicit-constructor-call -Wmicrosoft-extra-qualification -Wmicrosoft-fixed-enum -Wmicrosoft-flexible-array -Wmicrosoft-goto -Wmicrosoft-inaccessible-base -Wmicrosoft-include -Wmicrosoft-mutable-reference -Wmicrosoft-pure-definition -Wmicrosoft-redeclare-static -Wmicrosoft-sealed + # -Wmicrosoft-static-assert (clang 13, not in clang 11) + -Wmicrosoft-template + # -Wmicrosoft-template-shadow (clang 11, not in clang 9) + 
-Wmicrosoft-union-member-reference -Wmicrosoft-unqualified-friend -Wmicrosoft-using-decl -Wmicrosoft-void-pseudo-dtor + # -Wmisexpect (clang 15, not in clang 13) + # -Wmisleading-indentation (clang 11, not in clang 9) + -Wmismatched-new-delete -Wmismatched-parameter-types -Wmismatched-return-types -Wmismatched-tags + -Wmissing-braces + # -Wmissing-constinit (clang 11, not in clang 9) + -Wmissing-declarations -Wmissing-exception-spec -Wmissing-field-initializers -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-method-return-type -Wmissing-noescape -Wmissing-noreturn -Wmissing-prototype-for-cc -Wmissing-prototypes -Wmissing-selector-name -Wmissing-sysroot -Wmissing-variable-declarations + # -Wmisspelled-assumption (clang 13, not in clang 11) + # -Rmodule-build + -Wmodule-conflict -Wmodule-file-config-mismatch -Wmodule-file-extension + # -Rmodule-import (not in clang 7) + -Wmodule-import-in-extern-c + # -Rmodule-lock + -Wmodules-ambiguous-internal-linkage -Wmodules-import-nested-redundant + -Wmost + -Wmove + -Wmsvc-include -Wmsvc-not-found + -Wmultichar + -Wmultiple-move-vbase + -Wnon-virtual-dtor + -Wnarrowing + -Wnested-anon-types -Wnested-externs + -Wnew-returns-null + -Wnewline-eof + # -Wnoderef (not in clang 7) + -Wnoexcept-type + # -Wnon-c-typedef-for-linkage (clang 11, not in clang 9) + -Wnon-gcc + -Wnon-literal-null-conversion + -Wnon-modular-include-in-framework-module -Wnon-modular-include-in-module + -Wnon-pod-varargs + # -Wnon-power-of-two-alignment (clang 11, not in clang 9) + -Wnon-virtual-dtor + -Wnonnull + #-Wnonportable-cfstrings + -Wnonportable-include-path + -Wnonportable-system-include-path + -Wnonportable-vector-initialization + -Wnontrivial-memaccess + -Wnsconsumed-mismatch + -Wnsreturns-mismatch + -Wnull-arithmetic -Wnull-character -Wnull-conversion -Wnull-dereference -Wnull-pointer-arithmetic + # -Wnull-pointer-subtraction (clang 13, not in clang 11) + -Wnullability -Wnullability-completeness -Wnullability-completeness-on-arrays 
-Wnullability-declspec -Wnullability-extension -Wnullability-inferred-on-nested-type + -Wnullable-to-nonnull-conversion + -Wodr + -Wold-style-cast + -Wold-style-definition + -Wopencl-unsupported-rgba + # -Wopenmp (clang 11, not in clang 9) + # -Wopenmp-51-extensions (clang 13, not in clang 11) + -Wopenmp-clauses -Wopenmp-loop-form + # -Wopenmp-mapping (clang 11, not in clang 9) + -Wopenmp-target + -Woption-ignored + -Wordered-compare-function-pointers + -Wout-of-line-declaration -Wout-of-scope-function + -Wover-aligned + -Woverflow + -Woverlength-strings + -Woverloaded-shift-op-parentheses -Woverloaded-virtual + # -Woverride-init (not in clang 7) + -Woverride-module -Woverriding-method-mismatch -Woverriding-t-option + -Wpacked + # -Wpadded (ask for alignment) + -Wparentheses -Wparentheses-equality + -Wpartial-availability + # -Rpass -Rpass-analysis + -Wpass-failed + # -Rpass-missed + -Wpch-date-time + -Wpedantic -Wpedantic-core-features + # -Wpedantic-macros (clang 15, not in clang 13) + -Wpessimizing-move + -Wpointer-arith -Wpointer-bool-conversion + # -Wpointer-compare (clang 11, not in clang 9) + # -Wpointer-integer-compare (not in clang 7) + -Wpointer-sign + # -Wpointer-to-enum-cast (clang 11, not in clang 9) + -Wpointer-to-int-cast -Wpointer-type-mismatch + # -Wpoison-system-directories (clang 11, not in clang 9) + # -Wpotentially-direct-selector (clang 11, not in clang 9) + -Wpotentially-evaluated-expression + # -Wpragma-clang-attribute -Wpragma-once-outside-header -Wpragma-pack -Wpragma-pack-suspicious-include -Wpragma-system-header-outside-header -Wpragmas + # -Wpre-c++14-compat -Wpre-c++14-compat-pedantic + # -Wpre-c++17-compat -Wpre-c++17-compat-pedantic + # -Wpre-c++20-compat -Wpre-c++20-compat-pedantic -Wpre-c++2b-compat -Wpre-c++2b-compat-pedantic (clang 13, not in clang 11) + # -Wpre-c2x-compat -Wpre-c2x-compat-pedantic (clang 13, not in clang 11) + # -Wpre-openmp-51-compat (clang 13, not in clang 11) + -Wpredefined-identifier-outside-function +
-Wprivate-extern -Wprivate-header -Wprivate-module + -Wprofile-instr-missing -Wprofile-instr-out-of-date -Wprofile-instr-unprofiled + -Wproperty-access-dot-syntax -Wproperty-attribute-mismatch + -Wprotocol -Wprotocol-property-synthesis-ambiguity + # -Wpsabi (clang 11, not in clang 9) + -Wqualified-void-return-type + -Wquoted-include-in-framework-header + # -Wrange-loop-analysis (this check is overzealous in clang 9) + -Wrange-loop-analysis + # -Wrange-loop-bind-reference -Wrange-loop-construct (clang 11, not in clang 9) + -Wreadonly-iboutlet-property + -Wreceiver-expr -Wreceiver-forward-class + -Wredeclared-class-member + # -Wredundant-consteval-if (clang 15, not in clang 13) + -Wredundant-decls -Wredundant-move -Wredundant-parens + -Wregister + -Wreinterpret-base-class + -Rremark-backend-plugin + -Wreorder + # -Wreorder-ctor -Wreorder-init-list (clang 11, not in clang 9) + -Wrequires-super-attribute + -Wreserved-id-macro + # -Wreserved-identifier -Wreserved-macro-identifier (clang 13, not in clang 11) + -Wreserved-user-defined-literal + # -Wrestrict-expansion (clang 15, not in clang 13) + -Wretained-language-linkage + -Wreturn-stack-address -Wreturn-std-move -Wreturn-type -Wreturn-type-c-linkage + # -Wrewrite-not-bool (clang 11, not in clang 9) + # -Rround-trip-cc1-args (clang 13, not in clang 11) + # -Wrtti (clang 13, not in clang 11) + -Wshadow + -Wold-style-cast + -Woverloaded-virtual + -Rsanitize-address + # -Rsearch-path-usage + -Wsection + -Wselector -Wselector-type-mismatch + -Wself-assign -Wself-assign-field -Wself-assign-overloaded + -Wself-move + -Wsemicolon-before-method-body + -Wsentinel + -Wsequence-point + -Wserialized-diagnostics + -Wshadow -Wshadow-all -Wshadow-field -Wshadow-field-in-constructor -Wshadow-field-in-constructor-modified -Wshadow-ivar -Wshadow-uncaptured-local + -Wshift-count-negative -Wshift-count-overflow -Wshift-negative-value -Wshift-op-parentheses -Wshift-overflow -Wshift-sign-overflow + -Wshorten-64-to-32 + -Wsign-compare 
-Wsign-conversion -Wsign-promo + -Wsigned-enum-bitfield + # -Wsigned-unsigned-wchar (clang 11, not in clang 9) + -Wsizeof-array-argument -Wsizeof-array-decay + # -Wsizeof-array-div (clang 11, not in clang 9) + # -Wsizeof-pointer-div (not in clang 7) + -Wsizeof-pointer-memaccess + -Wslash-u-filename + # -Wslh-asm-goto (clang 11, not in clang 9) + -Wsometimes-uninitialized + # -Wsource-mgr (clang 13, not in clang 11) + -Wsource-uses-openmp + -Wspir-compat + # -Wspirv-compat (clang 15, not in clang 13) + # -Wstack-exhausted (clang 11, not in clang 9) + -Wstack-protector + -Wstatic-float-init -Wstatic-in-inline -Wstatic-inline-explicit-instantiation -Wstatic-local-in-inline -Wstatic-self-init + -Wstdlibcxx-not-found + -Wstrict-aliasing -Wstrict-aliasing=0 -Wstrict-aliasing=1 -Wstrict-aliasing=2 + # -Wstrict-overflow + # -Wstrict-overflow=0 + # -Wstrict-overflow=1 + # -Wstrict-overflow=2 + # -Wstrict-overflow=3 + # -Wstrict-overflow=4 + -Wstrict-overflow=5 + # -Wstrict-potentially-direct-selector (clang 11, not in clang 9) + -Wstrict-prototypes -Wstrict-selector-match + -Wstring-compare + # -Wstring-concatenation (clang 13, not in clang 11) + -Wstring-conversion -Wstring-plus-char -Wstring-plus-int + -Wstrlcpy-strlcat-size + -Wstrncat-size + # -Wsuggest-destructor-override (clang 11, not in clang 9) + # -Wsuggest-override (clang 11, not in clang 9) + -Wsuper-class-method-mismatch + -Wsuspicious-bzero -Wsuspicious-memaccess + # -Wswift-name-attribute (clang 13, not in clang 11) + -Wswitch -Wswitch-bool -Wswitch-default -Wswitch-enum + -Wsync-fetch-and-nand-semantics-changed + -Wsynth + # -Wtarget-clones-mixed-specifiers (clang 15, not in clang 13) + # -Wtautological-bitwise-compare (clang 11, not in clang 9) + -Wtautological-compare -Wtautological-constant-compare -Wtautological-constant-in-range-compare -Wtautological-constant-out-of-range-compare + # -Wtautological-objc-bool-compare (not in clang 7) + -Wtautological-overlap-compare -Wtautological-pointer-compare 
-Wtautological-type-limit-compare -Wtautological-undefined-compare + # -Wtautological-unsigned-char-zero-compare -Wtautological-unsigned-enum-zero-compare + # -Wtautological-unsigned-zero-compare -Wtautological-value-range-compare (clang 13, not in clang 11) + # -Wtcb-enforcement (clang 13, not in clang 11) + -Wtentative-definition-incomplete-type + -Wthread-safety -Wthread-safety-analysis -Wthread-safety-attributes -Wthread-safety-beta -Wthread-safety-negative -Wthread-safety-precise -Wthread-safety-reference -Wthread-safety-verbose + -Wtrigraphs + -Wtype-limits -Wtype-safety + -Wtypedef-redefinition + -Wtypename-missing + -Wunable-to-open-stats-file + # -Wunaligned-access (clang 15, not in clang 13) + # -Wunaligned-qualifier-implicit-cast (clang 15, not in clang 13) + -Wunavailable-declarations + -Wundeclared-selector + -Wundef + # -Wundef-prefix (clang 11, not in clang 9) + -Wundefined-bool-conversion -Wundefined-func-template -Wundefined-inline -Wundefined-internal -Wundefined-internal-type + -Wno-undefined-reinterpret-cast # needed to cast to c-array + -Wundefined-var-template + # -Wunderaligned-exception-object (not in clang 7) + -Wunevaluated-expression + -Wunguarded-availability -Wunguarded-availability-new + -Wunicode -Wunicode-homoglyph -Wunicode-whitespace + # -Wunicode-zero-width (not in clang 7) + -Wuninitialized + # -Wuninitialized-const-reference (clang 11, not in clang 9) + -Wunknown-argument + # -Wunknown-assumption (clang 13, not in clang 11) + -Wunknown-attributes + # -Wunknown-cuda-version (clang 11, not in clang 9) + -Wunknown-escape-sequence + # -Wunknown-pragmas # (other compilers need their own pragmas) + -Wno-unknown-pragmas # (other compilers need their own pragmas) + # -Wunknown-sanitizers + # -Wunknown-warning-option + -Wunnamed-type-template-args + -Wunneeded-internal-declaration -Wunneeded-member-function + # -Wunqualified-std-cast-call (clang 15, not in clang 13) + -Wunreachable-code -Wunreachable-code-aggressive 
-Wunreachable-code-break + # -Wunreachable-code-fallthrough (clang 15, not in clang 13) + -Wunreachable-code-loop-increment -Wunreachable-code-return + -Wunsafe-buffer-usage + -Wunsequenced + # -Wunsupported-abi (clang 15, not in clang 13) + -Wunsupported-abs -Wunsupported-availability-guard -Wunsupported-cb -Wunsupported-dll-base-class-template + # -Wunsupported-floating-point-opt (clang 11, not in clang 9) + -Wunsupported-friend -Wunsupported-gpopt -Wunsupported-nan -Wunsupported-target-opt -Wunsupported-visibility + -Wunusable-partial-specialization + -Wunused -Wunused-argument + # -Wunused-but-set-parameter -Wunused-but-set-variable (clang 13, not in clang 11) + -Wunused-command-line-argument -Wunused-comparison -Wunused-const-variable -Wunused-exception-parameter -Wunused-function -Wunused-getter-return-value -Wunused-label -Wunused-lambda-capture -Wunused-local-typedef -Wunused-local-typedefs -Wunused-macros -Wunused-member-function -Wunused-parameter -Wunused-private-field -Wunused-property-ivar -Wunused-result -Wunused-template -Wunused-value -Wunused-variable -Wunused-volatile-lvalue + # -Wused-but-marked-unused (Boost.Test) + -Wuser-defined-literals -Wuser-defined-warnings + -Wvarargs + -Wvariadic-macros + -Wvec-elem-size + -Wvector-conversion -Wvector-conversions + -Wvexing-parse + -Wvisibility + -Wvla -Wvla-extension + # -Wvoid-pointer-to-enum-cast -Wvoid-pointer-to-int-cast (clang 11, not in clang 9) + -Wvoid-ptr-dereference + -Wvolatile-register-var + # -Wwasm-exception-spec (clang 11, not in clang 9) + -Wweak-template-vtables -Wweak-vtables + -Wwritable-strings + -Wwrite-strings + # -Wxor-used-as-pow (clang 11, not in clang 9) + -Wzero-as-null-pointer-constant -Wzero-length-array + > + $<$: -Werror -Wall -Wextra # also IntelLLVM, XL (ibm), XLClang (ibm) + -diag-disable=remark + -diag-error:3846 + -diag-disable=1011 # disables warning missing return at the end of non-void function + -diag-disable=2196 # disables "error #2196: routine is both "inline" 
and "noinline"" in icpc 2021.5.0 + -wd161 + -Wabi + -Warray-bounds + -Wcast-qual + -Wchar-subscripts + -Wcomment + -Wdeprecated + -Wenum-compare + -Wextra-tokens + -Wformat -Wformat=2 -Wformat-security + -Wic-pointer + -Wnarrowing + -Wreturn-type + -Wnon-virtual-dtor + -Wnonnull + -Wmaybe-uninitialized + -Wmain + -Wmissing-declarations -Wmissing-prototypes + -Wmultichar + -Woverloaded-virtual + -Woverflow + -Wp64 + -Wparentheses + -Wpointer-arith + -Wpointer-sign + -Wreorder + -Wreturn-type + -Wsequence-point + -Wshadow + -Wsign-compare + -Wshorten-64-to-32 + -Wmissing-prototypes + -Wstrict-aliasing + -Wno-unknown-pragmas + -Wstrict-prototypes + -Wtrigraphs + -Wtype-limits + -Wuninitialized + -Wunused -Wunused-but-set-variable -Wunused-function -Wunused-parameter -Wunused-variable + -Wwrite-strings + > + # $<$,$>: + $<$: + -Werror -Wall + -Wcast-qual + -Wformat=2 + -Wshadow + -Wuninitialized + > + $<$: + /WX /W4 /permissive- /volatile:iso # /EHsc /Zc:wchar_t /Zc:forScope /Zc:inline + > + ) + + file( + GLOB TEST_SRCS + RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + *.cpp + ) + + if(NOT Boost_FOUND) + message(WARNING "Cannot find Boost, Multi library will have a very minimal test. 
If you want to test the library install Boost.Test, for example please run:\n sudo apt install libboost-test-dev\n sudo dnf install boost-devel") + + add_executable(main main.cpp) - target_include_directories(${TEST_EXE} PRIVATE ${PROJECT_SOURCE_DIR}/include) - target_include_directories(${TEST_EXE} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS} ) - target_link_libraries (${TEST_EXE} PRIVATE Boost::unit_test_framework ) + # target_link_libraries(main PRIVATE multi) + target_compile_features (main PUBLIC cxx_std_17) + target_include_directories(main PRIVATE ${PROJECT_SOURCE_DIR}/include) + target_compile_definitions(main PUBLIC BOOST_TEST_MODULE="C++ Unit Tests for Multi main") - target_compile_definitions(${TEST_EXE} PRIVATE "$<$:BOOST_PP_VARIADICS>") + target_compile_definitions(main PRIVATE "$<$:BOOST_PP_VARIADICS>") + + if(NOT DEFINED ENABLE_CIRCLE) + target_compile_options(main PRIVATE ${WARNS}) + else() + target_compile_definitions(main PRIVATE "BOOST_TEST_DYN_LINK=1") + target_compile_options (main PRIVATE -Werror -Wall) + endif() + add_test(NAME main COMMAND $) - if(NOT DEFINED ENABLE_CIRCLE) - target_compile_options( - ${TEST_EXE} - PRIVATE - $<$: - -Xcompiler=-Werror,-Wall,-Wextra,-Wcast-align,-Wcast-qual,-Wdouble-promotion,-Wduplicated-branches,-Wduplicated-cond,-Wformat-truncation,-Wformat=2,-Wlogical-op,-Wmisleading-indentation,-Wno-missing-include-dirs,-Wnon-virtual-dtor,-Wno-missing-declarations,-Wnon-virtual-dtor,-Wnull-dereference,-Woverloaded-virtual,-Wpointer-arith,-Wno-redundant-decls,-Wno-shadow,-Wno-switch-enum,-Wno-unknown-pragmas,-Wtrampolines,-Wuninitialized,-Wunused,-Wunused-but-set-variable,-Wunused-result,-Wno-zero-as-null-pointer-constant - --expt-relaxed-constexpr --extended-lambda --Werror=cross-execution-space-call -Xcudafe=--display_error_number -Xcudafe=--diag_error=incompatible_assignment_operands -Xcudafe=--diag_error=returning_ptr_to_local_variable -Xcudafe=--diag_error=subscript_out_of_range -Xcudafe=--diag_error=used_before_set 
-Xcudafe=--diag_error=undefined_preproc_id -Xcudafe=--diag_error=implicit_func_decl -Xcudafe=--diag_error=implicit_return_from_non_void_function -Xcudafe=--diag_error=missing_type_specifier - > - $<$,$>,$>>: - -Werror - -Wall - -Wextra # (activates -Wunknown-pragmas) - # -Wextra-semi (gcc 8, not in gcc 7) - -Wpedantic - # -Wabi=13 -Wabi-tag (maybe important when linking with very old libraries) - -Waddress - # -Waddress-of-packed-member (gcc 11, not in gcc 8) - # -Waggregate-return (disallow return classes or structs, seems a C-compatibility warning) - -Waggressive-loop-optimizations - -Walloc-zero # -Walloc-size-larger-than= - -Walloca # -Walloca-larger-than= - # -Wanalyzer-double-fclose -Wanalyzer-double-free -Wanalyzer-exposure-through-output-file -Wanalyzer-file-leak -Wanalyzer-free-of-non-heap -Wanalyzer-malloc-leak (gcc 11, not in gcc 9) - # -Wanalyzer-mismatching-deallocation (gcc 11, not in gcc 10) - # -Wanalyzer-null-argument -Wanalyzer-null-dereference -Wanalyzer-possible-null-argument -Wanalyzer-possible-null-dereference (gcc 11, not in gcc 9) - # -Wanalyzer-shift-count-negative -Wanalyzer-shift-count-overflow (gcc 11, not in gcc 10) - # -Wanalyzer-stale-setjmp-buffer -Wanalyzer-tainted-array-index -Wanalyzer-too-complex -Wanalyzer-unsafe-call-within-signal-handler -Wanalyzer-use-after-free -Wanalyzer-use-of-pointer-in-stale-stack-frame (gcc 11, not in gcc 9) - # -Wanalyzer-write-to-const -Wanalyzer-write-to-string-literal (gcc 11, not in gcc 10) - # -Warith-conversion (gcc 11, not in gcc 9) - -Warray-bounds - # -Warray-parameter # -Warray-bounds=<0,2> -Warray-parameter=<0,2> (gcc 11, not in gcc 10) - # -Wattribute-warning (gcc 9, not in gcc 8) - -Wattributes - -Wbool-compare -Wbool-operation - -Wbuiltin-declaration-mismatch -Wbuiltin-macro-redefined - -Wc++0x-compat -Wc++14-compat -Wc++17-compat -Wc++1z-compat - # -Wc++20-compat -Wc++2a-compat (gcc 11, not in gcc 9) - # -Wcannot-profile (gcc 9, not in gcc 8) - -Wcast-align - # -Wcast-align=strict 
-Wcast-function-type (gcc 8, not in gcc 7) - -Wcast-qual - # -Wcatch-value # -Wcatch-value=<0, 3> (gcc 8, not in gcc 7) - -Wchar-subscripts -Wclobbered -Wcomment # (same as -Wcomments) - -Wconditionally-supported - -Wconversion -Wconversion-null - -Wcoverage-mismatch -Wcpp - # -Wctad-maybe-unsupported # TODO(correaa) add ctad explicitly as necessary - -Wctor-dtor-privacy - -Wdangling-else -Wdate-time - -Wdelete-incomplete -Wdelete-non-virtual-dtor - -Wdeprecated - # -Wdeprecated-copy -Wdeprecated-copy-dtor (gcc 11, not in gcc 8) - -Wdeprecated-declarations - # -Wdeprecated-enum-enum-conversion -Wdeprecated-enum-float-conversion (gcc 11, not in gcc 10) - -Wdisabled-optimization -Wdiv-by-zero -Wdouble-promotion - -Wduplicated-branches -Wduplicated-cond - # -Weffc++ (doesn't allow some advanced techniques, such as CRTP) - -Wempty-body -Wendif-labels - -Wenum-compare - # -Wenum-conversion (gcc 11, not in gcc 10) - -Wexpansion-to-defined - # -Wexceptions (gcc 11, not in gcc 10) - # -Wextra (see above) - -Wfloat-conversion # -Wfloat-equal (disallows float equality) - -Wformat-truncation -Wformat=2 - # -Wformat-diag (gcc 10, not in gcc 9) - -Wformat-extra-args -Wformat-nonliteral -Wformat-security -Wformat-signedness -Wformat-truncation -Wformat-y2k -Wformat-zero-length # -Wformat-overflow=<0,2> - -Wframe-address # -Wframe-larger-than= - -Wfree-nonheap-object -Whsa - # -Wif-not-aligned (gcc 8, not in gcc 7) - -Wignored-attributes -Wignored-qualifiers - -Wimplicit-fallthrough#=3 # -Wimplicit-fallthrough=<0,5> - -Winit-self - # -Winline - -Wint-in-bool-context -Wint-to-pointer-cast - # -Winvalid-imported-macros (gcc 11, not in gcc 10) - -Winvalid-memory-model -Winvalid-offsetof -Winvalid-pch - -Wignored-qualifiers - # -Wlarger-than= # (disallow large objects types? 
in executable) - -Wliteral-suffix - -Wlogical-not-parentheses -Wlogical-op - -Wlong-long -Wlto-type-mismatch -Wmain -Wmaybe-uninitialized - -Wmemset-elt-size -Wmemset-transposed-args - -Wmisleading-indentation - # -Wmismatched-dealloc -Wmismatched-new-delete (gcc 11, not in gcc 10) - # -Wmismatched-tags (gcc 11, not in gcc 9) - # -Wmissing-attributes (gcc 8, not in gcc 7) - -Wmissing-braces -Wmissing-declarations -Wmissing-field-initializers -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-noreturn - # -Wmissing-profile (gcc 11, not in gcc 8) - -Wmultichar - # -Wmultiple-inheritance (disallows composition by inheritance) - # -Wmultistatement-macros (gcc 8, not in gcc 7) - # -Wnamespaces (disallows use of namespaces, seems a C-tool) - -Wnarrowing - -Wnoexcept -Wnoexcept-type - -Wnormalized #=nfc -Wnormalized=[none|id|nfc|nfkc] - -Wnon-template-friend - -Wnon-virtual-dtor - -Wnonnull -Wnonnull-compare -Wnull-dereference - -Wodr -Wold-style-cast -Wopenmp-simd -Woverflow - -Woverlength-strings -Woverloaded-virtual - -Wpacked -Wpacked-bitfield-compat - # -Wpacked-not-aligned (gcc 8, not in gcc 7) - # -Wpadded (disallows structs that need padding for alignment) - -Wparentheses - # -Wpedantic (see above) - # -Wpessimizing-move (gcc 11, not in gcc 8) - -Wplacement-new #=1 -Wplacement-new=<0,2> - -Wpmf-conversions - -Wpointer-arith -Wpointer-compare - -Wpragmas - # -Wprio-ctor-dtor (gcc 11, not in gcc 8) - -Wpsabi - # -Wrange-loop-construct (gcc 11, not in gcc 10) - -Wredundant-decls - # -Wredundant-move (gcc 11, not in gcc 8) - # -Wredundant-tags (gcc 11, not in gcc 9) - -Wregister - -Wreturn-local-addr -Wreturn-type - -Wrestrict -Wreorder - -Wscalar-storage-order -Wsequence-point - -Wshadow -Wshadow-compatible-local -Wshadow-local -Wshadow=compatible-local -Wshadow=local - -Wshift-count-negative -Wshift-count-overflow -Wshift-negative-value -Wshift-overflow #=1 -Wshift-overflow=<0,2> - -Wsign-compare -Wsign-conversion -Wsign-promo - -Wsized-deallocation - 
-Wsizeof-array-argument - # -Wsizeof-array-div (gcc 11, not in gcc 10) - # -Wsizeof-pointer-div (gcc 8, not in gcc 7) - -Wsizeof-pointer-memaccess - -Wstack-protector # -Wstack-usage= - -Wstrict-aliasing #=3 -Wstrict-aliasing=<0,3> - -Wstrict-null-sentinel #=1 -Wstrict-overflow=<0,5> - -Wstrict-overflow #=1 -Wstrict-overflow=<0,5> - # -Wstring-compare (gcc 11, not in gcc 9) - -Wstringop-overflow #=2 -Wstringop-overflow=<0,4> - # -Wstringop-overread (gcc 11, not in gcc 10) - # -Wstringop-truncation (gcc 8, not in gcc 7) - -Wsubobject-linkage - -Wsuggest-attribute=const -Wsuggest-attribute=format - # -Wsuggest-attribute=malloc (gcc 8, not in gcc 7) - -Wsuggest-attribute=noreturn - # -Wsuggest-attribute=pure - -Wsuggest-final-methods -Wsuggest-final-types - -Wswitch -Wswitch-bool -Wswitch-default -Wswitch-enum - # -Wswitch-outside-range (gcc 11, not in gcc 9) - -Wswitch-unreachable - -Wsync-nand -Wsynth - # -Wsystem-headers (expects system headers to be warning-compliant which they are not) - -Wtautological-compare - # -Wtemplates (disallows templates, C-tool) - -Wtrampolines -Wtrigraphs - # -Wtsan (gcc 11, not in gcc 10) - -Wtype-limits -Wundef -Wuninitialized - -Wno-unknown-pragmas # (see above) -Wunknown-pragmas (other compilers need their own pragmas for their warnings) - -Wunreachable-code -Wunsafe-loop-optimizations - -Wunused -Wunused-but-set-variable - # -Wunused-const-variable #=2 TODO(correaa) add [[maybe_unused]] to niebloids - -Wunused-function -Wunused-label -Wunused-local-typedefs -Wunused-macros -Wunused-parameter -Wunused-result -Wunused-value -Wunused-variable - -Wuseless-cast - -Wvarargs -Wvariadic-macros -Wvector-operation-performance - # -Wvexing-parse (gcc 11, not in gcc 10) - -Wvirtual-inheritance -Wvirtual-move-assign - -Wvla - # -Wvla-parameter (gcc 11, not in gcc 10) - # -Wvolatile (gcc 11, not in gcc 9) - -Wvolatile-register-var - -Wwrite-strings - -Wzero-as-null-pointer-constant - > - $<$,$,$>: - -Werror - -Wall - -Wextra - -Wcast-align - 
-Wcast-qual - -Wconversion - -Wdouble-promotion - -Wformat=2 - -Wmissing-include-dirs - -Wnon-virtual-dtor - -Wnull-dereference - -Wmove - -Wold-style-cast - -Woverloaded-virtual - -Wpointer-arith - -Wpedantic - -Wredundant-decls - -Wshadow - -Wold-style-cast - -Woverloaded-virtual - -Wpedantic - -Wsign-compare - -Wsign-conversion - -Wstrict-aliasing - -Wswitch-enum - -Wtype-limits - -Wuninitialized - -Wunused - -Wunused-result - -Wzero-as-null-pointer-constant - #-W - #-W#pragma-messages - #-W#warnings - -WCFString-literal - -WCL4 - -WIndependentClass-attribute - -WNSObject-attribute - -Wabi - -Wabsolute-value - -Wabstract-final-class - -Wabstract-vbase-init - -Waddress - -Waddress-of-packed-member - -Waddress-of-temporary - -Waggregate-return - # -Waix-compat (clang 13, not in clang 11) - # -Walign-mismatch (clang 13, not in clang 11) - #-Wall - # -Walloca (clang 11, not in clang 9) - -Walloca-with-align-alignof - # -Walways-inline-coroutine (clang 15, not in clang 13) - -Wambiguous-delete - -Wambiguous-ellipsis - -Wambiguous-macro - -Wambiguous-member-template - # -Wambiguous-reversed-operator (clang 11, not in clang 9) - -Wanalyzer-incompatible-plugin - # -Wanon-enum-enum-conversion (clang 11, not in clang 9) - -Wanonymous-pack-parens - -Warc - -Warc-bridge-casts-disallowed-in-nonarc - -Warc-maybe-repeated-use-of-weak - -Warc-non-pod-memaccess - -Warc-performSelector-leaks - -Warc-repeated-use-of-weak - -Warc-retain-cycles - -Warc-unsafe-retained-assign - # -Wargument-outside-range (clang 15, not in clang 13) - # -Wargument-undefined-behaviour (clang 15, not in clang 13) - -Warray-bounds - -Warray-bounds-pointer-arithmetic - -Wasm - -Wasm-operand-widths - -Wassign-enum - -Wassume - -Wat-protocol - -Watimport-in-framework-header - # -Watomic-access (clang 15, not in clang 13) - -Watomic-alignment -Watomic-implicit-seq-cst -Watomic-memory-ordering -Watomic-properties -Watomic-property-with-user-defined-accessor - -Wattribute-packed-for-bitfield - # 
-Wattribute-warning (clang 15, not in clang 13) - -Wattributes - -Wauto-disable-vptr-sanitizer -Wauto-import -Wauto-storage-class -Wauto-var-id - -Wavailability - -Wavr-rtlib-linking-quirks - -Wbackend-plugin - -Wbackslash-newline-escape - -Wbad-function-cast - -Wbinary-literal - -Wbind-to-temporary-copy - -Wbinding-in-condition - # -Wbit-int-extension (clang 15, not in clang 13) - -Wbitfield-constant-conversion -Wbitfield-enum-conversion -Wbitfield-width - # -Wbitwise-conditional-parentheses (clang 11, not in clang 9) - # -Wbitwise-instead-of-logical (clang 15, not in clang 13) - -Wbitwise-op-parentheses - -Wblock-capture-autoreleasing - -Wbool-conversion -Wbool-conversions - # -Wbool-operation (clang 11, not in clang 9) - -Wbraced-scalar-init - # -Wbranch-protection (clang 15, not in clang 13) - -Wbridge-cast - # -Wbuiltin-assume-aligned-alignment (clang 11, not in clang 9) - -Wbuiltin-macro-redefined -Wbuiltin-memcpy-chk-size -Wbuiltin-requires-header - # -Wc++-compat - # -Wc++0x-compat - # -Wc++0x-extensions - # -Wc++0x-narrowing - # -Wc++11-compat - # -Wc++11-compat-deprecated-writable-strings - # -Wc++11-compat-pedantic - # -Wc++11-compat-reserved-user-defined-literal - # -Wc++11-extensions - # -Wc++11-extra-semi - # -Wc++11-inline-namespace - # -Wc++11-long-long - # -Wc++11-narrowing - # -Wc++14-attribute-extensions (clang 15, not in clang 13) - -Wc++14-binary-literal - # -Wc++14-compat - # -Wc++14-compat-pedantic - # -Wc++14-extensions - # -Wc++17-attribute-extensions (clang 15, not in clang 13) - # -Wc++17-compat - # -Wc++17-compat-mangling - # -Wc++17-compat-pedantic - # -Wc++17-extensions - # -Wc++1y-extensions - # -Wc++1z-compat - # -Wc++1z-compat-mangling - # -Wc++1z-extensions - # -Wc++20-attribute-extensions (clang 15, not in clang 13) - # -Wc++20-compat - # -Wc++20-compat-pedantic - # -Wc++20-designator - # -Wc++20-extensions - # -Wc++2a-compat - # -Wc++2a-compat-pedantic - # -Wc++2a-extensions - # -Wc++2b-extensions - # 
-Wc++98-c++11-c++14-c++17-compat - # -Wc++98-c++11-c++14-c++17-compat-pedantic - # -Wc++98-c++11-c++14-compat - # -Wc++98-c++11-c++14-compat-pedantic - # -Wc++98-c++11-compat - # -Wc++98-c++11-compat-binary-literal - # -Wc++98-c++11-compat-pedantic - # -Wc++98-compat - -Wno-c++98-compat-bind-to-temporary-copy - #-Wc++98-compat-extra-semi - #-Wc++98-compat-local-type-template-args - #-Wc++98-compat-pedantic - #-Wc++98-compat-unnamed-type-template-args - # -Wc11-extensions - # -Wc2x-extensions - # -Wc99-compat - # -Wc99-designator - # -Wc99-extensions - -Wcall-to-pure-virtual-from-ctor-dtor - # -Wcalled-once-parameter (clang 13, not in clang 11) - -Wcast-align -Wcast-calling-convention - # -Wcast-function-type (clang 13, not in clang 11) - -Wcast-of-sel-type -Wcast-qual -Wcast-qual-unrelated - -Wchar-align - -Wchar-subscripts - -Wclang-cl-pch - # -Wclass-conversion (clang 11, not in clang 9) - -Wclass-varargs - # -Wcmse-union-leak (clang 11, not in clang 9) - -Wcomma - -Wcomment -Wcomments - -Wcompare-distinct-pointer-types - # -Wcompletion-handler (clang 13, not in clang 11) - -Wcomplex-component-init - # -Wcompound-token-split (clang 13, not in clang 11) - # -Wcompound-token-split-by-macro (clang 13, not in clang 11) - # -Wcompound-token-split-by-space (clang 13, not in clang 11) - -Wconcepts-ts-compat - -Wconditional-type-mismatch -Wconditional-uninitialized - -Wconfig-macros - -Wconstant-conversion - # -Wconstant-evaluated (clang 11, not in clang 9) - -Wconstant-logical-operand - -Wconstexpr-not-const - -Wconsumed - -Wconversion -Wconversion-null - -Wcoroutine -Wcoroutine-missing-unhandled-exception - -Wcovered-switch-default - -Wcpp - -Wcstring-format-directive - -Wno-ctad-maybe-unsupported # TODO(correeaa) activate - -Wctor-dtor-privacy - -Wctu - -Wcuda-compat - -Wcustom-atomic-properties - # -Wcxx-attribute-extension (clang 13, not in clang 11) - -Wdangling -Wdangling-else -Wdangling-field - # -Wdangling-gsl (clang 13, not in clang 9) - 
-Wdangling-initializer-list - -Wdarwin-sdk-settings - -Wdate-time - -Wdealloc-in-category - -Wdebug-compression-unavailable - -Wdeclaration-after-statement - -Wdefaulted-function-deleted - -Wdelegating-ctor-cycles - -Wdelete-abstract-non-virtual-dtor -Wdelete-incomplete -Wdelete-non-abstract-non-virtual-dtor -Wdelete-non-virtual-dtor - # -Wdelimited-escape-sequence-extension (clang 15, not in clang 13) - -Wdeprecated - # -Wdeprecated-altivec-src-compat (clang 13, not in clang 11) - # -Wdeprecated-anon-enum-enum-conversion -Wdeprecated-array-compare (clang 11, not in clang 9) - -Wdeprecated-attributes - # -Wdeprecated-comma-subscript (clang 11, not in clang 9) - # -Wdeprecated-copy -Wdeprecated-copy-dtor (clang 11, not in clang 9) - # -Wdeprecated-copy-with-dtor (clang 13, not in clang 11) - # -Wdeprecated-copy-with-user-provided-copy (clang 13, not in clang 11) - # -Wdeprecated-copy-with-user-provided-dtor (clang 13, not in clang 11) - # -Wdeprecated-coroutine (clang 15, not in clang 13) - -Wdeprecated-declarations -Wdeprecated-dynamic-exception-spec - # -Wdeprecated-enum-compare -Wdeprecated-enum-compare-conditional -Wdeprecated-enum-enum-conversion (clang 11, not in clang 9) - # -Wdeprecated-enum-float-conversion (clang 11, not in clang 9) - # -Wdeprecated-experimental-coroutine (clang 15, not in clang 13) - -Wdeprecated-implementations -Wdeprecated-increment-bool - # -Wdeprecated-non-prototype (clang 15, not in clang 13) - -Wdeprecated-objc-isa-usage -Wdeprecated-objc-pointer-introspection -Wdeprecated-objc-pointer-introspection-performSelector - # -Wdeprecated-pragma (clang 15, not in clang 13) - -Wdeprecated-register -Wdeprecated-this-capture - # -Wdeprecated-type (clang 15, not in clang 13) - # -Wdeprecated-volatile (clang 11, not in clang 9) - -Wdeprecated-writable-strings - -Wdirect-ivar-access - # -Wdisabled-macro-expansion (Boost.Test) - -Wdisabled-optimization - -Wdiscard-qual - -Wdistributed-object-modifiers - -Wdiv-by-zero - -Wdivision-by-zero - 
-Wdll-attribute-on-redeclaration -Wdllexport-explicit-instantiation-decl -Wdllimport-static-field-def - -Wdocumentation -Wdocumentation-deprecated-sync -Wdocumentation-html -Wdocumentation-pedantic -Wdocumentation-unknown-command - -Wdollar-in-identifier-extension - -Wdouble-promotion - # -Wdtor-name -Wdtor-typedef (clang 11, not in clang 9) - -Wduplicate-decl-specifier -Wduplicate-enum -Wduplicate-method-arg -Wduplicate-method-match -Wduplicate-protocol - -Wdynamic-class-memaccess -Wdynamic-exception-spec - # -Weffc++ - # -Welaborated-enum-base -Welaborated-enum-class (clang 11, not in clang 9) - -Wembedded-directive - -Wempty-body -Wempty-decomposition -Wempty-init-stmt -Wempty-translation-unit - -Wencode-type - -Wendif-labels - -Wenum-compare - # -Wenum-compare-conditional (clang 11, not in clang 9) - -Wenum-compare-switch -Wenum-conversion - # -Wenum-enum-conversion -Wenum-float-conversion (clang 11, not in clang 9) - -Wenum-too-large - -Wexceptions - # -Wexcess-initializers (clang 11, not in clang 9) - -Wexit-time-destructors - -Wexpansion-to-defined - -Wexplicit-initialize-call -Wexplicit-ownership-type - -Wexport-unnamed -Wexport-using-directive - -Wextern-c-compat -Wextern-initializer - #-Wextra - -Wextra-qualification -Wextra-semi -Wextra-semi-stmt -Wextra-tokens - # -Wfinal-dtor-non-final-class (clang 11, not in clang 9) - # -Wfinal-macro (clang 15, not in clang 13) - -Wfixed-enum-extension - # -Wfixed-point-overflow (clang 11, not in clang 9) - -Wflag-enum - -Wflexible-array-extensions - -Wfloat-conversion # -Wfloat-equal - -Wfloat-overflow-conversion -Wfloat-zero-conversion - -Wfor-loop-analysis - -Wformat -Wformat-extra-args - # -Wformat-insufficient-args (clang 13, not in clang 11) - -Wformat-invalid-specifier -Wformat-non-iso -Wformat-nonliteral -Wformat-pedantic -Wformat-security - # -Wformat-type-confusion (clang 11, not in clang 9) - -Wformat-y2k -Wformat-zero-length - -Wformat=2 - -Wfortify-source - -Wfour-char-constants - # -Wframe-address 
(clang 11, not in clang 9) - # -Wframe-larger-than # -Wframe-larger-than= - -Wframework-include-private-from-public - # -Wfree-nonheap-object (clang 13, not in clang 11) - -Wfunction-def-in-objc-container -Wfunction-multiversion - # -Wfuse-ld-path (clang 13, not in clang 11) - # -Wfuture-attribute-extensions (clang 15, not in clang 13) - # -Wfuture-compat - # -Wgcc-compat - # -Wglobal-constructors (Boost.Test) - # -Wglobal-isel (clang 11, not in clang 9) - -Wgnu -Wgnu-alignof-expression -Wgnu-anonymous-struct -Wgnu-array-member-paren-init -Wgnu-auto-type -Wgnu-binary-literal -Wgnu-case-range -Wgnu-complex-integer -Wgnu-compound-literal-initializer -Wgnu-conditional-omitted-operand -Wgnu-designator -Wgnu-empty-initializer -Wgnu-empty-struct -Wgnu-flexible-array-initializer -Wgnu-flexible-array-union-member -Wgnu-folding-constant -Wgnu-imaginary-constant -Wgnu-include-next - # -Wgnu-inline-cpp-without-extern (clang 11, not in clang 9) - -Wgnu-label-as-value - # -Wgnu-null-pointer-arithmetic - # -Wgnu-pointer-arith (clang 15, not in clang 13) - -Wgnu-redeclared-enum -Wgnu-statement-expression -Wgnu-static-float-init -Wgnu-string-literal-operator-template -Wgnu-union-cast -Wgnu-variable-sized-type-not-at-end -Wgnu-zero-line-directive -Wgnu-zero-variadic-macro-arguments - # -Wgpu-maybe-wrong-side (clang 15, not in clang 13) - -Wheader-guard -Wheader-hygiene - # -Whip-only (clang 11, not in clang 9) - -Widiomatic-parentheses - -Wignored-attributes - # -Wignored-availability-without-sdk-settings (clang 13, not in clang 11) - -Wignored-optimization-argument - #-Wignored-pragma-intrinsic - #-Wignored-pragma-optimize - #-Wignored-pragmas (other compilers need pragmas) - -Wignored-qualifiers - # -Wignored-reference-qualifiers (clang 15, not in clang 13) - -Wimplicit -Wimplicit-atomic-properties - # -Wimplicit-const-int-float-conversion (clang 11, not in clang 9) - -Wimplicit-conversion-floating-point-to-bool -Wimplicit-exception-spec-mismatch -Wimplicit-fallthrough 
-Wimplicit-fallthrough-per-function -Wimplicit-fixed-point-conversion -Wimplicit-float-conversion -Wimplicit-function-declaration -Wimplicit-int -Wimplicit-int-conversion - # -Wimplicit-int-float-conversion (clang 11, not in clang 9) - -Wimplicit-retain-self -Wimplicitly-unsigned-literal - -Wimport -Wimport-preprocessor-directive-pedantic - -Winaccessible-base - -Winclude-next-absolute-path -Winclude-next-outside-header - -Wincompatible-exception-spec -Wincompatible-function-pointer-types -Wincompatible-library-redeclaration -Wincompatible-ms-struct -Wincompatible-pointer-types -Wincompatible-pointer-types-discards-qualifiers -Wincompatible-property-type -Wincompatible-sysroot -Wincomplete-framework-module-declaration -Wincomplete-implementation -Wincomplete-module -Wincomplete-setjmp-declaration -Wincomplete-umbrella - -Winconsistent-dllimport -Winconsistent-missing-destructor-override -Winconsistent-missing-override - -Wincrement-bool - -Winfinite-recursion - -Winit-self - -Winitializer-overrides - -Winjected-class-name - -Winline -Winline-asm - # -Winline-namespace-reopened-noninline (clang 11, not in clang 9) - -Winline-new-delete - -Winstantiation-after-specialization - -Wint-conversion -Wint-conversions - # -Wint-in-bool-context (clang 11, not in clang 9) - -Wint-to-pointer-cast -Wint-to-void-pointer-cast - -Winteger-overflow - # -Winterrupt-service-routine (clang 13, not in clang 11) - -Winvalid-command-line-argument -Winvalid-constexpr -Winvalid-iboutlet -Winvalid-initializer-from-system-header -Winvalid-ios-deployment-target - # -Winvalid-no-builtin-names (clang 11, not in clang 9) - -Winvalid-noreturn -Winvalid-offsetof -Winvalid-or-nonexistent-directory -Winvalid-partial-specialization -Winvalid-pch -Winvalid-pp-token -Winvalid-source-encoding -Winvalid-token-paste - -Wjump-seh-finally - -Wkeyword-compat -Wkeyword-macro - -Wknr-promoted-parameter - -Wlanguage-extension-token - -Wlarge-by-value-copy - -Wliblto - # -Wlinker-warnings (clang 15, not in clang 
13) - -Wliteral-conversion -Wliteral-range - # -Wlocal-type-template-args - -Wlogical-not-parentheses -Wlogical-op-parentheses - -Wlong-long - -Wloop-analysis - -Wmacro-redefined - -Wmain -Wmain-return-type - -Wmalformed-warning-check - -Wmany-braces-around-scalar-init - # -Wmax-tokens (clang 11, not in clang 9) - -Wmax-unsigned-zero - -Wmemset-transposed-args -Wmemsize-comparison - -Wmethod-signatures - -Wmicrosoft - # -Wmicrosoft-abstract (clang 13, not in clang 11) - -Wmicrosoft-anon-tag -Wmicrosoft-cast -Wmicrosoft-charize -Wmicrosoft-comment-paste -Wmicrosoft-const-init -Wmicrosoft-cpp-macro -Wmicrosoft-default-arg-redefinition -Wmicrosoft-drectve-section -Wmicrosoft-end-of-file -Wmicrosoft-enum-forward-reference -Wmicrosoft-enum-value -Wmicrosoft-exception-spec -Wmicrosoft-exists -Wmicrosoft-explicit-constructor-call -Wmicrosoft-extra-qualification -Wmicrosoft-fixed-enum -Wmicrosoft-flexible-array -Wmicrosoft-goto -Wmicrosoft-inaccessible-base -Wmicrosoft-include -Wmicrosoft-mutable-reference -Wmicrosoft-pure-definition -Wmicrosoft-redeclare-static -Wmicrosoft-sealed - # -Wmicrosoft-static-assert (clang 13, not in clang 11) - -Wmicrosoft-template - # -Wmicrosoft-template-shadow (clang 11, not in clang 9) - -Wmicrosoft-union-member-reference -Wmicrosoft-unqualified-friend -Wmicrosoft-using-decl -Wmicrosoft-void-pseudo-dtor - # -Wmisexpect (clang 15, not in clang 13) - # -Wmisleading-indentation (clang 11, not in clang 9) - -Wmismatched-new-delete -Wmismatched-parameter-types -Wmismatched-return-types -Wmismatched-tags - -Wmissing-braces - # -Wmissing-constinit (clang 11, not in clang 9) - -Wmissing-declarations -Wmissing-exception-spec -Wmissing-field-initializers -Wmissing-format-attribute -Wmissing-include-dirs -Wmissing-method-return-type -Wmissing-noescape -Wmissing-noreturn -Wmissing-prototype-for-cc -Wmissing-prototypes -Wmissing-selector-name -Wmissing-sysroot -Wmissing-variable-declarations - # -Wmisspelled-assumption (clang 13, not in clang 11) - # 
-Rmodule-build - -Wmodule-conflict -Wmodule-file-config-mismatch -Wmodule-file-extension -Rmodule-import -Wmodule-import-in-extern-c - # -Rmodule-lock - -Wmodules-ambiguous-internal-linkage -Wmodules-import-nested-redundant - -Wmost - -Wmove - -Wmsvc-include -Wmsvc-not-found - -Wmultichar - -Wmultiple-move-vbase - -Wnarrowing - -Wnested-anon-types -Wnested-externs - -Wnew-returns-null - -Wnewline-eof - -Wnoderef - -Wnoexcept-type - # -Wnon-c-typedef-for-linkage (clang 11, not in clang 9) - -Wnon-gcc - -Wnon-literal-null-conversion - -Wnon-modular-include-in-framework-module -Wnon-modular-include-in-module - -Wnon-pod-varargs - # -Wnon-power-of-two-alignment (clang 11, not in clang 9) - -Wnon-virtual-dtor - -Wnonnull - -Wnonportable-cfstrings -Wnonportable-include-path -Wnonportable-system-include-path - -Wnonportable-vector-initialization - -Wnontrivial-memaccess - -Wnsconsumed-mismatch - -Wnsreturns-mismatch - -Wnull-arithmetic -Wnull-character -Wnull-conversion -Wnull-dereference -Wnull-pointer-arithmetic - # -Wnull-pointer-subtraction (clang 13, not in clang 11) - -Wnullability -Wnullability-completeness -Wnullability-completeness-on-arrays -Wnullability-declspec -Wnullability-extension -Wnullability-inferred-on-nested-type - -Wnullable-to-nonnull-conversion - -Wodr - -Wold-style-cast -Wold-style-definition - -Wopencl-unsupported-rgba - # -Wopenmp (clang 11, not in clang 9) - # -Wopenmp-51-extensions (clang 13, not in clang 11) - -Wopenmp-clauses -Wopenmp-loop-form - # -Wopenmp-mapping (clang 11, not in clang 9) - -Wopenmp-target - -Woption-ignored - -Wordered-compare-function-pointers - -Wout-of-line-declaration -Wout-of-scope-function - -Wover-aligned - -Woverflow - -Woverlength-strings - -Woverloaded-shift-op-parentheses -Woverloaded-virtual - -Woverride-init -Woverride-module -Woverriding-method-mismatch -Woverriding-t-option - -Wpacked - # -Wpadded (ask for aligment) - -Wparentheses -Wparentheses-equality - -Wpartial-availability - # -Rpass -Rpass-analysis 
- -Wpass-failed - # -Rpass-missed - -Wpch-date-time - -Wpedantic -Wpedantic-core-features - # -Wpedantic-macros (clang 15, not in clang 13) - -Wpessimizing-move - -Wpointer-arith -Wpointer-bool-conversion - # -Wpointer-compare (clang 11, not in clang 9) - -Wpointer-integer-compare -Wpointer-sign - # -Wpointer-to-enum-cast (clang 11, not in clang 9) - -Wpointer-to-int-cast -Wpointer-type-mismatch - # -Wpoison-system-directories (clang 11, not in clang 9) - # -Wpotentially-direct-selector (clang 11, not in clang 9) - -Wpotentially-evaluated-expression - # -Wpragma-clang-attribute - # -Wpragma-once-outside-header - # -Wpragma-pack - # -Wpragma-pack-suspicious-include - # -Wpragma-system-header-outside-header - # -Wpragmas - # -Wpre-c++14-compat -Wpre-c++14-compat-pedantic - # -Wpre-c++17-compat -Wpre-c++17-compat-pedantic - # -Wpre-c++20-compat -Wpre-c++20-compat-pedantic -Wpre-c++2b-compat -Wpre-c++2b-compat-pedantic (clang 13, not in clang 11) - # -Wpre-c2x-compat -Wpre-c2x-compat-pedantic (clang 13, not in clang 11) - # -Wpre-openmp-51-compat (clang 13, not in clang 11) - -Wpredefined-identifier-outside-function - -Wprivate-extern -Wprivate-header -Wprivate-module - -Wprofile-instr-missing -Wprofile-instr-out-of-date -Wprofile-instr-unprofiled - -Wproperty-access-dot-syntax -Wproperty-attribute-mismatch - -Wprotocol -Wprotocol-property-synthesis-ambiguity - # -Wpsabi (clang 11, not in clang 9) - -Wqualified-void-return-type - -Wquoted-include-in-framework-header - # -Wrange-loop-analysis (this check is overzealous in clang 9) - # -Wrange-loop-bind-reference -Wrange-loop-construct (clang 11, not in clang 9) - -Wreadonly-iboutlet-property - -Wreceiver-expr -Wreceiver-forward-class - -Wredeclared-class-member - # -Wredundant-consteval-if (clang 15, not in clang 13) - -Wredundant-decls -Wredundant-move -Wredundant-parens - -Wregister - -Wreinterpret-base-class - -Rremark-backend-plugin - -Wreorder - # -Wreorder-ctor -Wreorder-init-list (clang 11, not in clang 9) - 
-Wrequires-super-attribute - -Wreserved-id-macro - # -Wreserved-identifier -Wreserved-macro-identifier (clang 13, not in clang 11) - -Wreserved-user-defined-literal - # -Wrestrict-expansion (clang 15, not in clang 13) - -Wretained-language-linkage - -Wreturn-stack-address -Wreturn-std-move -Wreturn-type -Wreturn-type-c-linkage - # -Wrewrite-not-bool (clang 11, not in clang 9) - # -Rround-trip-cc1-args (clang 13, not in clang 11) - # -Wrtti (clang 13, not in clang 11) - -Rsanitize-address - # -Rsearch-path-usage - -Wsection - -Wselector -Wselector-type-mismatch - -Wself-assign -Wself-assign-field -Wself-assign-overloaded - -Wself-move - -Wsemicolon-before-method-body - -Wsentinel - -Wsequence-point - -Wserialized-diagnostics - -Wshadow -Wshadow-all -Wshadow-field -Wshadow-field-in-constructor -Wshadow-field-in-constructor-modified -Wshadow-ivar -Wshadow-uncaptured-local - -Wshift-count-negative -Wshift-count-overflow -Wshift-negative-value -Wshift-op-parentheses -Wshift-overflow -Wshift-sign-overflow - -Wshorten-64-to-32 - -Wsign-compare -Wsign-conversion -Wsign-promo - -Wsigned-enum-bitfield - # -Wsigned-unsigned-wchar (clang 11, not in clang 9) - -Wsizeof-array-argument -Wsizeof-array-decay - # -Wsizeof-array-div (clang 11, not in clang 9) - -Wsizeof-pointer-div -Wsizeof-pointer-memaccess - -Wslash-u-filename - # -Wslh-asm-goto (clang 11, not in clang 9) - -Wsometimes-uninitialized - # -Wsource-mgr (clang 13, not in clang 11) - -Wsource-uses-openmp - -Wspir-compat - # -Wspirv-compat (clang 15, not in clang 13) - # -Wstack-exhausted (clang 11, not in clang 9) - -Wstack-protector - -Wstatic-float-init -Wstatic-in-inline -Wstatic-inline-explicit-instantiation -Wstatic-local-in-inline -Wstatic-self-init - -Wstdlibcxx-not-found - -Wstrict-aliasing -Wstrict-aliasing=0 -Wstrict-aliasing=1 -Wstrict-aliasing=2 - -Wstrict-overflow -Wstrict-overflow=0 -Wstrict-overflow=1 -Wstrict-overflow=2 -Wstrict-overflow=3 -Wstrict-overflow=4 -Wstrict-overflow=5 - # 
-Wstrict-potentially-direct-selector (clang 11, not in clang 9) - -Wstrict-prototypes -Wstrict-selector-match - -Wstring-compare - # -Wstring-concatenation (clang 13, not in clang 11) - -Wstring-conversion -Wstring-plus-char -Wstring-plus-int - -Wstrlcpy-strlcat-size - -Wstrncat-size - # -Wsuggest-destructor-override (clang 11, not in clang 9) - # -Wsuggest-override (clang 11, not in clang 9) - -Wsuper-class-method-mismatch - -Wsuspicious-bzero -Wsuspicious-memaccess - # -Wswift-name-attribute (clang 13, not in clang 11) - -Wswitch -Wswitch-bool -Wswitch-default -Wswitch-enum - -Wsync-fetch-and-nand-semantics-changed - -Wsynth - # -Wtarget-clones-mixed-specifiers (clang 15, not in clang 13) - # -Wtautological-bitwise-compare (clang 11, not in clang 9) - -Wtautological-compare -Wtautological-constant-compare -Wtautological-constant-in-range-compare -Wtautological-constant-out-of-range-compare -Wtautological-objc-bool-compare -Wtautological-overlap-compare -Wtautological-pointer-compare -Wtautological-type-limit-compare -Wtautological-undefined-compare - # -Wtautological-unsigned-char-zero-compare -Wtautological-unsigned-enum-zero-compare - # -Wtautological-unsigned-zero-compare -Wtautological-value-range-compare (clang 13, not in clang 11) - # -Wtcb-enforcement (clang 13, not in clang 11) - -Wtentative-definition-incomplete-type - -Wthread-safety -Wthread-safety-analysis -Wthread-safety-attributes -Wthread-safety-beta -Wthread-safety-negative -Wthread-safety-precise -Wthread-safety-reference -Wthread-safety-verbose - -Wtrigraphs - -Wtype-limits -Wtype-safety - -Wtypedef-redefinition - -Wtypename-missing - -Wunable-to-open-stats-file - # -Wunaligned-access (clang 15, not in clang 13) - # -Wunaligned-qualifier-implicit-cast (clang 15, not in clang 13) - -Wunavailable-declarations - -Wundeclared-selector - -Wundef - # -Wundef-prefix (clang 11, not in clang 9) - -Wundefined-bool-conversion -Wundefined-func-template -Wundefined-inline -Wundefined-internal 
-Wundefined-internal-type -Wundefined-reinterpret-cast -Wundefined-var-template -Wunderaligned-exception-object - -Wunevaluated-expression - -Wunguarded-availability -Wunguarded-availability-new - -Wunicode -Wunicode-homoglyph -Wunicode-whitespace -Wunicode-zero-width - -Wuninitialized - # -Wuninitialized-const-reference (clang 11, not in clang 9) - -Wunknown-argument - # -Wunknown-assumption (clang 13, not in clang 11) - -Wunknown-attributes - # -Wunknown-cuda-version (clang 11, not in clang 9) - -Wunknown-escape-sequence - # -Wunknown-pragmas # (other compilers need their own pragmas) - -Wno-unknown-pragmas # (other compilers need their own pragmas) - # -Wunknown-sanitizers - # -Wunknown-warning-option - -Wunnamed-type-template-args - -Wunneeded-internal-declaration -Wunneeded-member-function - -Wno-unknown-pragmas - # -Wunqualified-std-cast-call (clang 15, not in clang 13) - -Wunreachable-code -Wunreachable-code-aggressive -Wunreachable-code-break - # -Wunreachable-code-fallthrough (clang 15, not in clang 13) - -Wunreachable-code-loop-increment -Wunreachable-code-return - -Wunsequenced - # -Wunsupported-abi (clang 15, not in clang 13) - -Wunsupported-abs -Wunsupported-availability-guard -Wunsupported-cb -Wunsupported-dll-base-class-template - # -Wunsupported-floating-point-opt (clang 11, not in clang 9) - -Wunsupported-friend -Wunsupported-gpopt -Wunsupported-nan -Wunsupported-target-opt -Wunsupported-visibility - -Wunusable-partial-specialization - -Wunused -Wunused-argument - # -Wunused-but-set-parameter -Wunused-but-set-variable (clang 13, not in clang 11) - -Wunused-command-line-argument -Wunused-comparison -Wunused-const-variable -Wunused-exception-parameter -Wunused-function -Wunused-getter-return-value -Wunused-label -Wunused-lambda-capture -Wunused-local-typedef -Wunused-local-typedefs -Wunused-macros -Wunused-member-function -Wunused-parameter -Wunused-private-field -Wunused-property-ivar -Wunused-result -Wunused-template -Wunused-value 
-Wunused-variable -Wunused-volatile-lvalue - # -Wused-but-marked-unused (Boost.Test) - -Wuser-defined-literals -Wuser-defined-warnings - -Wvarargs - -Wvariadic-macros - -Wvec-elem-size - -Wvector-conversion -Wvector-conversions - -Wvexing-parse - -Wvisibility - -Wvla -Wvla-extension - # -Wvoid-pointer-to-enum-cast -Wvoid-pointer-to-int-cast (clang 11, not in clang 9) - -Wvoid-ptr-dereference - -Wvolatile-register-var - # -Wwasm-exception-spec (clang 11, not in clang 9) - -Wweak-template-vtables -Wweak-vtables - -Wwritable-strings - -Wwrite-strings - # -Wxor-used-as-pow (clang 11, not in clang 9) - -Wzero-as-null-pointer-constant -Wzero-length-array - -Wno-c++98-compat-unnamed-type-template-args - -Wno-ignored-qualifiers - -Wno-range-loop-analysis # (this check is overzealous in clang 9) - > - $<$: # also IntelLLVM, XL (ibm), XLClang (ibm) - -Werror - -Wall - -Wextra - -diag-disable=remark - -diag-error:3846 - -diag-disable=1011 # disables warning missing return at the end of non-void function - -diag-disable=2196 # disables "error #2196: routine is both "inline" and "noinline"" in icpc 2021.5.0 - -wd161 - -Wabi - -Warray-bounds - -Wcast-qual - -Wchar-subscripts - -Wcomment - -Wdeprecated - -Wenum-compare - -Wextra-tokens - -Wformat - -Wformat=2 - -Wformat-security - -Wic-pointer - -Wnarrowing - -Wno-return-type - -Wnon-virtual-dtor - -Wnonnull - -Wmaybe-uninitialized - -Wmain - -Wmissing-declarations -Wmissing-prototypes - -Wmultichar - -Woverloaded-virtual - -Woverflow - -Wp64 - -Wparentheses - -Wpointer-arith - -Wpointer-sign - -Wreorder - -Wreturn-type - -Wsequence-point - -Wshadow - -Wsign-compare - -Wshorten-64-to-32 - -Wmissing-prototypes - -Wno-strict-aliasing - -Wno-unknown-pragmas - -Wstrict-prototypes - -Wtrigraphs - -Wtype-limits - -Wuninitialized - -Wunused -Wunused-but-set-variable -Wunused-function -Wunused-parameter -Wunused-variable - -Wwrite-strings - > - $<$,$>: - -Werror - -Wall - -Wcast-qual - -Wformat=2 - -Wshadow - -Wsign-compare - 
-Wuninitialized - -Wunused - -Wunused-parameter - > - $<$: - /WX - /W4 - > - ) else() - target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_TEST_DYN_LINK=1") - target_compile_options (${TEST_EXE} PRIVATE -Werror -Wall) + + foreach(TEST_FILE ${TEST_SRCS}) + set(TEST_EXE "${TEST_FILE}.x") + add_executable(${TEST_EXE} ${TEST_FILE}) + + if(ENABLE_CUDA) + set_source_files_properties(${TEST_FILE} PROPERTIES LANGUAGE CUDA) + endif() + + # target_include_directories(${TEST_EXE} PRIVATE ${PROJECT_SOURCE_DIR}/include) + # target_include_directories(${TEST_EXE} SYSTEM PRIVATE ${Boost_INCLUDE_DIRS} ) + + target_link_libraries(${TEST_EXE} PRIVATE multi) + target_link_libraries (${TEST_EXE} PRIVATE multi Boost::unit_test_framework ) + if(TBB_FOUND) + target_link_libraries (${TEST_EXE} PRIVATE TBB::tbb) + target_compile_definitions(${TEST_EXE} PUBLIC TBB_FOUND=1) + endif() + + target_compile_definitions(${TEST_EXE} PUBLIC BOOST_TEST_MODULE="C++ Unit Tests for Multi ${TEST_EXE}") + target_compile_definitions(${TEST_EXE} PUBLIC BOOST_TEST_DYN_LINK=1) + target_compile_definitions(${TEST_EXE} PUBLIC BOOST_NO_CXX98_FUNCTION_BASE=1) + + target_compile_definitions(${TEST_EXE} PRIVATE "$<$:BOOST_PP_VARIADICS>") + + if(NOT DEFINED ENABLE_CIRCLE) + target_compile_options(${TEST_EXE} PRIVATE ${WARNS}) + else() + target_compile_definitions(${TEST_EXE} PRIVATE "BOOST_TEST_DYN_LINK=1") + target_compile_options (${TEST_EXE} PRIVATE -Werror -Wall) + endif() + add_test(NAME ${TEST_EXE} COMMAND $) + endforeach() + endif() - add_test(NAME ${TEST_EXE} COMMAND ./${TEST_EXE}) -endforeach() +endif() diff --git a/external_codes/boost_multi/multi/test/Jamfile b/external_codes/boost_multi/multi/test/Jamfile new file mode 100644 index 0000000000..1dc3da8fac --- /dev/null +++ b/external_codes/boost_multi/multi/test/Jamfile @@ -0,0 +1,93 @@ +# Copyright 2024 Matt Borland +# Distributed under the Boost Software License, Version 1.0. 
+# https://www.boost.org/LICENSE_1_0.txt + +import testing ; +import ../../config/checks/config : requires ; + +project : requirements + + /boost/test//boost_unit_test_framework + + gcc:-Wall + gcc:-Wextra + + clang:-Wall + clang:-Wextra + + msvc:all + + # Additional flags by request + gcc:-Wsign-conversion + gcc:-Wconversion + #gcc:-Wundef # Fix pending in Boost.Test + gcc:-Wold-style-cast + #gcc:-Wduplicated-branches + gcc:-Wfloat-equal + + clang:-Wsign-conversion + clang:-Wconversion + clang:-Wundef + clang:-Wold-style-cast + clang:-Wfloat-equal + + # Clang-Cl likes to blow up builds with these compatibility checks + clang:-Wno-c++98-compat + clang:-Wno-c++98-compat-pedantic + clang:-Wno-c++17-compat + clang:-Wno-c++14-compat + clang:"_CRT_SECURE_NO_WARNINGS" + + msvc:on + #clang:on + gcc:on + + [ requires cxx17_if_constexpr cxx17_structured_bindings cxx17_std_apply ] + ; + +run allocator.cpp ; +run array_cref.cpp ; +run array_fancyref.cpp ; +run array_legacy_c.cpp ; +run array_ptr.cpp ; +run array_ref.cpp ; +run assignments.cpp ; +#run boost_array_concept.cpp ; +run comparisons.cpp ; +run concepts.cpp ; +run constructors.cpp ; +run conversions.cpp ; +run diagonal.cpp ; +run element_access.cpp ; +run element_transformed.cpp ; +#run execution_policy.cpp ; +run fill.cpp ; +run fix_complex.cpp ; +run flatted.cpp ; +run index_range.cpp ; +run initializer_list.cpp ; +run iterator.cpp ; +run layout.cpp ; +run member_array_cast.cpp ; +run minimalistic_ptr.cpp ; +run move.cpp ; +run nico_const_correctness.cpp ; +run one_based.cpp ; +run overload.cpp ; +run partitioned.cpp ; +run pmr.cpp ; +run ranges.cpp ; +run reextent.cpp ; +run reinterpret_array_cast.cpp ; +run reversed.cpp ; +run rotated.cpp ; +run scoped_allocator.cpp ; +run select_column.cpp ; +run sliced.cpp ; +run sort.cpp ; +run static_array_cast.cpp ; +run std_vector_substitutability.cpp ; +run subrange.cpp ; +run transform.cpp ; +run utility.cpp ; +run zero_dimensionality.cpp ; diff --git 
a/external_codes/boost_multi/multi/test/allocator.cpp b/external_codes/boost_multi/multi/test/allocator.cpp index 98d9a96b33..57c07d00bd 100644 --- a/external_codes/boost_multi/multi/test/allocator.cpp +++ b/external_codes/boost_multi/multi/test/allocator.cpp @@ -1,17 +1,53 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi allocators" -#include +#include -#include "multi/array.hpp" +#include -//#include "../../multi/memory/stack.hpp" //TODO(correaa) test custom allocator +#include -#include +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; +BOOST_AUTO_TEST_CASE(empty_stride) { + multi::array ma; + BOOST_REQUIRE(ma.size() == 0); + BOOST_REQUIRE(ma.stride() != 0); + BOOST_REQUIRE(size(ma) == 0); + + multi::array ma0({0, 0}, 0.0); + BOOST_REQUIRE(ma0.size() == 0); + BOOST_REQUIRE(ma0.stride() != 0); +#ifndef _MSC_VER // doesn't work with msvc 14.3 c++17 permissive mode + BOOST_REQUIRE(size(ma0) == 0); +#endif +} + BOOST_AUTO_TEST_CASE(std_vector_of_arrays) { std::vector> va; 
std::transform( @@ -20,32 +56,48 @@ BOOST_AUTO_TEST_CASE(std_vector_of_arrays) { [](auto idx){return multi::array({idx, idx}, static_cast(idx));} ); +#ifndef _MSC_VER // doesn't work with msvc 14.3 c++17 permissive mode BOOST_REQUIRE( size(va[0]) == 0 ); BOOST_REQUIRE( size(va[1]) == 1 ); BOOST_REQUIRE( size(va[2]) == 2 ); +#endif + BOOST_REQUIRE( va[1] [0][0] == 1 ); BOOST_REQUIRE( va[2] [0][0] == 2 ); - std::vector> wa = { - multi::array({0, 0}, 0.), - multi::array({1, 1}, 1.), - multi::array({2, 2}, 2.), +#ifndef _MSC_VER // doesn't work with msvc 14.3 c++17 permissive mode + std::vector> const wa = { // testing std::vector of multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + multi::array({0, 0}, 0.0), + multi::array({1, 1}, 1.0), + multi::array({2, 2}, 2.0), + }; +#else + std::vector> const wa = { // testing std::vector of multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + multi::array(multi::extensions_t<2>(0, 0), 0.0), + multi::array(multi::extensions_t<2>(1, 1), 1.0), + multi::array(multi::extensions_t<2>(2, 2), 2.0), }; +#endif + +#ifndef _MSC_VER // doesn't work with msvc 14.3 c++17 permissive mode BOOST_REQUIRE( size(va) == size(wa) ); +#endif BOOST_REQUIRE( va == wa ); - std::vector> ua(3); + std::vector> ua(3, std::allocator>{}); auto iex = multi::iextension(static_cast(ua.size())); std::transform( begin(iex), end(iex), begin(ua), - [](auto idx){return multi::array({idx, idx}, static_cast(idx));} + [](auto idx) {return multi::array({idx, idx}, static_cast(idx));} ); BOOST_REQUIRE( ua == va ); } +// TODO(correaa) make this code work with nvcc compiler (non device function called from device host through adl uninitialized_fill) +#if !(defined(__NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) || defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)) BOOST_AUTO_TEST_CASE(array1d_of_arrays2d) { - multi::array, 1> arr(multi::extensions_t<1>{multi::iextension{10}}, multi::array{}); + multi::array, 1> 
arr(multi::extensions_t<1>(multi::iextension{10}), multi::array{}); BOOST_REQUIRE( size(arr) == 10 ); std::transform( @@ -56,46 +108,307 @@ BOOST_AUTO_TEST_CASE(array1d_of_arrays2d) { BOOST_REQUIRE( size(arr[0]) == 0 ); BOOST_REQUIRE( size(arr[1]) == 1 ); BOOST_REQUIRE( size(arr[8]) == 8 ); - BOOST_REQUIRE( arr[8][4][4] == 8 ); + BOOST_REQUIRE( arr[8][4][4] == 8.0 ); } BOOST_AUTO_TEST_CASE(array_3d_of_array_2d) { multi::array, 2> AA({10, 20}, multi::array{}); - for(int i = 0; i != 10; ++i) { - for(int j = 0; j != 20; ++j) { - AA[i][j] = multi::array({i+j, i+j, i+j}, 99.); - } - } + std::transform(extension(AA).begin(), extension(AA).end(), AA.begin(), AA.begin(), [](auto idx, auto&& row) -> decltype(row) { + std::transform(extension(row).begin(), extension(row).end(), row.begin(), [idx](auto jdx) { + return multi::array({idx + jdx, idx + jdx, idx + jdx}, 99.0); + }); + return std::forward(row); + }); + + BOOST_REQUIRE( size(AA[9][19]) == 9 + 19 ); + BOOST_REQUIRE( AA[9][19][1][1][1] == 99.0 ); +} + +BOOST_AUTO_TEST_CASE(array_3d_of_array_2d_no_init) { + multi::array, 2> AA({10, 20}); + std::transform(extension(AA).begin(), extension(AA).end(), AA.begin(), AA.begin(), [](auto idx, auto&& row) -> decltype(row) { + std::transform(extension(row).begin(), extension(row).end(), row.begin(), [idx](auto jdx) { + return multi::array({idx + jdx, idx + jdx, idx + jdx}, 99.0); + }); + return std::forward(row); + }); + BOOST_REQUIRE( size(AA[9][19]) == 9 + 19 ); BOOST_REQUIRE( AA[9][19][1][1][1] == 99. 
); } +#endif + +BOOST_AUTO_TEST_CASE(const_elements) { + auto ptr = std::make_unique(2.0); +// *ptr = 3.0; // ok, can't assign + BOOST_REQUIRE( *ptr == 2.0 ); + +// multi::array> arr({10, 10}, 99.0); +// BOOST_REQUIRE( arr[1][2] == 99.0 ); +} + +#ifdef BOOST_MULTI_HAS_MEMORY_RESOURCE +BOOST_AUTO_TEST_CASE(pmr) { + std::array buffer = {{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C'}}; + std::pmr::monotonic_buffer_resource pool{std::data(buffer), std::size(buffer)}; + + multi::array> Aarr({2, 2}, 'x', &pool); + Aarr[0][0] = 'x'; Aarr[0][1] = 'y'; + Aarr[1][0] = 'z'; Aarr[1][1] = '&'; + + multi::array> Barr({3, 2}, 'o', &pool); + + BOOST_REQUIRE(( buffer != std::array{{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C'}} )); + +#if defined(__GLIBCXX__) + BOOST_REQUIRE(( buffer == std::array{{'x', 'y', 'z', '&', 'o', 'o', 'o', 'o', 'o', 'o', 'A', 'B', 'C'}} )); +#endif +#if defined(_LIBCPP_VERSION) + BOOST_REQUIRE(( buffer == std::array{{'0', '1', '2', 'o', 'o', 'o', 'o', 'o', 'o', 'x', 'y', 'z', '&'}} )); +#endif + + BOOST_REQUIRE(Aarr[0][0] == 'x'); + BOOST_REQUIRE(Barr[0][0] == 'o'); +} + +BOOST_AUTO_TEST_CASE(pmr2) { + std::array buffer = {{'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X'}}; + std::pmr::monotonic_buffer_resource pool{std::data(buffer), std::size(buffer)}; + +#ifndef _MSC_VER + multi::pmr::array Aarr({2, 2}, 'a', &pool); + multi::pmr::array Barr({3, 2}, 'b', &pool); +#else + multi::pmr::array Aarr(multi::extensions_t<2>{2, 2}, 'a', &pool); + multi::pmr::array Barr(multi::extensions_t<2>{3, 2}, 'b', &pool); +#endif + +#if defined(__GLIBCXX__) + BOOST_REQUIRE(( buffer == std::array{{'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b', 'b', 'X', 'X', 'X'}} )); +#endif +#if defined(_LIBCPP_VERSION) + BOOST_REQUIRE(( buffer == std::array{{'X', 'X', 'X', 'b', 'b', 'b', 'b', 'b', 'b', 'a', 'a', 'a', 'a'}} )); +#endif + + BOOST_REQUIRE(Aarr[0][0] == 'a'); + BOOST_REQUIRE(Barr[0][0] == 'b'); +} + 
+BOOST_AUTO_TEST_CASE(pmr_double_uninitialized) { + std::array buffer = {{4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.00, 11.0, 996.0, 997.0, 998.0, 999.0}}; + std::pmr::monotonic_buffer_resource pool{static_cast(std::data(buffer)), 12*sizeof(double)}; + + multi::pmr::array Aarr({2, 2}, &pool); + + BOOST_TEST( buffer[0] == 4.0 ); + BOOST_TEST( buffer[1] == 5.0 ); + +#if defined(__GLIBCXX__) + BOOST_REQUIRE(Aarr[0][0] == 4.0); +#endif +#if defined(_LIBCPP_VERSION) + BOOST_REQUIRE(Aarr[0][0] == 996.0); +#endif +} +#endif + +BOOST_AUTO_TEST_CASE(static_allocator) { + using T = int; + multi::detail::static_allocator sa{}; + auto* pp = sa.allocate(10); + new(std::next(pp, 8)) T{42}; + BOOST_REQUIRE( *std::next(pp, 8) == 42 ); + // (pp + 8)->~double(); + sa.deallocate(pp, 10); +} + +#if defined( __cpp_constexpr) && (__cpp_constexpr > 202306L) +constexpr auto f() { + std::vector v = {1, 2, 3}; + return v.size(); +} + +BOOST_AUTO_TEST_CASE(constexpr_allocator_vector) { + static_assert(f() == 3); + BOOST_REQUIRE( f() == 3 ); +} + +constexpr auto g() { + multi::array arr = {{4, 5, 6}, {1, 2, 3}, {7, 8, 9}}; + std::sort(arr.begin(), arr.end()); + for(auto it = arr.diagonal().begin(); it != arr.diagonal().end(); ++it) { + *it += 5; + } + auto ret = arr[1][1]; + return ret; +} + +BOOST_AUTO_TEST_CASE(constexpr_allocator) { + constexpr auto gg = g(); + static_assert( gg == 10 ); + BOOST_REQUIRE( gg == 10 ); +} +#endif + +#if !defined(_MSC_VER) // static allocator does not work with MSVC implementation pf vector +BOOST_AUTO_TEST_CASE(static_allocator_on_vector_int) { + std::vector> vv(10, 42); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( vv[3] == 42 ); + + auto ww = vv; + BOOST_REQUIRE( ww[3] == 42 ); -BOOST_AUTO_TEST_CASE(array_3d_with_hint_int) { - multi::array const arr({3, 4}); - multi::array arr_hint({3, 4, 5}, arr.cbase()); + ww[3] = 51; + BOOST_REQUIRE( ww[3] == 51 ); + BOOST_REQUIRE( vv[3] == 42 ); - arr_hint[1][2][3] = 4; - BOOST_REQUIRE( size(arr_hint) == 3 ); - 
BOOST_REQUIRE( arr_hint[1][2][3] == 4 ); + auto xx = std::move(ww); + BOOST_REQUIRE( ww.empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) + BOOST_REQUIRE( vv[3] == 42 ); + BOOST_REQUIRE( xx[3] == 51 ); - multi::array arr2({3, 4, 5}, 0); - BOOST_REQUIRE( size(arr2) == 3 ); + // swap(xx, vv); + // BOOST_REQUIRE( vv[3] == 51 ); + // BOOST_REQUIRE( xx[3] == 42 ); - multi::array arr3({3, 4, 5}, 99); - BOOST_REQUIRE( size(arr3) == 3 ); + { + std::vector< std::vector> > const VV = {vv, xx, vv}; // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( VV.size() == 3 ); + // swap(VV[0], VV[1]); + // std::sort(VV.begin(), VV.end()); + // BOOST_REQUIRE( std::is_sorted(VV.begin(), VV.end()) ); + // VV.resize(10, xx); + // std::sort(VV.begin(), VV.end()); + // BOOST_REQUIRE( std::is_sorted(VV.begin(), VV.end()) ); + } } -BOOST_AUTO_TEST_CASE(array_3d_with_hint_size_t) { - multi::array const arr({3, 4}); - multi::array arr_hint({3, 4, 5}, arr.cbase()); +BOOST_AUTO_TEST_CASE(static_allocator_on_vector_string) { + std::string const cat = "catcatcatcatcatcatcatcatcatcatcatcatcatcatcatcatcatcatcatcatcatcatcatcat"; // NOLINT(fuchsia-default-arguments-calls) + std::string const dog = "dogdogdogdogdogdogdogdogdogdogdogdogdogdogdogdogdogdogdogdogdogdogdogdog"; // NOLINT(fuchsia-default-arguments-calls) + + std::vector> vv(10, cat); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( vv[3] == cat ); + + auto ww = vv; + BOOST_REQUIRE( ww[3] == cat ); + + ww[3] = dog; + BOOST_REQUIRE( ww[3] == dog ); + BOOST_REQUIRE( vv[3] == cat ); + + auto xx = std::move(ww); + BOOST_REQUIRE( vv[3] == cat ); + BOOST_REQUIRE( xx[3] == dog ); + BOOST_REQUIRE( ww.empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) + + // vv.resize(15); + + // swap(xx, vv); + // BOOST_REQUIRE( vv[3] == dog ); + // BOOST_REQUIRE( xx[3] == cat ); + + { + std::vector< std::vector> > const VV = {vv, xx, vv}; // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( 
VV.size() == 3 ); + // swap(VV[0], VV[1]); + // std::sort(VV.begin(), VV.end()); + // BOOST_REQUIRE( std::is_sorted(VV.begin(), VV.end()) ); + // VV.resize(10, xx); + // std::sort(VV.begin(), VV.end()); + // BOOST_REQUIRE( std::is_sorted(VV.begin(), VV.end()) ); + } +} +#endif - arr_hint[1][2][3] = 4; - BOOST_REQUIRE( size(arr_hint) == 3 ); - BOOST_REQUIRE( arr_hint[1][2][3] == 4 ); +template +using small_array = multi::static_array>; +// https://godbolt.org/z/d8ozWahna - multi::array arr3({3, 4, 5}, 0UL); - BOOST_REQUIRE( size(arr3) == 3 ); +#if !defined(_MSC_VER) || (_MSC_VER > 193030706) // TODO(correaa) doesn't work on MSVC 14.3 in c++17 mode +BOOST_AUTO_TEST_CASE(small_array_int) { + small_array vv({4, 4}, 42); - multi::array arr4({3, 4, 5}, 99); - BOOST_REQUIRE( size(arr4) == 3 ); + BOOST_REQUIRE( vv[3][3] == 42 ); + + auto ww = vv; + BOOST_REQUIRE( ww[3][3] == 42 ); + BOOST_REQUIRE( ww.base() != vv.base() ); + auto* wwb = ww.base(); + auto* vvb = vv.base(); + + ww[3][3] = 51; + BOOST_REQUIRE( ww[3][3] == 51 ); + BOOST_REQUIRE( vv[3][3] == 42 ); + + swap(ww, vv); + BOOST_REQUIRE( vv[3][3] == 51 ); + BOOST_REQUIRE( ww[3][3] == 42 ); + + BOOST_REQUIRE( ww.base() == wwb ); + BOOST_REQUIRE( vv.base() == vvb ); + + auto xx = std::move(ww); + + BOOST_REQUIRE( vv[3][3] == 51 ); + BOOST_REQUIRE( xx[3][3] == 42 ); + // BOOST_REQUIRE( ww[3][3] == 42 ); + BOOST_REQUIRE( xx.base() != vv.base() ); + // BOOST_REQUIRE( ww.empty() ); + + small_array yy({4, 4}); + yy = vv; + BOOST_REQUIRE( yy == vv ); + +// #ifndef _MSC_VER // TODO(correaa) does not compile in MSVC 1.43 in c++17 mode + yy = std::move(vv); + BOOST_REQUIRE( vv.size() == 4 ); // NOLINT(clang-analyzer-cplusplus.Move,bugprone-use-after-move,hicpp-invalid-access-moved) +// #endif + + { + std::vector< small_array > VV = {vv, xx, vv}; // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( VV.size() == 3 ); + swap(VV[0], VV[1]); + std::sort(VV.begin(), VV.end()); + BOOST_REQUIRE( std::is_sorted(VV.begin(), 
VV.end()) ); + VV.resize(10, xx); + std::sort(VV.begin(), VV.end()); + BOOST_REQUIRE( std::is_sorted(VV.begin(), VV.end()) ); + } +} +#endif + +BOOST_AUTO_TEST_CASE(props_of_static_allocator) { + { + std::vector vv(20, 11); // NOLINT(fuchsia-default-arguments-calls) + std::vector ww = vv; + BOOST_REQUIRE( ww == vv ); + + ww = vv; + BOOST_REQUIRE( ww == vv ); + + ww = std::move(vv); + BOOST_REQUIRE( vv.size() == 0 ); // NOLINT(readability-container-size-empty,bugprone-use-after-move,hicpp-invalid-access-moved,clang-analyzer-cplusplus.Move) + + std::vector xx(20, 22); // NOLINT(fuchsia-default-arguments-calls) + swap( ww, xx ); + BOOST_REQUIRE( ww == std::vector(20, 22) ); // NOLINT(fuchsia-default-arguments-calls) + } +#if !defined(_MSC_VER) // static_allocator doesn't work with MSVC implementation of vector + { + std::vector> vv(20, 11); // NOLINT(fuchsia-default-arguments-calls) + std::vector> ww = vv; + BOOST_REQUIRE( ww == vv ); + + ww = vv; + BOOST_REQUIRE( ww == vv ); + + ww = std::move(vv); + BOOST_REQUIRE( vv.size() == 0 ); // NOLINT(readability-container-size-empty,bugprone-use-after-move,hicpp-invalid-access-moved,clang-analyzer-cplusplus.Move) + + std::vector> xx(20, 22); // NOLINT(fuchsia-default-arguments-calls) + swap( ww, xx ); + BOOST_REQUIRE(( ww == std::vector>(20, 22) )); // NOLINT(fuchsia-default-arguments-calls) + } +#endif } diff --git a/external_codes/boost_multi/multi/test/array_cref.cpp b/external_codes/boost_multi/multi/test/array_cref.cpp index 4a6fb2c473..fcd64db12b 100644 --- a/external_codes/boost_multi/multi/test/array_cref.cpp +++ b/external_codes/boost_multi/multi/test/array_cref.cpp @@ -1,23 +1,46 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi References" -#include +#include -#include "multi/array_ref.hpp" +#include +#include -#include -#include +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; -using complex = std::complex; BOOST_AUTO_TEST_CASE(array_cref) { + using complex = std::complex; + static_assert( std::is_same::element_type, complex>{}, "!"); static_assert( std::is_same::rebind, complex const*>{}, "!"); - std::vector vec(100, 0.); - std::vector const cvec(100); + std::vector vec(100, 0.); // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + std::vector const cvec(100); // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) multi::array_ref A2D(vec.data(), multi::extensions_t<2>{10, 10}); multi::array_ref B2D(vec.data(), {10, 10}); @@ -26,7 +49,7 @@ BOOST_AUTO_TEST_CASE(array_cref) { BOOST_REQUIRE( &A2D[3][4] == &B2D[3][4] ); - multi::array_ref D2D(cvec.data(), {10, 10}); + multi::array_ref const D2D(cvec.data(), {10, 10}); multi::array_cref F2D( vec.data(), {10, 10}); BOOST_REQUIRE( D2D.layout() == F2D.layout() ); @@ -35,9 +58,64 @@ BOOST_AUTO_TEST_CASE(array_cref) { BOOST_REQUIRE( F2D[7][8] == 3. 
); BOOST_REQUIRE( &A2D[7][8] == &F2D[7][8] ); -//#if defined(__cpp_deduction_guides) and not defined(__NVCC__) -// multi::array_ref G2D(dc.data(), {10, 10}); // TODO(correaa) -// BOOST_REQUIRE( G2D == D2D ); -//#endif +// #if defined(__cpp_deduction_guides) and not defined(__NVCC__) +// multi::array_ref G2D(dc.data(), {10, 10}); // TODO(correaa) +// BOOST_REQUIRE( G2D == D2D ); +// #endif +} + +#ifndef _MSC_VER // TODO(correaa) doesn't work on MSVC 14.3 in c++17 mode +BOOST_AUTO_TEST_CASE(arrays_1D_from_carray) { + double a_c_array[] = {1.0, 2.0, 3.0}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy types + multi::array an_array_value (a_c_array); // ok, it is a copy + multi::array_cref an_array_const_reference(a_c_array); // ok, it is read only reference + multi::array_ref an_array_reference (a_c_array); // ok, it is a reference + + BOOST_REQUIRE( an_array_value .size() == 3 && an_array_value [1] == 2.0 ); + BOOST_REQUIRE( an_array_const_reference.size() == 3 && an_array_const_reference[1] == 2.0 ); + BOOST_REQUIRE( an_array_reference .size() == 3 && an_array_reference [1] == 2.0 ); +} + +BOOST_AUTO_TEST_CASE(arrays_1D_from_const_carray) { + double const a_c_array[] = {1.0, 2.0, 3.0}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy types + multi::array an_array_value (a_c_array); // ok, it is a copy + multi::array_cref an_array_const_reference(a_c_array); // ok, it is read only reference +// multi::array_ref an_array_reference (a_c_array); // not ok, c array is const + + BOOST_REQUIRE( an_array_value .size() == 3 && an_array_value [1] == 2.0 ); + BOOST_REQUIRE( an_array_const_reference.size() == 3 && an_array_const_reference[1] == 2.0 ); +// BOOST_REQUIRE( an_array_reference .size() == 3 && an_array_reference [1] == 2. 
); } +#endif +BOOST_AUTO_TEST_CASE(arrays_1D_from_explict_init_list) { + std::initializer_list const il = {1.0, 2.0, 3.0}; + multi::array an_array_value (il); // ok, it is a copy + multi::array_cref an_array_const_reference(il); // ok, it is read only +// multi::array_ref an_array_reference ({1., 2., 3.}); // not allowed, the init list elems are const + + BOOST_REQUIRE( an_array_value .size() == 3 && an_array_value [1] == 2.0 ); + BOOST_REQUIRE( an_array_const_reference.size() == 3 && an_array_const_reference[1] == 2.0 ); +// BOOST_REQUIRE( an_array_reference .size() == 3 && an_array_reference [1] == 2. ); +} + +BOOST_AUTO_TEST_CASE(arrays_1D_from_explict_auto_init_list) { + auto il = {1.0, 2.0, 3.0}; + multi::array an_array_value (il); // ok, it is a copy + multi::array_cref an_array_const_reference(il); // ok, it is read only +// multi::array_ref an_array_reference ({1., 2., 3.}); // not allowed, the init list elems are const + + BOOST_REQUIRE( an_array_value .size() == 3 && an_array_value [1] == 2.0 ); + BOOST_REQUIRE( an_array_const_reference.size() == 3 && an_array_const_reference[1] == 2.0 ); +// BOOST_REQUIRE( an_array_reference .size() == 3 && an_array_reference [1] == 2. ); +} + +BOOST_AUTO_TEST_CASE(arrays_1D_from_init_list) { + multi::array an_array_value ({1.0, 2.0, 3.0}); // ok, it is a copy +// multi::array_cref an_array_const_reference({1.0, 2.0, 3.0}); // not ok, constructor disable because memcheck detects use after scope +// multi::array_ref an_array_reference ({1., 2., 3.}); // not allowed, the init list elems are const + + BOOST_REQUIRE( an_array_value .size() == 3 && an_array_value [1] == 2. ); +// BOOST_REQUIRE( an_array_const_reference.size() == 3 && an_array_const_reference[1] == 2.0 ); +// BOOST_REQUIRE( an_array_reference .size() == 3 && an_array_reference [1] == 2. 
); +} diff --git a/external_codes/boost_multi/multi/test/array_fancyref.cpp b/external_codes/boost_multi/multi/test/array_fancyref.cpp index 22378ebeb4..132ccacb08 100644 --- a/external_codes/boost_multi/multi/test/array_fancyref.cpp +++ b/external_codes/boost_multi/multi/test/array_fancyref.cpp @@ -1,58 +1,93 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi constructors" -#include - -#include "multi/array.hpp" +// Copyright 2018-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4285) // Recursive return type for fancy_ptr if infix notationis applied +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif namespace fancy { template class ref; template class ptr { // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - static double value; + static double const value; public: - using difference_type = std::ptrdiff_t; - using value_type = 
std::decay_t; - using pointer = T*; - using reference = ref; + using difference_type = std::ptrdiff_t; + using value_type = std::decay_t; + using pointer = T*; + using reference = ref; using iterator_category = std::random_access_iterator_tag; - ptr() = default;//ptr(ptr const=default; ptr& operator=(ptr const&)=default; - explicit ptr(std::nullptr_t) {} - template constexpr explicit ptr(ptr const& /*other*/) {} - constexpr ptr(ptr const& /*other*/) {} // NOLINT(hicpp-use-equals-default,modernize-use-equals-default) + ptr() noexcept = default; + explicit ptr(std::nullptr_t) noexcept {} + template + constexpr explicit ptr(ptr const& /*other*/) noexcept {} + // template(std::declval()))> + // constexpr explicit ptr(ptr const& /*other*/) noexcept {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) + constexpr ptr(ptr const& /*other*/) = default; // vvv it is important that these two functions are device or device host functions // NOLINTNEXTLINE(fuchsia-overloaded-operator, fuchsia-trailing-return): this class simulates pointer - constexpr auto operator*() const -> reference {return reference{};} + constexpr auto operator*() const noexcept -> reference { return reference{}; } // NOLINTNEXTLINE(fuchsia-overloaded-operator, fuchsia-trailing-return): this class simulates pointer - constexpr auto operator+(difference_type /*unused*/) const -> ptr {return *this;} + constexpr auto operator+(difference_type /*unused*/) const noexcept -> ptr { return *this; } // NOLINTNEXTLINE(fuchsia-overloaded-operator, fuchsia-trailing-return): this class simulates pointer - auto operator+=(difference_type /*difference*/) -> ptr& {return *this;} + auto operator+=(difference_type /*difference*/) noexcept -> ptr& { return *this; } // NOLINTNEXTLINE(fuchsia-overloaded-operator, fuchsia-trailing-return): this class simulates pointer - auto operator++() -> ptr& {return operator+=(1);} + auto operator++() noexcept -> ptr& { return operator+=(1); } // 
NOLINTNEXTLINE(fuchsia-overloaded-operator, fuchsia-trailing-return): this class simulates pointer - friend auto operator-(ptr const& /*a*/, ptr const& /*b*/) -> difference_type {return 0;} + friend auto operator-(ptr const& /*a*/, ptr const& /*b*/) noexcept -> difference_type { return 0; } // NOLINTNEXTLINE(fuchsia-overloaded-operator, fuchsia-trailing-return): this class simulates pointer - auto operator==(ptr const& /*other*/) const -> bool {return true;} + auto operator==(ptr const& /*other*/) const noexcept -> bool { return true; } // NOLINTNEXTLINE(fuchsia-overloaded-operator, fuchsia-trailing-return): this class simulates pointer - auto operator!=(ptr const& /*other*/) const -> bool {return false;} -// explicit operator T*() const{return &value;} + auto operator!=(ptr const& /*other*/) const noexcept -> bool { return false; } + // explicit operator T*() const{return &value;} // NOLINTNEXTLINE(fuchsia-overloaded-operator, fuchsia-trailing-return): this class simulates pointer - auto operator->() const -> ptr const& {return *this;} + auto operator->() const noexcept -> ptr const& { return *this; } // NOLINTNEXTLINE(fuchsia-trailing-return): this class simulates pointer - friend auto to_address(ptr const& pointer) -> ptr {return pointer;} - explicit operator bool() {return false;} -// operator double*() const{return &value;} - friend auto get_allocator(ptr const& /*self*/){return std::allocator{};} + // friend auto to_address(ptr const& pointer) -> ptr {return pointer;} + explicit operator bool() const noexcept { return false; } + // operator double*() const{return &value;} + friend auto get_allocator(ptr const& /*self*/) noexcept { return std::allocator{}; } }; -template<> double ptr::value = 42.; -template<> double ptr::value = 42.; +template<> double const ptr::value = 42.0; +template<> double const ptr::value = 42.0; template class ref { friend class ptr; @@ -60,56 +95,61 @@ template class ref { ref() = default; public: -// explicit ref(ref> const& other) : 
p_{other.p_} {} - ~ref() = default; - auto operator=(ref const& other) -> ref& = delete;//{ -// if(this == &other) {return *this;} -// *p_ = *other.p_; return *this; -// } -// ref(ref const&) = delete; - constexpr ref(ref const& /*other*/) = delete; - constexpr ref(ref&& /*other*/) noexcept {} // this is needed by nvcc, needs to be a device function for nvcc 11.2 and lower - - auto operator=(ref && other) noexcept -> ref& = delete;// {*p_ = std::move(*other.p_); return *this;} - constexpr operator T const&() const& {return ptr::value;} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + // explicit ref(ref> const& other) : p_{other.p_} {} + // ~ref() = default; + // auto operator=(ref const& other) -> ref& = delete; + // constexpr ref(ref const& /*other*/) = delete; + // constexpr ref(ref&& /*other*/) noexcept {} // this is needed by nvcc, needs to be a device function for nvcc 11.2 and lower + + // auto operator=(ref && other) noexcept -> ref& = delete; // {*p_ = std::move(*other.p_); return *this;} + + constexpr operator T const&() const& { return ptr::value; } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) // NOLINTNEXTLINE(fuchsia-overloaded-operator): this class simulates a reference - auto operator==(ref const& /*other*/) const {return true;} + auto operator==(ref const& /*other*/) const { return true; } // NOLINTNEXTLINE(fuchsia-overloaded-operator): this class simulates a reference - auto operator!=(ref const& /*other*/) const {return false;} + auto operator!=(ref const& /*other*/) const { return false; } using decay_t = std::decay_t; }; template struct allocator { - using pointer = ptr; + using pointer = ptr; using value_type = T; - auto allocate(std::size_t /*size*/) {return pointer{};} - void deallocate(pointer /*base*/, std::size_t /*size*/) {} -// std::true_type operator==(allocator const&){return {};} + auto allocate(std::size_t /*size*/) { return pointer{}; } + void deallocate(pointer /*base*/, 
std::size_t /*size*/) { + /*no-op;*/ + } + // std::true_type operator==(allocator const&){return {};} allocator() = default; - template explicit allocator(allocator const& /*other*/) {} + template explicit allocator(allocator const& /*other*/) { + /*no-op;*/ + } template - void construct(pointer /*location*/, Args&&... /*args*/) {} - void destroy(pointer /*location*/) {} + void construct(pointer /*location*/, Args const&... /*args*/) { + /*no-op;*/ + } + void destroy(pointer /*location*/) { + /*no-op;*/ + } }; // all these are optional, depending on the level of specialization needed template auto copy_n(Ptr /*first*/, Size /*count*/, ptr result) { -// std::cerr<< "called Pointer-based copy_n(Ptr, n, fancy::ptr)" < auto copy_n(ptr /*first*/, Size /*count*/, Ptr result) { -// std::cerr<< "called Pointer-based copy_n(fancy::ptr, n, Ptr)" < auto copy_n(ptr /*first*/, Size /*count*/, ptr result) { -// std::cerr<< "called Pointer-based copy_n(fancy::ptr, n, fancy::ptr)" < auto copy(It first, It last, fancy::ptr dest) { return copy(first, last, multi::array_iterator>{dest}); -// std::cerr << "1D copy(it1D, it1D, it1D) with strides " << stride(first) << " " << stride(dest) << std::endl; -// return dest; + // std::cerr << "1D copy(it1D, it1D, it1D) with strides " << stride(first) << " " << stride(dest) << std::endl; + // return dest; } template // custom copy 1D (aka strided copy) -auto copy(It/*first*/, It/*last*/, multi::array_iterator> dest) { -// std::cerr << "1D copy(it1D, it1D, it1D) with strides " << stride(first) << " " << stride(dest) << std::endl; +auto copy(It /*first*/, It /*last*/, multi::array_iterator> dest) { + // std::cerr << "1D copy(it1D, it1D, it1D) with strides " << stride(first) << " " << stride(dest) << std::endl; return dest; } -template // custom copy 2D (aka double strided copy) -auto copy(It/*first*/, It/*last*/, multi::array_iterator> dest) { -// std::cerr<<"2D copy(It, It, it2D) with strides 1"<< first.stride() <<" "<< dest.stride() < // custom 
copy 2D (aka double strided copy) +auto copy(It /*first*/, It /*last*/, multi::array_iterator> dest) { + // std::cerr<<"2D copy(It, It, it2D) with strides 1"<< first.stride() <<" "<< dest.stride() < // custom copy 2D (aka double strided copy) -//auto uninitialized_copy(Alloc&, It first, It last, multi::array_iterator> const& dest){ -// std::cerr << "2D uninitialized_copy(...) calls raw copy 2D" << std::endl; -// return copy(first, last, dest); -//} +// template // custom copy 2D (aka double strided copy) +// auto uninitialized_copy(Alloc&, It first, It last, multi::array_iterator> const& dest){ +// std::cerr << "2D uninitialized_copy(...) calls raw copy 2D" << std::endl; +// return copy(first, last, dest); +// } -} // namespace boost::multi +} // end namespace boost::multi //////////////////////////////////////////////////////////////////////////////// // user code @@ -155,4 +195,7 @@ BOOST_AUTO_TEST_CASE(multi_fancy) { multi::array> arr({5, 5}); BOOST_REQUIRE( arr.size() == 5 ); BOOST_REQUIRE( arr[1][1] == arr[2][2] ); + + multi::array> const arr2({0, 0}); + BOOST_REQUIRE( arr2.size() == 0 ); } diff --git a/external_codes/boost_multi/multi/test/array_legacy_c.cpp b/external_codes/boost_multi/multi/test/array_legacy_c.cpp index 95d83deea9..8833c172df 100644 --- a/external_codes/boost_multi/multi/test/array_legacy_c.cpp +++ b/external_codes/boost_multi/multi/test/array_legacy_c.cpp @@ -1,13 +1,42 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi legacy adaptor example" -#include - -#include "multi/array.hpp" - -#include -#include +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif namespace multi = boost::multi; @@ -17,26 +46,26 @@ namespace fake { using fftw_complex = double[2]; void fftw_plan_dft( - int rank, const int* n, + int rank, int const* n, fftw_complex* in, fftw_complex* out, int sign, unsigned flags ); void fftw_plan_dft( - int rank, const int* n, + int rank, int const* n, fftw_complex* in, fftw_complex* out, int sign, unsigned flags ) { (void)rank, (void)n, (void)in, (void)out, (void)sign, (void)flags; } -} // end namespace fake +} // end namespace fake BOOST_AUTO_TEST_CASE(array_legacy_c) { - using complex = std::complex; + using complex = std::complex; multi::array const in = { - {150., 16., 17., 18., 19.}, - { 5., 5., 5., 5., 5.}, - {100., 11., 12., 13., 14.}, - { 50., 6., 7., 8., 9.} + {{150.0, 0.0}, {16.0, 0.0}, {17.0, 0.0}, {18.0, 0.0}, {19.0, 0.0}}, + { {5.0, 0.0}, {5.0, 0.0}, {5.0, 0.0}, {5.0, 0.0}, {5.0, 0.0}}, + {{100.0, 0.0}, {11.0, 0.0}, {12.0, 0.0}, {13.0, 0.0}, {14.0, 0.0}}, + { {50.0, 0.0}, {6.0, 0.0}, {7.0, 0.0}, {8.0, 0.0}, {9.0, 0.0}}, }; multi::array, 2> out(extensions(in)); @@ -44,53 
+73,47 @@ BOOST_AUTO_TEST_CASE(array_legacy_c) { BOOST_REQUIRE( dimensionality(out) == dimensionality(in) ); BOOST_REQUIRE( sizes(out) == sizes(in) ); - static_assert( sizeof(complex) == sizeof(fake::fftw_complex), "!" ); + static_assert(sizeof(complex) == sizeof(fake::fftw_complex), "!"); fake::fftw_plan_dft( - dimensionality(in), - std::apply([](auto... sizes) {return std::array{{static_cast(sizes)...}};}, in.sizes()).data(), - reinterpret_cast(const_cast(in .data_elements())), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast, cppcoreguidelines-pro-type-const-cast): testing legacy code - reinterpret_cast( out.data_elements() ), // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast): testing legacy code + decltype(in)::dimensionality, + std::apply([](auto... sizes) { return std::array{{static_cast(sizes)...}}; }, in.sizes()).data(), + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-pro-type-const-cast) testing legacy code + reinterpret_cast(const_cast(in.data_elements())), // NOSONAR + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast): testing legacy code + reinterpret_cast(out.data_elements()), 1, 0 ); -//struct basic : multi::layout_t<2>{ -// double* p = {}; -// basic() = default; -//}; - -//struct ref : basic{ -//}; - - { multi::array d2D = { - {150, 16, 17, 18, 19}, - { 30, 1, 2, 3, 4}, - {100, 11, 12, 13, 14}, - { 50, 6, 7, 8, 9} + {150.0, 16.0, 17.0, 18.0, 19.0}, + { 30.0, 1.0, 2.0, 3.0, 4.0}, + {100.0, 11.0, 12.0, 13.0, 14.0}, + { 50.0, 6.0, 7.0, 8.0, 9.0}, }; -// #if __has_cpp_attribute(no_unique_address) >=201803L and not defined(__NVCC__) and not defined(__PGI) -// BOOST_REQUIRE( sizeof(d2D)==sizeof(double*)+7*sizeof(std::size_t) ); -// #endif + // #if __has_cpp_attribute(no_unique_address) >=201803L and not defined(__NVCC__) and not defined(__PGI) + // BOOST_REQUIRE( sizeof(d2D)==sizeof(double*)+7*sizeof(std::size_t) ); + // #endif BOOST_REQUIRE( d2D.is_compact() ); BOOST_REQUIRE( 
rotated(d2D).is_compact() ); BOOST_REQUIRE( d2D[3].is_compact() ); - BOOST_REQUIRE( not rotated(d2D)[2].is_compact() ); + BOOST_REQUIRE( ! rotated(d2D)[2].is_compact() ); } { multi::array d2D({5, 3}); BOOST_REQUIRE( d2D.is_compact() ); BOOST_REQUIRE( rotated(d2D).is_compact() ); BOOST_REQUIRE( d2D[3].is_compact() ); - BOOST_REQUIRE( not rotated(d2D)[2].is_compact() ); + BOOST_REQUIRE( ! rotated(d2D)[2].is_compact() ); } } -inline constexpr auto f2(multi::array_ref&& array) -> double& {return array[2];} +#ifndef _MSC_VER // TODO(correaa) not supported by MSVC 14.3 in c++17 mode +constexpr auto f2(multi::array_ref&& array) -> double& { return std::move(array)[2]; } BOOST_AUTO_TEST_CASE(array_legacy_c_2) { - double arr[5] = {150, 16, 17, 18, 19}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + double arr[5] = {150.0, 16.0, 17.0, 18.0, 19.0}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) BOOST_REQUIRE( &f2(arr) == &arr[2] ); } - +#endif diff --git a/external_codes/boost_multi/multi/test/array_ptr.cpp b/external_codes/boost_multi/multi/test/array_ptr.cpp index 837b490ad5..c0ebf7348b 100644 --- a/external_codes/boost_multi/multi/test/array_ptr.cpp +++ b/external_codes/boost_multi/multi/test/array_ptr.cpp @@ -1,50 +1,94 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// © Alfredo A. Correa 2019-2022 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi array pointer" -#include - -#include "multi/array.hpp" +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; // NOLINTNEXTLINE(fuchsia-trailing-return): trailing return helps readability -template auto fwd_array(T&& array) -> T&& {return std::forward(array);} +template auto fwd_array(T&& array) -> T&& { return std::forward(array); } BOOST_AUTO_TEST_CASE(multi_array_ptr_equality) { multi::array arr = { - {1., 2., 3.}, - {4., 5., 6.}, - {7., 8., 9.}, - {1., 2., 3.} + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {1.0, 2.0, 3.0}, }; BOOST_REQUIRE( arr[2] == arr[2] ); BOOST_REQUIRE( &arr[2] == &arr[2] ); + BOOST_REQUIRE( &arr[2] != &(arr[2]({0, 2})) ); + BOOST_REQUIRE( !( &arr[2] == &std::as_const(arr)[2]({0, 2})) ); BOOST_REQUIRE( &arr[2] == &fwd_array(arr[2]) ); BOOST_REQUIRE( &fwd_array(arr[2]) == &arr[2] ); -// auto const& A2 = fwd_array(A[2]); + auto arr_ptr = &arr[2]; + BOOST_REQUIRE( arr_ptr == arr_ptr ); + + auto& arr_ptr_ref = arr_ptr; + arr_ptr = arr_ptr_ref; + arr_ptr = std::move(arr_ptr_ref); + + auto arr_ptr2 = &std::as_const(arr)[2]; + BOOST_REQUIRE( arr_ptr == arr_ptr2 ); + BOOST_REQUIRE( arr_ptr2 == arr_ptr ); + BOOST_REQUIRE( !(arr_ptr != arr_ptr) ); + + auto& arr_ptr2_ref = arr_ptr2; + arr_ptr2 = arr_ptr2_ref; + arr_ptr2_ref = 
arr_ptr2; + auto const& carr2 = arr[2]; BOOST_REQUIRE( carr2[0] == arr[2][0] ); BOOST_REQUIRE( carr2.base() == arr[2].base() ); BOOST_REQUIRE( &carr2 == &std::as_const(arr)[2] ); BOOST_REQUIRE( &carr2 == & arr [2] ); - auto const& ac2 = carr2; //fwd_array(A[2]); + auto const& ac2 = carr2; // fwd_array(A[2]); BOOST_REQUIRE( &ac2 == &std::as_const(arr)[2] ); + BOOST_REQUIRE( &std::as_const(arr)[2] == &ac2 ); BOOST_REQUIRE( &ac2 == & arr [2] ); } BOOST_AUTO_TEST_CASE(multi_array_ptr) { { - std::array, 4> arr {{ - {{ 0., 1., 2., 3., 4.}}, - {{ 5., 6., 7., 8., 9.}}, - {{10., 11., 12., 13., 14.}}, - {{15., 16., 17., 18., 19.}} - }}; + // clang-format off + std::array, 4> arr{ + {{{0.0, 1.0, 2.0, 3.0, 4.0}}, + {{5.0, 6.0, 7.0, 8.0, 9.0}}, + {{10.0, 11.0, 12.0, 13.0, 14.0}}, + {{15.0, 16.0, 17.0, 18.0, 19.0}}}, + }; + // clang-format on - multi::array_ptr arrP{&arr}; + multi::array_ptr const arrP{&arr}; BOOST_REQUIRE( arrP->extensions() == multi::extensions(arr) ); BOOST_REQUIRE( extensions(*arrP) == multi::extensions(arr) ); @@ -53,12 +97,14 @@ BOOST_AUTO_TEST_CASE(multi_array_ptr) { BOOST_REQUIRE( extensions(*arrP) == extensions(arr) ); BOOST_REQUIRE( &arrP->operator[](1)[1] == &arr[1][1] ); - multi::array_ptr arrP2{&arr}; - BOOST_REQUIRE( arrP == arrP2 ); BOOST_REQUIRE( not (arrP != arrP2) ); + multi::array_ptr const arrP2{&arr}; + BOOST_REQUIRE( arrP == arrP2 ); + BOOST_REQUIRE( ! (arrP != arrP2) ); std::array, 4> arr2{}; - multi::array_ptr arr2P{&arr2}; - BOOST_REQUIRE( arr2P != arrP ); BOOST_REQUIRE( not (arr2P == arrP) ); + multi::array_ptr arr2P{&arr2}; + BOOST_REQUIRE( arr2P != arrP ); + BOOST_REQUIRE( ! 
(arr2P == arrP) ); arr2P = arrP; BOOST_REQUIRE( arrP == arr2P ); @@ -72,16 +118,16 @@ BOOST_AUTO_TEST_CASE(multi_array_ptr) { BOOST_REQUIRE( size(arrR) == arrP->size() ); } { - std::array, 4> arr = {{ - std::array{{ 0., 1., 2., 3., 4.}}, - std::array{{ 5., 6., 7., 8., 9.}}, - std::array{{10., 11., 12., 13., 14.}}, - std::array{{15., 16., 17., 18., 19.}} - }}; + std::array, 4> arr = { + {std::array{{0.0, 1.0, 2.0, 3.0, 4.0}}, + std::array{{5.0, 6.0, 7.0, 8.0, 9.0}}, + std::array{{10.0, 11.0, 12.0, 13.0, 14.0}}, + std::array{{15.0, 16.0, 17.0, 18.0, 19.0}}}, + }; std::vector> ptrs; ptrs.emplace_back(&arr[0][0], 5); // NOLINT(readability-container-data-pointer) test access - ptrs.emplace_back(&arr[2][0], 5); // NOLINT(readability-container-data-pointer) test access + ptrs.emplace_back(arr[2].data(), 5); ptrs.emplace_back(&arr[3][0], 5); // NOLINT(readability-container-data-pointer) test access BOOST_REQUIRE( &(*ptrs[2])[4] == &arr[3][4] ); @@ -89,34 +135,85 @@ BOOST_AUTO_TEST_CASE(multi_array_ptr) { BOOST_REQUIRE( ptrs[2]->operator[](4) == 19 ); } { - std::vector v1(100, 3.); - std::vector const v2(100, 4.); - multi::array_ptr v1P2D(v1.data(), {10, 10}); - multi::array_cptr v2P2D(v2.data(), {10, 10}); + std::vector v1(100, 3.0); // testing std::vector of multi:array NOLINT(fuchsia-default-arguments-calls) + std::vector const v2(100, 4.0); // testing std::vector of multi:array NOLINT(fuchsia-default-arguments-calls) + multi::array_ptr const v1P2D(v1.data(), {10, 10}); + multi::array_cptr const v2P2D(v2.data(), {10, 10}); *v1P2D = *v2P2D; v1P2D->operator=(*v2P2D); - BOOST_REQUIRE( v1[8] == 4. 
); + BOOST_REQUIRE( v1[8] == 4.0 ); } } BOOST_AUTO_TEST_CASE(span_like) { - std::vector vec = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.}; + std::vector vec = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; // testing std::vector of multi:array NOLINT(fuchsia-default-arguments-calls) using my_span = multi::array_ref; - auto aP = & my_span{vec.data() + 2,{5}}; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + auto aP = &my_span{vec.data() + 2, {5}}; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) BOOST_REQUIRE( aP->size() == 5 ); - BOOST_REQUIRE( (*aP)[0] == 2. ); + BOOST_REQUIRE( (*aP)[0] == 2.0 ); auto const& aCRef = *aP; BOOST_REQUIRE( aCRef.size() == 5 ); BOOST_REQUIRE( &aCRef[0] == &vec[2] ); - BOOST_REQUIRE( aCRef[0] == 2. ); + BOOST_REQUIRE( aCRef[0] == 2.0 ); auto&& aRef = *aP; - aRef[0] = 99.; - BOOST_REQUIRE( vec[2] == 99. ); + aRef[0] = 99.0; + BOOST_REQUIRE( vec[2] == 99.0 ); +} + +BOOST_AUTO_TEST_CASE(multi_array_ptr_assignment) { + multi::array arr = { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {1.0, 2.0, 3.0}, + }; + { + auto rowP = &arr[2]; + + rowP = *std::addressof(rowP); + + auto rowP2 = rowP; + rowP2 = rowP; // self assigment + + BOOST_REQUIRE( rowP == rowP2 ); + BOOST_REQUIRE( ! (rowP != rowP2) ); + + auto rowP0 = &arr[0]; + + BOOST_REQUIRE( rowP0 != rowP2 ); + BOOST_REQUIRE( ! (rowP0 == rowP2) ); + + rowP2 = decltype(rowP2){nullptr}; + BOOST_REQUIRE( ! rowP2 ); + + auto rowP3 = std::exchange(rowP, nullptr); + BOOST_REQUIRE( rowP3 == &arr[2] ); + BOOST_REQUIRE( rowP == nullptr ); + BOOST_REQUIRE( ! rowP ); + } + { + auto rowP = &arr(); + + rowP = *std::addressof(rowP); + + decltype(rowP) rowP2; + rowP2 = rowP; + + BOOST_REQUIRE( rowP == rowP2 ); + + rowP2 = decltype(rowP2){nullptr}; + BOOST_REQUIRE( ! rowP2 ); + + auto rowP3 = std::exchange(rowP, nullptr); + BOOST_REQUIRE( rowP3 == &arr() ); + BOOST_REQUIRE( rowP == nullptr ); + BOOST_REQUIRE( ! 
rowP ); + } } diff --git a/external_codes/boost_multi/multi/test/array_ref.cpp b/external_codes/boost_multi/multi/test/array_ref.cpp index c4b6b9d748..8b476544ba 100644 --- a/external_codes/boost_multi/multi/test/array_ref.cpp +++ b/external_codes/boost_multi/multi/test/array_ref.cpp @@ -1,47 +1,147 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi array reference" -#include - -#include "multi/array.hpp" - -#include // for std::iota +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include // for std::cout +#include // for std::iota + +// Suppress warnings from boost.test +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wundef" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +#define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(array_ref_from_carray) { // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays): test - double arr[4][5] { - { 0., 1., 2., 3., 4.}, - { 5., 6., 7., 8., 9.}, - {10., 11., 12., 13., 14.}, - {15., 16., 17., 18., 19.} + double arr[4][5] = { + { 0.0, 1.0, 2.0, 3.0, 4.0}, + { 5.0, 6.0, 7.0, 8.0, 9.0}, + {10.0, 11.0, 12.0, 13.0, 14.0}, + {15.0, 16.0, 17.0, 18.0, 19.0}, 
}; - multi::array_ptr map{&arr}; + multi::array_ptr const map{&arr}; BOOST_REQUIRE( &map->operator[](1)[1] == &arr[1][1] ); - BOOST_REQUIRE( (*&arr)[1][1] == 6. ); + BOOST_REQUIRE( (*&arr)[1][1] == 6.0 ); multi::array_ref&& mar = *map; BOOST_REQUIRE( &mar[1][1] == &arr[1][1] ); - mar[1][1] = 9.; + mar[1][1] = 9.0; BOOST_REQUIRE( &mar[1][1] == &arr[1][1] ); auto const& a_const = arr; -// double const(&a_const)[4][5] = a; - BOOST_REQUIRE( &a_const[1][1] == &arr[1][1] ); + // double const(&a_const)[4][5] = a; + BOOST_REQUIRE(&a_const[1][1] == &arr[1][1]); - static_assert( decltype(mar(2, {1, 3}))::rank_v == 1 , "!"); + static_assert(decltype(mar(2, {1, 3}))::rank_v == 1); BOOST_REQUIRE( size(mar(2, {1, 3})) == 2 ); BOOST_REQUIRE( &mar(2, {1, 3})[1] == &arr[2][2] ); + + [[maybe_unused]] multi::array_ref const& cmar = *map; + // *(cmar.base()) = 99.0; + // *(cmar[0].base()) = 88.0; + // *(cmar.data_elements()) = 99.0; +} + +BOOST_AUTO_TEST_CASE(array_ref_test_ub) { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays): test + double arr[4][4] = { + { 0.0, 1.0, 2.0, 3.0}, + { 5.0, 6.0, 7.0, 8.0}, + {10.0, 11.0, 12.0, 13.0}, + {15.0, 16.0, 17.0, 18.0}, + }; + + multi::array_ref const map{arr}; // multi::array_ref const map(&arr[0][0], {4, 4}); + + auto const& diag = map.diagonal(); + + BOOST_REQUIRE( diag.begin() != diag.end() ); + // -Werror=array-bounds + // BOOST_REQUIRE( std::accumulate(diag.begin(), diag.end(), 0.0) == 0.0 + 6.0 + 12.0 + 18.0 ); +} + +BOOST_AUTO_TEST_CASE(array_ref_test_no_ub) { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays): test + double arr[5][4] = { + { 0.0, 1.0, 2.0, 3.0}, + { 5.0, 6.0, 7.0, 8.0}, + {10.0, 11.0, 12.0, 13.0}, + {15.0, 16.0, 17.0, 18.0}, + }; + + multi::array_ref const map(&arr[0][0], {4, 4}); + // multi::array_ref const map{reinterpret_cast(arr)}; // 
NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + auto const& diag = map.diagonal(); + BOOST_REQUIRE( diag.begin() != diag.end() ); + BOOST_REQUIRE( std::accumulate(diag.begin(), diag.end(), 0.0) == 0.0 + 6.0 + 12.0 + 18.0 ); +} + +BOOST_AUTO_TEST_CASE(array_ref_test_no_ub2) { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays): test + double arr[][4] = { + {}, + {0.0, 1.0, 2.0, 3.0}, + {5.0, 6.0, 7.0, 8.0}, + {10.0, 11.0, 12.0, 13.0}, + {15.0, 16.0, 17.0, 18.0}, + {}, + }; + + multi::array_ref const map(&arr[1][0], {4, 4}); + // multi::array_ref const map{reinterpret_cast(arr)}; // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + auto const& diag = map.diagonal(); + BOOST_REQUIRE( diag.begin() != diag.end() ); + BOOST_REQUIRE( std::accumulate(diag.begin(), diag.end(), 0.0) == 0.0 + 6.0 + 12.0 + 18.0 ); +} + +BOOST_AUTO_TEST_CASE(array_ref_test_allocated_ub_unique_ptr) { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) for illustration + std::unique_ptr const arrp(new double const [4UL * 4UL] { 0.0, 1.0, 2.0, 3.0, 5.0, 6.0, 7.0, 8.0, 10.0, 11.0, 12.0, 13.0, 15.0, 16.0, 17.0, 18.0 }); + + BOOST_REQUIRE( arrp[3] == 3.0 ); + { + multi::array_ref const map(arrp.get(), {4, 4}); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + + auto const& diag = map.diagonal(); + + BOOST_REQUIRE( diag.begin() != diag.end() ); + BOOST_REQUIRE( std::accumulate(diag.begin(), diag.end(), 0.0) == 0.0 + 6.0 + 12.0 + 18.0 ); // is this UB? 
+ } } BOOST_AUTO_TEST_CASE(array_ref_1D_reindexed) { - std::array stdarr{ {"a", "b", "c", "d", "e"} }; + using namespace std::string_literals; // NOLINT(build/namespaces) for literal "string"s + + // clang-format off + std::array stdarr = {{"a"s, "b"s, "c"s, "d"s, "e"s}}; + // clang-format on multi::array_ref mar = *multi::array_ptr(&stdarr); @@ -56,7 +156,10 @@ BOOST_AUTO_TEST_CASE(array_ref_1D_reindexed) { BOOST_REQUIRE( &mar.stenciled({2, 4})[idx] == &mar[idx] ); } - multi::array arr({{2, 7}}, std::string{"xx"}); + // clang-format off + multi::array arr({{2, 7}}, std::string{"xx"}); // NOLINT(fuchsia-default-arguments-calls) std::string + // clang-format on + BOOST_REQUIRE( size(arr) == 5 ); BOOST_REQUIRE( extension(arr) == multi::iextension(2, 7) ); arr[2] = "a"; @@ -66,19 +169,19 @@ BOOST_AUTO_TEST_CASE(array_ref_1D_reindexed) { arr[6] = "e"; BOOST_REQUIRE( std::equal(arr.begin(), arr.end(), mar.begin(), mar.end()) ); - auto arrB = multi::array({"a", "b", "c", "d", "e"}).reindex(2); + auto arrB = multi::array({"a", "b", "c", "d", "e"}).reindex(2); // std::string NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( size(arrB) == 5 ); BOOST_REQUIRE( arrB[2] == "a" ); BOOST_REQUIRE( arrB[6] == "e" ); } BOOST_AUTO_TEST_CASE(array_ref_of_nested_std_array_reindexed) { - std::array, 4> arr = {{ - {{ 0., 1., 2., 3., 4.}}, - {{ 5., 6., 7., 8., 9.}}, - {{10., 11., 12., 13., 14.}}, - {{15., 16., 17., 18., 19.}} - }}; + std::array, 4> arr = { + {{{0.0, 1.0, 2.0, 3.0, 4.0}}, + {{5.0, 6.0, 7.0, 8.0, 9.0}}, + {{10.0, 11.0, 12.0, 13.0, 14.0}}, + {{15.0, 16.0, 17.0, 18.0, 19.0}}}, + }; // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays): test type multi::array_ref mar = *multi::array_ptr(&arr); @@ -86,11 +189,12 @@ BOOST_AUTO_TEST_CASE(array_ref_of_nested_std_array_reindexed) { } BOOST_AUTO_TEST_CASE(array_ref_reindexed) { - double (&&arr)[4][5] = { // NOLINT(hicpp-avoid-c-arrays, modernize-avoid-c-arrays, 
cppcoreguidelines-avoid-c-arrays): test - { 0, 1, 2, 3, 4}, - { 5, 6, 7, 8, 9}, - {10, 11, 12, 13, 14}, - {15, 16, 17, 18, 19} + // NOLINTNEXTLINE(hicpp-avoid-c-arrays, modernize-avoid-c-arrays, cppcoreguidelines-avoid-c-arrays): test + double arr[4][5] = { + { 0.0, 1.0, 2.0, 3.0, 4.0}, + { 5.0, 6.0, 7.0, 8.0, 9.0}, + {10.0, 11.0, 12.0, 13.0, 14.0}, + {15.0, 16.0, 17.0, 18.0, 19.0}, }; // NOLINTNEXTLINE(hicpp-avoid-c-arrays, modernize-avoid-c-arrays, cppcoreguidelines-avoid-c-arrays): special type @@ -105,13 +209,13 @@ BOOST_AUTO_TEST_CASE(array_ref_reindexed) { BOOST_REQUIRE( &mar.reindexed(1)[1][0] == &mar[0][0] ); BOOST_REQUIRE( sizes(mar[0].reindexed(1)) == sizes(mar[0]) ); - BOOST_REQUIRE( mar[0].reindexed(1).extension().start () == mar[0].extension().start () + 1 ); - BOOST_REQUIRE( mar[0].reindexed(1).extension().finish() == mar[0].extension().finish() + 1 ); + BOOST_REQUIRE( mar[0].reindexed(1).extension().first() == mar[0].extension().first () + 1 ); + BOOST_REQUIRE( mar[0].reindexed(1).extension().last() == mar[0].extension().last() + 1 ); auto diff = &mar[0].reindexed(1)[1] - &mar[0][0]; BOOST_REQUIRE( diff == 0 ); -// BOOST_REQUIRE( &(((mar<<1).reindexed(2)>>1).reindexed(1))[1][2] == &mar[0][0] ); + // BOOST_REQUIRE( &(((mar<<1).reindexed(2)>>1).reindexed(1))[1][2] == &mar[0][0] ); BOOST_REQUIRE( &mar.reindexed(1, 2)[1][2] == &mar[0][0] ); BOOST_REQUIRE( &mar.reindexed(1)({1, 5})[1][0] == &mar[0][0] ); @@ -120,131 +224,226 @@ BOOST_AUTO_TEST_CASE(array_ref_reindexed) { BOOST_REQUIRE( &mar.stenciled({2, 4})[2][0] == &mar[2][0] ); BOOST_REQUIRE( &mar.stenciled({2, 4}, {1, 3})[2][1] == &mar[2][1] ); -// BOOST_REQUIRE( &mar[0][0] == mar.origin() ); // origin changed meaning in on 2020/Dec/16 -// BOOST_REQUIRE( mar.base() == mar.origin() ); + // BOOST_REQUIRE( &mar[0][0] == mar.origin() ); // origin changed meaning in on 2020/Dec/16 + // BOOST_REQUIRE( mar.base() == mar.origin() ); -// BOOST_REQUIRE( mar.stenciled({2, 4}).origin() == mar.origin() ); // 
origin changed meaning in on 2020/Dec/16 + // BOOST_REQUIRE( mar.stenciled({2, 4}).origin() == mar.origin() ); // origin changed meaning in on 2020/Dec/16 BOOST_REQUIRE( mar.stenciled({2, 4}).base() != mar.base() ); BOOST_REQUIRE( &mar.stenciled({2, 4})[2][0] == mar.stenciled({2, 4}).base() ); { + // NOLINTBEGIN(fuchsia-default-arguments-calls) std::string ctor multi::array arrB = { {"a", "b", "c", "d", "e"}, {"f", "g", "h", "f", "g"}, - {"h", "i", "j", "k", "l"} + {"h", "i", "j", "k", "l"}, }; + // NOLINTEND(fuchsia-default-arguments-calls) std::string ctor arrB.reindex(2); BOOST_REQUIRE( size(arrB) == 3 ); BOOST_REQUIRE( arrB[2][0] == "a" ); } { + // NOLINTBEGIN(fuchsia-default-arguments-calls) std::string ctor multi::array arrB = { {"a", "b", "c", "d", "e"}, {"f", "g", "h", "f", "g"}, - {"h", "i", "j", "k", "l"} + {"h", "i", "j", "k", "l"}, }; + // NOLINTEND(fuchsia-default-arguments-calls) std::string ctor arrB.reindex(2, 1); BOOST_REQUIRE( size(arrB) == 3 ); BOOST_REQUIRE( arrB[2][1] == "a" ); } { - multi::array arrB = (multi::array - {{"a", "b", "c", "d", "e"}, - {"f", "g", "h", "f", "g"}, - {"h", "i", "j", "k", "l"}})//.reindex(2, 1); - ; + using namespace std::string_literals; // NOLINT(build/namespaces) for literal "string"s + multi::array arrB = (multi::array{ + {"a"s, "b"s, "c"s, "d"s, "e"s}, + {"f"s, "g"s, "h"s, "f"s, "g"s}, + {"h"s, "i"s, "j"s, "k"s, "l"s}, + }); // .reindex(2, 1); // std::string NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arrB.reindex(2).extension() == multi::iextension(2, 5) ); auto exts = arrB.reindexed(2).extensions(); - multi::array arrC(exts); + multi::array const arrC(exts); BOOST_REQUIRE( size(arrC) == 3 ); BOOST_REQUIRE( size(arrC) == size(arrB) ); - BOOST_REQUIRE( arrC.extension().start() == 2 ); - BOOST_REQUIRE( arrC.extension().finish() == 5 ); + BOOST_REQUIRE( arrC.extension().first() == 2 ); + BOOST_REQUIRE( arrC.extension().last() == 5 ); } } BOOST_AUTO_TEST_CASE(array_ref_with_stencil) { - std::array, 4> 
arr = {{ - {{ 0., 1., 2., 3., 4.}}, - {{ 5., 6., 7., 8., 9.}}, - {{10., 11., 12., 13., 14.}}, - {{15., 16., 17., 18., 19.}} - }}; + std::array, 4> arr = { + {{{0.0, 1.0, 2.0, 3.0, 4.0}}, + {{5.0, 6.0, 7.0, 8.0, 9.0}}, + {{10.0, 11.0, 12.0, 13.0, 14.0}}, + {{15.0, 16.0, 17.0, 18.0, 19.0}}}, + }; // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test type auto const& mar = *multi::array_ptr(&arr); + BOOST_REQUIRE( mar.size() == 4 ); // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test type multi::array ss = { - { 0., +1., 0.}, - {+1., -4., +1.}, - { 0., +1., 0.} + { 0.0, +1.0, 0.0}, + {+1.0, -4.0, +1.0}, + { 0.0, +1.0, 0.0}, }; auto const& stencil = ss.reindexed(-1, -1); + BOOST_REQUIRE( stencil.size() == 3 ); BOOST_REQUIRE( &stencil[-1][-1] == stencil.base() ); - - multi::array gy(extensions(mar), 0.); - - { - auto xs = extensions(mar); - for(auto eye = std::get<0>(xs).start() + 1; eye != std::get<0>(xs).finish()-1; ++eye) { - for(auto jay = std::get<1>(xs).start() + 1; jay != std::get<1>(xs).finish() - 1; ++jay) { - auto xt = extensions(stencil); - for(auto kay : std::get<0>(xt)) { - for(auto ell : std::get<1>(xt)) { - gy[eye][jay] += stencil[kay][ell]*mar[eye + kay][jay + ell]; - } - } - } - } - } } BOOST_AUTO_TEST_CASE(array_ref_1D_from_vector) { - std::vector vec = {1, 2, 3}; + std::vector vec = {1.0, 2.0, 3.0}; // std::vector NOLINT(fuchsia-default-arguments-calls) + // clang-format off multi::array_ref aref({{1, 3}}, vec.data()); + // clang-format on BOOST_REQUIRE( aref.extension() == multi::iextension(1, 3) ); BOOST_REQUIRE( &aref[1] == vec.data() ); } BOOST_AUTO_TEST_CASE(array_ref_2D_from_vector) { - std::vector vec = {1, 2, 3, 4, 5, 6}; + std::vector vec = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; // std::string NOLINT(fuchsia-default-arguments-calls) + multi::array_ref aref({2, 3}, vec.data()); + BOOST_REQUIRE( &aref[1][0] == &vec[3] ); } 
BOOST_AUTO_TEST_CASE(array_ref_2D_from_vector_with_offset) { - std::vector vec = { - 1, 2, 3, - 4, 5, 6 - }; + // NOLINTNEXTLINE(fuchsia-default-arguments-calls) + std::vector vec = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + multi::array_ref aref({multi::iextension(1, 3), multi::iextension(1, 4)}, vec.data()); - auto exts = aref.extensions(); - BOOST_REQUIRE( std::get<0>(exts) == multi::iextension(1, 3) ); - BOOST_REQUIRE( std::get<1>(exts).start() == 1 ); - BOOST_REQUIRE( std::get<1>(exts).finish() == 4 ); - BOOST_REQUIRE( std::get<1>(exts) == multi::iextension(1, 4) ); - BOOST_REQUIRE( exts == decltype(exts)(multi::iextension(1, 3), multi::iextension(1, 4)) ); + { + auto exts = aref.extensions(); +#ifndef _MSC_VER + auto const [exts0, exts1] = exts; +#else + auto const exts0 = std::get<0>(exts); + auto const exts1 = std::get<1>(exts); +#endif + BOOST_REQUIRE( exts0 == multi::iextension(1, 3) ); + + BOOST_REQUIRE( exts1.first() == 1 ); + BOOST_REQUIRE( exts1.last () == 4 ); + + BOOST_REQUIRE( exts1 == multi::iextension(1, 4) ); + + BOOST_REQUIRE( exts == decltype(exts)(multi::iextension(1, 3), multi::iextension(1, 4)) ); + } + { + auto exts = aref.extensions(); + BOOST_REQUIRE( std::get<0>(exts) == multi::iextension(1, 3) ); + BOOST_REQUIRE( std::get<1>(exts).first() == 1 ); + BOOST_REQUIRE( std::get<1>(exts).last () == 4 ); + BOOST_REQUIRE( std::get<1>(exts) == multi::iextension(1, 4) ); + BOOST_REQUIRE( exts == decltype(exts)(multi::iextension(1, 3), multi::iextension(1, 4)) ); + } + { + auto const exts = aref.extensions(); + BOOST_REQUIRE( std::get<0>(exts) == multi::iextension(1, 3) ); + BOOST_REQUIRE( std::get<1>(exts).first() == 1 ); + BOOST_REQUIRE( std::get<1>(exts).last () == 4 ); + BOOST_REQUIRE( std::get<1>(exts) == multi::iextension(1, 4) ); + BOOST_REQUIRE( exts == decltype(exts)(multi::iextension(1, 3), multi::iextension(1, 4)) ); + } + { + auto const exts = aref.extensions(); + BOOST_REQUIRE( exts.get<0>() == multi::iextension(1, 3) ); + BOOST_REQUIRE( 
exts.get<1>().first() == 1 ); + BOOST_REQUIRE( exts.get<1>().last () == 4 ); + BOOST_REQUIRE( exts.get<1>() == multi::iextension(1, 4) ); + BOOST_REQUIRE(( exts == decltype(exts){multi::iextension(1, 3), multi::iextension(1, 4)} )); + } + { + auto const exts = aref.extensions(); + BOOST_REQUIRE( std::get<0>(exts) == multi::iextension(1, 3) ); + BOOST_REQUIRE( std::get<1>(exts).first() == 1 ); + BOOST_REQUIRE( std::get<1>(exts).last () == 4 ); + BOOST_REQUIRE( std::get<1>(exts) == multi::iextension(1, 4) ); + BOOST_REQUIRE( exts == decltype(exts)(multi::iextension(1, 3), multi::iextension(1, 4)) ); + } + { + BOOST_REQUIRE( std::get<0>(aref.extensions()) == multi::iextension(1, 3) ); + BOOST_REQUIRE( std::get<1>(aref.extensions()).first() == 1 ); + BOOST_REQUIRE( std::get<1>(aref.extensions()).last () == 4 ); + BOOST_REQUIRE( std::get<1>(aref.extensions()) == multi::iextension(1, 4) ); + BOOST_REQUIRE( aref.extensions() == decltype(aref.extensions())(multi::iextension(1, 3), multi::iextension(1, 4)) ); + } + { + auto ss = aref.sizes(); + BOOST_REQUIRE( std::get<0>(ss) == 2 ); + BOOST_REQUIRE( std::get<1>(ss) == 3 ); + BOOST_REQUIRE( ss == decltype(ss)(2, 3) ); + } + { + auto [nn, mm] = aref.sizes(); + BOOST_REQUIRE( nn == 2 ); + BOOST_REQUIRE( mm == 3 ); + } + { + auto const ss = aref.sizes(); + BOOST_REQUIRE( std::get<0>(ss) == 2 ); + BOOST_REQUIRE( std::get<1>(ss) == 3 ); + BOOST_REQUIRE( ss == decltype(ss)(2, 3) ); + } + { + BOOST_REQUIRE( std::get<0>(aref.sizes()) == 2 ); + BOOST_REQUIRE( std::get<1>(aref.sizes()) == 3 ); + BOOST_REQUIRE( aref.sizes() == decltype(aref.sizes())(2, 3) ); + } + { + auto const ss = aref.sizes(); + using std::get; + BOOST_REQUIRE( get<0>(ss) == 2 ); + BOOST_REQUIRE( get<1>(ss) == 3 ); + BOOST_REQUIRE( ss == decltype(ss)(2, 3) ); + } + { + using std::get; + BOOST_REQUIRE( get<0>(aref.sizes()) == 2 ); + BOOST_REQUIRE( get<1>(aref.sizes()) == 3 ); + BOOST_REQUIRE( aref.sizes() == decltype(aref.sizes())(2, 3) ); + } +#if __cplusplus >= 
202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) // GCC: use of function template name with no prior declaration in function call with explicit template arguments is a C++20 extension + { + auto const ss = aref.sizes(); + BOOST_REQUIRE( get<0>(ss) == 2 ); + BOOST_REQUIRE( get<1>(ss) == 3 ); + BOOST_REQUIRE( ss == decltype(ss)(2, 3) ); + } + { + BOOST_REQUIRE( get<0>(aref.sizes()) == 2 ); + BOOST_REQUIRE( get<1>(aref.sizes()) == 3 ); + BOOST_REQUIRE( aref.sizes() == decltype(aref.sizes())(2, 3) ); + } +#endif BOOST_REQUIRE( &aref[1][1] == vec.data() ); } BOOST_AUTO_TEST_CASE(array_2D_with_offset) { - multi::array arr({multi::iextension(1, 3), multi::iextension(2, 5)}, 1.2); + multi::array const arr({multi::iextension(1, 3), multi::iextension(2, 5)}, 1.2); - BOOST_REQUIRE( arr.extension().start() == 1 ); - BOOST_REQUIRE( arr.extension().finish() == 3 ); + BOOST_REQUIRE( arr.extension().first() == 1 ); + BOOST_REQUIRE( arr.extension().last () == 3 ); } BOOST_AUTO_TEST_CASE(array_ref_1D) { + // clang-format off + // NOLINTNEXTLINE(fuchsia-default-arguments-calls) std::array arr = {{"a", "b", "c", "d", "e"}}; - + // clang-format on multi::array_ref&& mar = *multi::array_ptr{&arr}; -// multi::Array mar = *multi::Array(&a); + // multi::Array mar = *multi::Array(&a); BOOST_REQUIRE( extension(mar).first() == 0 ); BOOST_REQUIRE( extension(mar).last() == 5 ); @@ -260,80 +459,212 @@ BOOST_AUTO_TEST_CASE(array_ref_1D) { BOOST_REQUIRE( *extension(mar1).begin() == 1 ); BOOST_REQUIRE( size(mar1) == size(mar) ); - BOOST_REQUIRE( mar1.layout().extension().start() == 1 ); - BOOST_REQUIRE( extension(mar1).start() == 1 ); + BOOST_REQUIRE( mar1.layout().extension().first() == 1 ); + BOOST_REQUIRE( extension(mar1).first() == 1 ); BOOST_REQUIRE( &mar1[1] == &arr[0] ); // NOLINT(readability-container-data-pointer) test access BOOST_REQUIRE( mar1.base() == &arr[0] ); // NOLINT(readability-container-data-pointer) test access BOOST_REQUIRE( mar1.base() == arr.data() ); } 
BOOST_AUTO_TEST_CASE(array_ref_original_tests_carray) { - double darr[4][5] = {{1., 2.}, {2., 3.}}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type - multi::array_ref ref(&darr[0][0], {4, 5}); + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + double darr[4][5] = { + {1.0, 2.0}, + {2.0, 3.0}, + }; + multi::array_ref ref(&darr[0][0], {4, 5}); multi::array_ref cref(&darr[0][0], {4, 5}); - multi::array_ref crefc(&darr[0][0], {4, 5}); - multi::array_cref ref2(&darr[0][0], {4, 5}); + multi::array_ref crefc(&darr[0][0], {4, 5}); + multi::array_cref ref2(&darr[0][0], {4, 5}); - BOOST_REQUIRE( &ref[1][2] == &cref[1][2] ); + BOOST_REQUIRE( &ref[1][2] == &cref [1][2] ); BOOST_REQUIRE( &ref[1][2] == &crefc[1][2] ); - BOOST_REQUIRE( &ref[1][2] == &ref2[1][2] ); + BOOST_REQUIRE( &ref[1][2] == & ref2[1][2] ); + + ref[1][1] = 2.0; - ref[1][1] = 2.; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + double darr2[4][5] = { + {1.0, 0.0}, + {2.0, 3.0}, + }; + darr2[1][0] = 2.0; - double darr2[4][5] = {{1., 2.}, {2., 3.}}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + auto const& dd = std::as_const(darr2); - auto const& dd = static_cast(darr2); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type BOOST_REQUIRE( &(dd[1][2]) == &(darr2[1][2]) ); BOOST_REQUIRE(( & ref[1].static_array_cast()[1] == &ref[1][1] )); BOOST_REQUIRE(( &multi::static_array_cast(ref[1])[1] == &ref[1][1] )); } +BOOST_AUTO_TEST_CASE(array_ref_cast_carray) { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + double darr[2][2] = { 
+ {1.0, 2.0}, + {2.0, 3.0}, + }; + multi::array_ref ref(&darr[0][0], {2, 2}); + + auto&& other_darr = static_cast(ref); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + + double(&other_darr2)[2][2] = static_cast(ref); // NOLINT(hicpp-use-auto,modernize-use-auto,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + double(&other_darr3)[2][2](ref); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + + BOOST_REQUIRE( &ref [1][0] == &darr[1][0] ); + BOOST_REQUIRE( &other_darr [1][0] == &darr[1][0] ); + BOOST_REQUIRE( &other_darr2[1][0] == &darr[1][0] ); + BOOST_REQUIRE( &other_darr3[1][0] == &darr[1][0] ); + +// Homebrew GCC-13 terminates rather than having the expected exception caught. +#if !(defined(__GNUC__) && __GNUC__ >= 5 && defined(__APPLE__)) + BOOST_REQUIRE_THROW( + ([&] { + double(&other_darr4)[3][3](ref); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + other_darr4[1][1] += 1.0; + }()), + std::bad_cast + ); +#endif +} + BOOST_AUTO_TEST_CASE(array_ref_original_tests_const_carray) { - double const d2D[4][5] = {{1., 2.}, {2., 3.}}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type - multi::array_ref d2Rce(&d2D[0][0], {4, 5}); + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + double const d2D[4][5] = { + {1.0, 2.0}, + {2.0, 3.0}, + }; + multi::array_ref d2Rce(&d2D[0][0], {4, 5}); + BOOST_REQUIRE( &d2Rce[2][3] == &d2D[2][3] ); BOOST_REQUIRE( d2Rce.size() == 4 ); BOOST_REQUIRE( num_elements(d2Rce) == 20 ); } BOOST_AUTO_TEST_CASE(array_ref_original_tests_const_carray_string) { - #if not defined(__circle_build__) // circle 170 crashes https://github.com/seanbaxter/circle/issues/114 - std::string const dc3D[4][2][3] = { // 
NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + // NOLINTBEGIN(fuchsia-default-arguments-calls) std::string ctor + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + std::string const dc3D[4][2][3] = { {{"A0a", "A0b", "A0c"}, {"A1a", "A1b", "A1c"}}, {{"B0a", "B0b", "B0c"}, {"B1a", "B1b", "B1c"}}, {{"C0a", "C0b", "C0c"}, {"C1a", "C1b", "C1c"}}, {{"D0a", "D0b", "D0c"}, {"D1a", "D1b", "D1c"}}, }; + // NOLINTEND(fuchsia-default-arguments-calls) std::string ctor + multi::array_cref cref(&dc3D[0][0][0], {4, 2, 3}); - BOOST_REQUIRE( num_elements(cref) == 24 and cref[2][1][1] == "C1b" ); + BOOST_REQUIRE( num_elements(cref) == 24 && cref[2][1][1] == "C1b" ); + auto const& A2 = cref.sliced(0, 3).rotated()[1].sliced(0, 2).unrotated(); - BOOST_REQUIRE( multi::rank>{} == 2 and num_elements(A2) == 6 ); + BOOST_REQUIRE( multi::rank>{} == 2 && num_elements(A2) == 6 ); - BOOST_REQUIRE( std::get<0>(sizes(A2)) == 3 and std::get<1>(sizes(A2)) == 2 ); + BOOST_REQUIRE( std::get<0>(sizes(A2)) == 3 && std::get<1>(sizes(A2)) == 2 ); auto const& A3 = cref({0, 3}, 1, {0, 2}); - BOOST_REQUIRE( multi::rank>{} == 2 and num_elements(A3) == 6 ); + BOOST_REQUIRE( multi::rank>{} == 2 && num_elements(A3) == 6 ); BOOST_REQUIRE( A2.layout()[2][1] == &A2[2][1] - A2.base() ); BOOST_REQUIRE( A2.rotated().layout()[1][2] == &A2.rotated()[1][2] - A2.rotated().base() ); - #endif +} + +BOOST_AUTO_TEST_CASE(array_ref_sizes_assingment) { + multi::array_cref const cref(nullptr, {4, 2, 3}); + { + auto [sizes1, sizes2, sizes3] = cref.sizes(); + BOOST_REQUIRE( sizes1 == 4 ); + BOOST_REQUIRE( sizes2 == 2 ); + BOOST_REQUIRE( sizes3 == 3 ); + } + { + auto sizes1 = std::get<0>(cref.sizes()); + auto sizes2 = std::get<1>(cref.sizes()); + auto sizes3 = std::get<2>(cref.sizes()); + BOOST_REQUIRE( sizes1 == 4 ); + BOOST_REQUIRE( sizes2 == 2 ); + BOOST_REQUIRE( sizes3 == 3 ); + } + { + multi::size_t 
sizes1; // NOLINT(cppcoreguidelines-init-variables) + multi::size_t sizes2; // NOLINT(cppcoreguidelines-init-variables) + multi::size_t sizes3; // NOLINT(cppcoreguidelines-init-variables) + multi::tie(sizes1, sizes2, sizes3) = cref.sizes(); + + BOOST_REQUIRE( sizes1 == 4 ); + BOOST_REQUIRE( sizes2 == 2 ); + BOOST_REQUIRE( sizes3 == 3 ); + } + { + auto const [sizes1, sizes2, sizes3] = cref.sizes(); + + BOOST_REQUIRE( sizes1 == 4 ); + BOOST_REQUIRE( sizes2 == 2 ); + BOOST_REQUIRE( sizes3 == 3 ); + } + // { + // // NOLINTNEXTLINE(runtime/int) + // long sizes1; // NOLINT(google-runtime-int,cppcoreguidelines-init-variables) test bad idiom + // // NOLINTNEXTLINE(runtime/int) + // long sizes2; // NOLINT(google-runtime-int,cppcoreguidelines-init-variables) test bad idiom + // // NOLINTNEXTLINE(runtime/int) + // long sizes3; // NOLINT(google-runtime-int,cppcoreguidelines-init-variables) test bad idiom + + // multi::tie(sizes1, sizes2, sizes3) = static_cast>(cref.sizes()); + + // BOOST_REQUIRE( sizes1 == 4L ); + // BOOST_REQUIRE( sizes2 == 2L ); + // BOOST_REQUIRE( sizes3 == 3L ); + // } + { + // NOLINTNEXTLINE(runtime/int) + long long sizes1; // NOLINT(google-runtime-int,cppcoreguidelines-init-variables) test bad idiom + // NOLINTNEXTLINE(runtime/int) + long long sizes2; // NOLINT(google-runtime-int,cppcoreguidelines-init-variables) test bad idiom + // NOLINTNEXTLINE(runtime/int) + long long sizes3; // NOLINT(google-runtime-int,cppcoreguidelines-init-variables) test bad idiom + multi::tie(sizes1, sizes2, sizes3) = cref.sizes(); + + BOOST_REQUIRE( sizes1 == 4 ); + BOOST_REQUIRE( sizes2 == 2 ); + BOOST_REQUIRE( sizes3 == 3 ); + } + { + int64_t sizes1; // NOLINT(cppcoreguidelines-init-variables) + int64_t sizes2; // NOLINT(cppcoreguidelines-init-variables) + int64_t sizes3; // NOLINT(cppcoreguidelines-init-variables) + multi::tie(sizes1, sizes2, sizes3) = cref.sizes(); + + BOOST_REQUIRE( sizes1 == 4 ); + BOOST_REQUIRE( sizes2 == 2 ); + BOOST_REQUIRE( sizes3 == 3 ); + } } 
BOOST_AUTO_TEST_CASE(array_ref_rebuild_2D) { - double d2D[4][5] = {{1., 2.}, {2., 3.}}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + double d2D[4][5] = { + {1.0, 2.0}, + {2.0, 3.0}, + }; multi::array_ref d2R(&d2D[0][0], {4, 5}); - auto&& d2B = d2R(); + + auto&& d2B = d2R(); auto&& d2B_ref = multi::ref(d2B.begin(), d2B.end()); - BOOST_REQUIRE( d2B.base() == d2B_ref.base() ); - BOOST_REQUIRE( d2B.layout() == d2B_ref.layout() ); + BOOST_REQUIRE( d2B[0][0] == d2B_ref[0][0] ); + BOOST_REQUIRE( &d2B[0][0] == &d2B_ref[0][0] ); + + BOOST_REQUIRE( d2B.base() == d2B_ref.base() ); + BOOST_REQUIRE( d2B.layout() == d2B_ref.layout() ); + BOOST_REQUIRE( &d2R() == &multi::ref(d2B.begin(), d2B.end()) ); } BOOST_AUTO_TEST_CASE(array_ref_rebuild_1D) { - double d1D[5] = {1., 2., 3., 4., 5.}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + double d1D[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + multi::array_ref d1R(&d1D[0], {5}); - auto&& d1B = d1R(); + + auto&& d1B = d1R(); auto&& d1B_ref = multi::ref(d1B.begin(), d1B.end()); BOOST_REQUIRE( d1B.base() == d1B_ref.base() ); @@ -343,35 +674,409 @@ BOOST_AUTO_TEST_CASE(array_ref_rebuild_1D) { BOOST_AUTO_TEST_CASE(array_ref_move_assigment_2D) { { - multi::array arr ({5, 4}); std::iota(arr.elements().begin(), arr.elements().end(), 0.); - multi::array arr2({5, 4}); std::iota(arr2.elements().begin(), arr2.elements().end(), 10.); + multi::array arr({5, 4}); + std::iota(arr.elements().begin(), arr.elements().end(), 0.); + + multi::array arr2({5, 4}); + std::iota(arr2.elements().begin(), arr2.elements().end(), 10.); - multi::array_ref&& Aref{{5, 4}, arr.data_elements()}; - multi::array_ref&& Bref{{5, 4}, 
arr2.data_elements()}; + auto&& Aref = multi::array_ref({5, 4}, arr.data_elements()); + auto&& Bref = multi::array_ref({5, 4}, arr2.data_elements()); Bref = Aref; BOOST_REQUIRE( arr2 == arr ); } { - multi::array arr ({5, 4}); std::iota(arr.elements().begin(), arr.elements().end(), 0.); - multi::array arr2({5, 4}); std::iota(arr2.elements().begin(), arr2.elements().end(), 10.); + multi::array arr({5, 4}); + std::iota(arr.elements().begin(), arr.elements().end(), 0.0); + + multi::array arr2({5, 4}); + std::iota(arr2.elements().begin(), arr2.elements().end(), 10.0); - multi::array_ref&& ref2{{5, 4}, arr2.data_elements()}; + auto&& ref2 = multi::array_ref({5, 4}, arr2.data_elements()); - ref2 = multi::array_ref{{5, 4}, arr.data_elements()}; + ref2 = multi::array_ref({5, 4}, arr.data_elements()); BOOST_REQUIRE( arr2 == arr ); } { - multi::array arr ({5, 4}); std::iota(arr.elements().begin(), arr.elements().end(), 0.); - multi::array arr2({5, 4}); std::iota(arr2.elements().begin(), arr2.elements().end(), 10.); + multi::array arr({5, 4}); + std::iota(arr.elements().begin(), arr.elements().end(), 0.0); - multi::array_ref&& ref {{5, 4}, arr .data_elements()}; - multi::array_ref&& ref2{{5, 4}, arr2.data_elements()}; + multi::array arr2({5, 4}); + std::iota(arr2.elements().begin(), arr2.elements().end(), 10.0); + + auto&& ref = multi::array_ref({5, 4}, arr.data_elements()); + auto&& ref2 = multi::array_ref({5, 4}, arr2.data_elements()); ref2 = std::move(ref); BOOST_REQUIRE( arr2 == arr ); } } + +auto f1d5(double const (&carr)[5]) -> double; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) +auto f1d5(double const (&carr)[5]) -> double { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + return carr[1]; +} + +void f2d54(double const (&carr)[5][4]); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) +void f2d54(double const (&carr)[5][4]) { // 
NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + BOOST_REQUIRE(carr[0][1] == 1.0); +} + +BOOST_AUTO_TEST_CASE(array_ref_conversion_1D) { + multi::array arr({5}, double{}); + BOOST_REQUIRE( arr.size() == 5 ); + std::iota(arr.elements().begin(), arr.elements().end(), 0.0); + + { + auto& carr = static_cast(arr); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + BOOST_REQUIRE( &carr[3] == &arr[3] ); + + BOOST_REQUIRE(f1d5(static_cast(arr)) == 1.0); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + } + { + double(&carr)[5](arr); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + BOOST_REQUIRE( &carr[3] == &arr[3] ); + } +} + +BOOST_AUTO_TEST_CASE(array_ref_conversion_2D) { + multi::array arr({5, 4}); + std::iota(arr.elements().begin(), arr.elements().end(), 0.0); + + { + auto& carr = static_cast(arr); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + BOOST_REQUIRE( &carr[3][2] == &arr[3][2] ); + + f2d54(static_cast(arr)); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + } + { + double(&carr)[5][4](arr); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + BOOST_REQUIRE( &carr[3][2] == &arr[3][2] ); + // f2d54((double(&)[5][4])(arr)); // this will warn with -Wold-style-cast NOLINT(google-readability-casting,cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + } +} + +#ifndef _MSC_VER +BOOST_AUTO_TEST_CASE(as_span) { +#ifdef BOOST_MULTI_HAS_SPAN + auto print_me0 = [](std::span rng) { + std::cout << "rng.size(): " << rng.size() << '\n'; // (4) + std::for_each(rng.begin(), rng.end(), [](auto const& elem) { std::cout << elem << ' '; }); + std::cout << "\n\n"; + }; +#endif + + auto print_me1 = [](multi::array_ref const& rng) { + std::cout << "rng.size(): " << 
rng.size() << '\n'; // (4) + std::for_each(rng.begin(), rng.end(), [](auto const& elem) { std::cout << elem << ' '; }); + std::cout << "\n\n"; + }; + + auto print_me2 = [](multi::array_ptr const& ptr) { + std::cout << "ptr->size(): " << ptr->size() << '\n'; // (4) + std::for_each(ptr->begin(), ptr->end(), [](auto const& elem) { std::cout << elem << ' '; }); + std::cout << "\n\n"; + }; + +#ifdef BOOST_MULTI_HAS_SPAN + { + int arr[] = {1, 2, 3, 4}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy arrays + print_me0(arr); + + // vvv this fails in certain versions of clang (14?ss) + // std::vector vec = {1, 2, 3, 4, 5}; // NOLINT(fuchsia-default-arguments-calls) + // print_me0(vec); + + // clang-format off + std::array arr2 = {{1, 2, 3, 4, 5, 6}}; + // clang-format on + + print_me0(arr2); + } +#endif + { + int arr[] = {1, 2, 3, 4}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test c-arrays + + print_me1(multi::array_ref{arr}); + print_me1(arr); + + std::vector vec = {1, 2, 3, 4, 5}; // NOLINT(fuchsia-default-arguments-calls) + + print_me1(*multi::array_ptr{vec.data(), 5}); + + // clang-format off + std::array arr2 = {{1, 2, 3, 4, 5, 6}}; + // clang-format on + + print_me1(arr2); + print_me1(*multi::array_ptr{arr2.data(), {6}}); + + multi::static_array marr( +// #ifdef _MSC_VER // problems with MSVC 14.3 c++17 + multi::extensions_t<1> +// #endif + {10}, + 99 + ); + + print_me1(*multi::array_ptr(marr.data_elements(), 10)); + + // #ifndef _MSC_VER + auto& alias = marr; + + marr = alias; + BOOST_REQUIRE(marr[5] == 99); + + marr = alias(); + BOOST_REQUIRE(marr[5] == 99); + // #endif + } + { + int arr[] = {1, 2, 3, 4}; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test c-arrays + print_me2(multi::array_ptr{&arr}); + print_me2(&arr); + + std::vector vec = {1, 2, 3, 4, 5}; // NOLINT(fuchsia-default-arguments-calls) + 
print_me2({vec.data(), 5}); + + // clang-format off + std::array arr2 = {{1, 2, 3, 4, 5, 6}}; + // clang-format on + + // print_me2(&arr2); // this crashes clang-tidy + print_me2({arr2.data(), {6}}); + + // multi::static_array marr({10}, 99); + // print_me2(&marr); // TODO(correaa) make this work + } +} +#endif + +BOOST_AUTO_TEST_CASE(diagonal) { + // NOLINTNEXTLINE(hicpp-avoid-c-arrays, modernize-avoid-c-arrays, cppcoreguidelines-avoid-c-arrays): test + double arr[4][3] = { + { 0.0, 1.0, 2.0}, + { 5.0, 1.0, 7.0}, + {10.0, 11.0, 2.0}, + {99.0, 99.0, 99.9}, + }; + + // NOLINTNEXTLINE(hicpp-avoid-c-arrays, modernize-avoid-c-arrays, cppcoreguidelines-avoid-c-arrays): special type + multi::array_ref mar = *multi::array_ptr(&arr); + + BOOST_REQUIRE( &mar({0, 3}, {0, 3}).diagonal()[0] == &arr[0][0] ); + BOOST_REQUIRE( &mar({0, 3}, {0, 3}).diagonal()[1] == &arr[1][1] ); + BOOST_REQUIRE( &mar({0, 3}, {0, 3}).diagonal()[2] == &arr[2][2] ); + + auto sum = 0.0; + + // NOLINTNEXTLINE(altera-unroll-loops) test for-range loop + for(auto const& aii : mar.diagonal()) { + sum += aii; + } + BOOST_REQUIRE( sum == mar[0][0] + mar[1][1] + mar[2][2]); +} + +BOOST_AUTO_TEST_CASE(function_passing) { + multi::array arr({3, 3}); + multi::array_ref& arrR = arr; + + arrR[0][0] = 2.1; + + arr.reextent({5, 5}); + + assert(&arrR[0][0] == &arr[0][0]); +} + +namespace boost::multi { +template>> +using Array = + std::conditional_t, + std::conditional_t< + std::is_const_v>, + boost::multi::array_ref>, D> const&, + boost::multi::array_ref, D>&>, + multi::array>; +} // end namespace boost::multi + +BOOST_AUTO_TEST_CASE(function_passing_2) { + multi::Array arr({3, 3}); + [[maybe_unused]] multi::Array arrR = arr; + + arrR[0][0] = 5.1; + + [[maybe_unused]] multi::Array arrCR = arr; + + assert(&arrCR[0][0] == &arrR[0][0]); + + [[maybe_unused]] multi::Array arrCR2 = arrCR; + + arr.reextent({5, 5}); + + assert(&arrR[0][0] == &arr[0][0]); +} + +template +auto trace_array_deduce(multi::array const& arr) -> T 
{ + auto const& diag = arr.diagonal(); + return std::accumulate(diag.begin(), diag.end(), T{0}); +} + +template double trace_array_deduce(multi::array const&); + +template +auto trace_generic(Array const& arr) -> T { + auto const& diag = arr.diagonal(); + return std::accumulate(diag.begin(), diag.end(), T{0}); +} + +template double trace_generic>(multi::array const&); + +inline auto trace_separate_ref(multi::array_ref const& arr) -> double { + auto const& diag = arr.diagonal(); + return std::accumulate(diag.begin(), diag.end(), 0.0); +} + +inline auto trace_separate_sub(multi::subarray const& arr) -> double { + auto const& diag = arr.diagonal(); + return std::accumulate(diag.begin(), diag.end(), 0.0); +} + +inline auto trace_separate_ref2(multi::array_const_view arr) -> double { + auto const& diag = arr.diagonal(); + return std::accumulate(diag.begin(), diag.end(), 0.0); +} + +// unusable for arrays +inline auto trace_separate_ref3(multi::array_view arr) -> double { + auto const& diag = arr.diagonal(); + return std::accumulate(diag.begin(), diag.end(), 0.0); +} + +// unusable for arrays +inline auto trace_separate_ref4(multi::array_ref arr) -> double { + auto const& diag = arr.diagonal(); + return std::accumulate(diag.begin(), diag.end(), 0.0); +} + +// unusable for arrays +inline auto trace_separate_sub4(multi::subarray arr) -> double { + auto const& diag = arr.diagonal(); + return std::accumulate(diag.begin(), diag.end(), 0.0); +} + +BOOST_AUTO_TEST_CASE(function_passing_3) { + multi::array const arr({3, 3}, 1.0); + + BOOST_REQUIRE( trace_array_deduce (arr) == 3 ); + BOOST_REQUIRE( trace_array_deduce(arr) == 3 ); + + multi::array const arr_paren_copy(arr()); + BOOST_REQUIRE( arr_paren_copy.size() == 3 ); + + BOOST_REQUIRE( trace_generic (arr) == 3 ); + BOOST_REQUIRE(( trace_generic >(arr) == 3 )); + // BOOST_REQUIRE(( trace_generic&>(arr) == 3 )); // can't generate element_type + + BOOST_REQUIRE( trace_generic (arr()) == 3 ); + BOOST_REQUIRE(( trace_generic 
>(arr()) == 3 )); // this will make a copy + // BOOST_REQUIRE(( trace_generic&>(arr()) == 3 )); // can't generate element_type + + BOOST_REQUIRE(( trace_generic >(arr) == 3 )); + // BOOST_REQUIRE(( trace_generic&>(arr) == 3 )); // can't generate element_type + BOOST_REQUIRE(( trace_generic >(arr) == 3 )); + // BOOST_REQUIRE(( trace_generic&>(arr) == 3 )); // can't generate element_type + + // BOOST_REQUIRE(( trace_generic >(arr({0, 3}, {0, 3})) == 3 )); + // BOOST_REQUIRE(( trace_generic&>(arr()) == 3 )); // can't generate element_type + + BOOST_REQUIRE(( trace_separate_ref (arr) == 3 )); + BOOST_REQUIRE(( trace_separate_sub (arr) == 3 )); + +// BOOST_REQUIRE(( trace_separate_ref2 (arr) == 3 )); // not allowed + // BOOST_REQUIRE(( trace_separate_ref3 (arr) == 3 )); // not allowed + + // BOOST_REQUIRE(( trace_separate_ref4 (arr) == 3 )); // not allowed + // BOOST_REQUIRE(( trace_separate_sub4 (arr) == 3 )); // not allowed +} + +#if __cplusplus > 202002L || (defined(_MSVC_LANG) && _MSVC_LANG > 202002L) +BOOST_AUTO_TEST_CASE(function_passing_3_lambdas) { + auto buffer = std::make_unique(9); + std::fill_n(buffer.get(), 9, 1.0); + + multi::array const arr({3, 3}, 1.0); + multi::array_ref const aref(buffer.get(), {3, 3}); + auto const& asub = arr({0, 3}, {0, 3}); + + auto deduce_array = [](Arr const& a) { return std::accumulate(a.diagonal().begin(), a.diagonal().end(), typename Arr::element_type{0}); }; // NOLINT(readability/braces) cpplint 1.6.1 gets confused + + BOOST_REQUIRE( deduce_array(arr) == 3 ); + BOOST_REQUIRE( deduce_array(aref) == 3 ); + BOOST_REQUIRE( deduce_array(asub) == 3 ); + + auto deduce_element = [](multi::array const& a) { return std::accumulate(a.diagonal().begin(), a.diagonal().end(), T{0}); }; // NOLINT(readability/braces) cpplint 1.6.1 gets confused + + BOOST_REQUIRE( deduce_element(arr) == 3 ); + // BOOST_REQUIRE( deduce_element(aref) == 3 ); + // BOOST_REQUIRE( deduce_element(asub) == 3 ); + + auto deduce_element_ref = [](multi::array_ref 
const& a) { return std::accumulate(a.diagonal().begin(), a.diagonal().end(), T{0}); }; // NOLINT(readability/braces) cpplint 1.6.1 gets confused + + BOOST_REQUIRE( deduce_element_ref(arr) == 3 ); + BOOST_REQUIRE( deduce_element_ref(aref) == 3 ); + // BOOST_REQUIRE( deduce_element_ref(asub) == 3 ); + + auto deduce_element_sub = [](multi::subarray const& a) { return std::accumulate(a.diagonal().begin(), a.diagonal().end(), T{0}); }; // NOLINT(readability/braces) cpplint 1.6.1 gets confused + + BOOST_REQUIRE( deduce_element_sub(arr) == 3 ); + BOOST_REQUIRE( deduce_element_sub(aref) == 3 ); + BOOST_REQUIRE( deduce_element_sub(asub) == 3 ); +} +#endif + +template +auto mut_trace_array_deduce(multi::array& arr) -> T { + arr[0][1] = 4.0; + + auto const& diag = arr.diagonal(); + return std::accumulate(diag.begin(), diag.end(), T{0}); +} + +template double mut_trace_array_deduce(multi::array&); + +template +auto mut_trace_generic(Array& arr) -> T { + arr[0][1] = 4.0; + + auto const& diag = arr.diagonal(); + return std::accumulate(diag.begin(), diag.end(), T{0}); +} + +BOOST_AUTO_TEST_CASE(function_passing_4) { + multi::array arr({3, 3}, 1.0); + + BOOST_REQUIRE( mut_trace_array_deduce (arr) == 3 ); + BOOST_REQUIRE( mut_trace_array_deduce(arr) == 3 ); + + BOOST_REQUIRE( mut_trace_generic (arr) == 3 ); + BOOST_REQUIRE(( mut_trace_generic >(arr) == 3 )); +} + +BOOST_AUTO_TEST_CASE(array_fill_constructor) { + multi::array arr(3, multi::array{1.0, 2.0, 3.0, 4.0}); + + BOOST_REQUIRE( arr[0][1] == 2.0 ); + BOOST_REQUIRE( arr[1][1] == 2.0 ); +} + +BOOST_AUTO_TEST_CASE(array_fill_constructor_1D) { + multi::array arr(3, 1.0); + + BOOST_REQUIRE( arr[0] == 1.0 ); + BOOST_REQUIRE( arr[1] == 1.0 ); +} diff --git a/external_codes/boost_multi/multi/test/array_vector_substitutability.cpp b/external_codes/boost_multi/multi/test/array_vector_substitutability.cpp deleted file mode 100644 index 2d0abb2fd3..0000000000 --- a/external_codes/boost_multi/multi/test/array_vector_substitutability.cpp 
+++ /dev/null @@ -1,164 +0,0 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi legacy adaptor example" -#define BOOST_TEST_DYN_LINK -#include - -#include "multi/array.hpp" - -#include -#include - -namespace multi = boost::multi; - -template // e.g. std::vector or multi::array -void resize_copy_1(std::vector const& source, DynamicArray& darr) { - darr = DynamicArray(source); -} - -template // e.g. std::vector or multi::array -void resize_copy_2(std::vector const& source, DynamicArray& darr) { - darr = DynamicArray(source.begin(), source.end()); -} - -template // e.g. std::vector or multi::array -void resize_copy_3(std::vector const& source, DynamicArray& darr) { - darr = std::decay_t(source.begin(), source.end()); // or std::decay_t(source.begin(), source.end()) -} - -template // e.g. std::vector or multi::array -void resize_copy_4(It first, It last, DynamicArray& darr) { - darr = DynamicArray(first, last); // or std::decay_t(source.begin(), source.end()) -} - -template // e.g. std::vector or multi::array -void resize_copy_5(It first, It last, DynamicArray& darr) { - darr.assign(first, last); // or std::decay_t(source.begin(), source.end()) -} - -// void resize_copy_6 ----> see below test_resize_copy_6 - -BOOST_AUTO_TEST_CASE(test_resize_copy_1) { - std::vector const source = {0., 1., 2., 3.}; - - std::vector dest_v = {99., 99.}; - multi::array dest_a = {88., 88.}; - - BOOST_REQUIRE( dest_v.size() == 2 ); - BOOST_REQUIRE( dest_a.size() == 2 ); - - resize_copy_1(source, dest_v); - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. ); - - resize_copy_1(source, dest_a); - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. 
); -} - -BOOST_AUTO_TEST_CASE(test_resize_copy_2) { - std::vector const source = {0., 1., 2., 3.}; - - std::vector dest_v = {99., 99.}; - multi::array dest_a = {88., 88.}; - - BOOST_REQUIRE( dest_v.size() == 2 ); - BOOST_REQUIRE( dest_a.size() == 2 ); - - resize_copy_2(source, dest_v); - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. ); - - resize_copy_2(source, dest_a); - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. ); -} - -BOOST_AUTO_TEST_CASE(test_resize_copy_3) { - std::vector const source = {0., 1., 2., 3.}; - - std::vector dest_v = {99., 99.}; - multi::array dest_a = {88., 88.}; - - BOOST_REQUIRE( dest_v.size() == 2 ); - BOOST_REQUIRE( dest_a.size() == 2 ); - - resize_copy_3(source, dest_v); - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. ); - - resize_copy_3(source, dest_a); - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. ); -} - -BOOST_AUTO_TEST_CASE(test_resize_copy_4) { - std::vector const source = {0., 1., 2., 3.}; - - std::vector dest_v = {99., 99.}; - multi::array dest_a = {88., 88.}; - - BOOST_REQUIRE( dest_v.size() == 2 ); - BOOST_REQUIRE( dest_a.size() == 2 ); - - resize_copy_4(source.begin(), source.end(), dest_v); - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. ); - - resize_copy_4(source.begin(), source.end(), dest_a); - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. ); -} - -BOOST_AUTO_TEST_CASE(test_resize_copy_5) { - std::vector const source = {0., 1., 2., 3.}; - - std::vector dest_v = {99., 99.}; - multi::array dest_a = {88., 88.}; - - BOOST_REQUIRE( dest_v.size() == 2 ); - BOOST_REQUIRE( dest_a.size() == 2 ); - - resize_copy_5(source.begin(), source.end(), dest_v); - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. ); - - resize_copy_5(source.begin(), source.end(), dest_a); - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. 
); -} - -BOOST_AUTO_TEST_CASE(test_resize_copy_6) { - std::vector const source = {0., 1., 2., 3.}; - - std::vector dest_v = {99., 99.}; - multi::array dest_a = {88., 88.}; - - BOOST_REQUIRE( dest_v.size() == 2 ); - BOOST_REQUIRE( dest_a.size() == 2 ); - - { // look same code as below - dest_v = decltype(dest_v)(source); - } - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. ); - - { // look same code as above - dest_a = decltype(dest_a)(source); - } - - BOOST_REQUIRE( dest_v.size() == 4 ); - BOOST_REQUIRE( dest_v[3] == 3. ); -} diff --git a/external_codes/boost_multi/multi/test/assignments.cpp b/external_codes/boost_multi/multi/test/assignments.cpp index dc066125bd..72f67342f5 100644 --- a/external_codes/boost_multi/multi/test/assignments.cpp +++ b/external_codes/boost_multi/multi/test/assignments.cpp @@ -1,153 +1,214 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wundef" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wsign-conversion" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#elif defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4244) +#endif -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi assignments" -#include +#ifndef BOOST_TEST_MODULE +#define BOOST_TEST_MAIN +#endif -#include +#include -#include "multi/array.hpp" +// #if defined(__clang__) +// #pragma clang diagnostic pop +// #elif defined(__GNUC__) +// #pragma GCC diagnostic pop +// #elif defined(_MSC_VER) +// #pragma warning(pop) +// #endif namespace multi = boost::multi; -inline static constexpr auto make_ref(double* ptr) -> multi::array_ref { - return multi::array_ref(ptr, {5, 7}); +namespace { + +constexpr auto make_ref(int* ptr) { + return multi::array_ref(ptr, {5, 7}); } +} // namespace + BOOST_AUTO_TEST_CASE(equality_1D) { - multi::array arr = {1., 2., 3.}; - multi::array arr2 = {1., 2., 3.}; - BOOST_REQUIRE( arr == arr2 ); - BOOST_REQUIRE( not (arr != arr2) ); + multi::array arr = {1.0, 2.0, 3.0}; + multi::array arr2 = {1.0, 2.0, 3.0}; + + BOOST_REQUIRE( arr == arr2 ); + BOOST_REQUIRE( ! (arr != arr2) ); - BOOST_REQUIRE( arr() == arr2() ); - BOOST_REQUIRE( not (arr() != arr2()) ); + BOOST_REQUIRE( arr() == arr2() ); + BOOST_REQUIRE( ! 
(arr() != arr2()) ); } BOOST_AUTO_TEST_CASE(equality_2D) { multi::array arr = { - {1., 2., 3.}, - {4., 5., 6.} + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, }; multi::array arr2 = { - {1., 2., 3.}, - {4., 5., 6.} + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, }; BOOST_REQUIRE( arr == arr2 ); - BOOST_REQUIRE( not (arr != arr2) ); + BOOST_REQUIRE( ! (arr != arr2) ); BOOST_REQUIRE( arr() == arr2() ); - BOOST_REQUIRE( not (arr() != arr2()) ); + BOOST_REQUIRE( ! (arr() != arr2()) ); BOOST_REQUIRE( arr[0] == arr2[0] ); - BOOST_REQUIRE( not (arr[0] != arr2[0]) ); + BOOST_REQUIRE( ! (arr[0] != arr2[0]) ); } BOOST_AUTO_TEST_CASE(multi_copy_move) { - multi::array arr({3, 3}, 0.); + multi::array arr({3, 3}, 0.0); multi::array arr2 = arr; BOOST_REQUIRE( arr == arr2 ); auto* arr_data = arr.data_elements(); + multi::array arr3 = std::move(arr); BOOST_REQUIRE( arr3.data_elements() == arr_data ); - multi::array arr4(std::move(arr2)); + multi::array const arr4(std::move(arr2)); BOOST_REQUIRE( size(arr4) == 3 ); } -#if 1 BOOST_AUTO_TEST_CASE(range_assignment) { -{ - auto ext = multi::make_extension_t(10L); - multi::array vec(ext.begin(), ext.end()); - BOOST_REQUIRE( ext.size() == vec.size() ); - BOOST_REQUIRE( vec[1] = 10 ); -} -{ - multi::array vec(multi::extensions_t<1>{multi::iextension{10}}); - auto ext = extension(vec); - vec.assign(ext.begin(), ext.end()); - BOOST_REQUIRE( vec[1] == 1 ); -} + { + auto const ext = multi::make_extension_t(10L); + + multi::array vec(ext.begin(), ext.end()); + + BOOST_REQUIRE( ext.size() == vec.size() ); + BOOST_REQUIRE( vec[1] = 10 ); + } + { + multi::array vec(multi::extensions_t<1>{multi::iextension{10}}); + + auto const ext = extension(vec); + + vec.assign(ext.begin(), ext.end()); + BOOST_REQUIRE( vec[1] == 1 ); + } } BOOST_AUTO_TEST_CASE(rearranged_assignment) { - multi::array tmp({14, 14, 7, 4}); - multi::array src({2, 14, 14, 7, 2}); src[0][1][2][3][1] = 99.; + multi::array tmp( +#ifdef _MSC_VER // problem with 14.3 c++17 + multi::extensions_t<4> +#endif + 
{14, 14, 7, 4} + ); + + auto const ext5 = multi::extensions_t<5>{2, 14, 14, 7, 2}; + + [[maybe_unused]] auto const ext52 = ext5; + + [[maybe_unused]] multi::array const src_test(ext5); + + multi::array src( +#ifdef _MSC_VER // problem with 14.3 c++17 + multi::extensions_t<5> +#endif + {2, 14, 14, 7, 2} + ); + + src[0][1][2][3][1] = 99; BOOST_REQUIRE( extensions(tmp.unrotated().partitioned(2).transposed().rotated()) == extensions(src) ); } +BOOST_AUTO_TEST_CASE(rearranged_assignment_resize) { + multi::array const arrA({4, 5}); + multi::array arrB({2, 3}); + + arrB = arrA; + BOOST_REQUIRE( arrB.size() == 4 ); +} + BOOST_AUTO_TEST_CASE(rvalue_assignments) { using complex = std::complex; - std::vector const vec1(200, 99.); - std::vector vec2(200); - auto linear1 = [&] {return multi::array_cptr(vec1.data(), 200);}; - auto linear2 = [&] {return multi::array_ptr(vec2.data(), 200);}; - *linear2() = *linear1(); -} + std::vector const vec1(200, 99.0); // NOLINT(fuchsia-default-arguments-calls) + std::vector vec2(200); // NOLINT(fuchsia-default-arguments-calls) -#if 0 // self-move-assigment is a standard warning in clang (-Wmove) -BOOST_AUTO_TEST_CASE(self_assigment) { - multi::array A = {1., 2., 3.}; - A = std::move(A); - std::cout << A[0] << std::endl; - BOOST_REQUIRE( A.empty() ); + auto linear1 = [&] { return multi::array_cptr(vec1.data(), 200); }; + auto linear2 = [&] { return multi::array_ptr(vec2.data(), 200); }; - multi::array B = {{1., 2., 3.}, {2., 3., 4.}}; - B = std::move(B); - BOOST_REQUIRE( B.empty() ); + *linear2() = *linear1(); } -#endif BOOST_AUTO_TEST_CASE(assignments) { { - std::vector vec( static_cast(5*7), 99.); - constexpr double val = 33.; - multi::array arr({5, 7}, val); - multi::array_ref(vec.data(), {5, 7}) = arr; + std::vector vec(static_cast(5 * 7), 99); // NOLINT(fuchsia-default-arguments-calls) + + constexpr int val = 33; + + multi::array const arr({5, 7}, val); + multi::array_ref(vec.data(), {5, 7}) = arr(); // arr() is a subarray + 
BOOST_REQUIRE( vec[9] == val ); - BOOST_REQUIRE( not vec.empty() ); - BOOST_REQUIRE( not is_empty(arr) ); + BOOST_REQUIRE( ! vec.empty() ); + BOOST_REQUIRE( ! is_empty(arr) ); } { - std::vector vec(5*7L, 99.); - std::vector wec(5*7L, 33.); + std::vector vec(5 * 7L, 99); // NOLINT(fuchsia-default-arguments-calls) + std::vector wec(5 * 7L, 33); // NOLINT(fuchsia-default-arguments-calls) - multi::array_ptr Bp{wec.data(), {5, 7}}; + multi::array_ptr const Bp(wec.data(), {5, 7}); make_ref(vec.data()) = *Bp; + + auto&& mref = make_ref(vec.data()); + mref = Bp->sliced(0, 5); + make_ref(vec.data()) = Bp->sliced(0, 5); - BOOST_REQUIRE( vec[9] == 33. ); + BOOST_REQUIRE( vec[9] == 33 ); } { - std::vector vec(5*7L, 99.); - std::vector wec(5*7L, 33.); + std::vector vec(5 * 7L, 99); // NOLINT(fuchsia-default-arguments-calls) + std::vector wec(5 * 7L, 33); // NOLINT(fuchsia-default-arguments-calls) make_ref(vec.data()) = make_ref(wec.data()); - BOOST_REQUIRE( vec[9] == 33. ); + BOOST_REQUIRE( vec[9] == 33 ); } } template auto eye(multi::extensions_t<2> exts, Allocator alloc) { - multi::array ret(exts, 0., alloc); - ret.diagonal().fill(1.); + multi::array ret(exts, 0, alloc); + ret.diagonal().fill(1); return ret; } template -auto eye(multi::extensions_t<2> exts) {return eye(exts, std::allocator{});} +auto eye(multi::extensions_t<2> exts) { return eye(exts, std::allocator{}); } BOOST_AUTO_TEST_CASE(assigment_temporary) { - multi::array Id = eye( multi::extensions_t<2>({3, 3}) ); + multi::array Id = eye(multi::extensions_t<2>({3, 3})); BOOST_REQUIRE( Id == eye({3, 3}) ); BOOST_REQUIRE( Id[1][1] == 1 ); BOOST_REQUIRE( Id[1][0] == 0 ); } - -#endif diff --git a/external_codes/boost_multi/multi/test/boost_array_concept.cpp b/external_codes/boost_multi/multi/test/boost_array_concept.cpp new file mode 100644 index 0000000000..aa71370a6a --- /dev/null +++ b/external_codes/boost_multi/multi/test/boost_array_concept.cpp @@ -0,0 +1,293 @@ +// Copyright 2024 Alfredo A. 
Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +// Test explicitly calls deprecated function +// #if defined(__clang__) +// # pragma clang diagnostic push +// # pragma clang diagnositc ignored "-Wdeprecated-declarations" +// #elif defined(__GNUC__) +// # pragma GCC diagnostic push +// # pragma GCC diagnositc ignored "-Wdeprecated-declarations" +// #endif + +#include + +// #if defined(__clang__) +// # pragma clang diagnostic pop +// #elif defined(__GNUC__) +// # pragma GCC diagnostic pop +// #endif + +// Suppress warnings from other boost libraries +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +# pragma clang diagnostic ignored "-Wunknown-pragmas" +// # pragma clang diagnositc ignored "-Wdeprecated-declarations" +# pragma clang diagnostic ignored "-Wunused-variable" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +# pragma GCC diagnostic ignored "-Wunknown-pragmas" +// # pragma GCC diagnositc ignored "-Wdeprecated-declarations" +# pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +#include + +#include + +BOOST_AUTO_TEST_CASE(concepts_boost_array) { + using BMA [[maybe_unused]] = boost::multi_array; // maybe_unused for bug in nvcc 11.8 + + BOOST_CONCEPT_ASSERT((boost::multi_array_concepts::ConstMultiArrayConcept)); + 
BOOST_CONCEPT_ASSERT((boost::multi_array_concepts::MutableMultiArrayConcept)); +} + +BOOST_AUTO_TEST_CASE(concepts_boost_array_1D) { + using BMA = boost::multi_array; + + BOOST_CONCEPT_ASSERT((boost::multi_array_concepts::ConstMultiArrayConcept)); + BOOST_CONCEPT_ASSERT((boost::multi_array_concepts::MutableMultiArrayConcept)); +} + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(backwards) { + multi::array const MA({2, 2}); + + #ifdef __GNUC__ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #endif + // BOOST_REQUIRE(A.index_bases()[0] == 0); // dangles? + // BOOST_REQUIRE(A.index_bases()[1] == 0); + + #ifdef __GNUC__ + #pragma GCC diagnostic pop + #endif + + { + #ifdef __NVCC__ + #pragma nv_diagnostic push + #pragma nv_diag_suppress = deprecated_entity_with_custom_message // nvcc #? + #endif + + #ifdef __GNUC__ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #endif + // auto ib = MA.index_bases(); (void)ib; + // BOOST_REQUIRE(ib[0] == 0); // dangles? 
+ // BOOST_REQUIRE(ib[1] == 0); + + #ifdef __GNUC__ + #pragma GCC diagnostic pop + #endif + + #ifdef __NVCC__ + #pragma nv_diagnostic pop + #endif + } + // { + // #pragma GCC diagnostic push + // #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + // std::array ib(MA.index_bases()); (void)ib; + // BOOST_REQUIRE(ib[0] == 0); + // BOOST_REQUIRE(ib[1] == 0); + // #pragma GCC diagnostic pop + // } + { + #ifdef __GNUC__ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #endif + // BOOST_REQUIRE(static_cast(MA.index_bases())[0] == 0); // dangles + // BOOST_REQUIRE(static_cast(MA.index_bases())[1] == 0); + + #ifdef __GNUC__ + #pragma GCC diagnostic pop + #endif + } + { + #ifdef __GNUC__ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #endif + // BOOST_REQUIRE(MA.index_bases()[0] == 0); // dangles + // BOOST_REQUIRE(MA.index_bases()[1] == 0); + + #ifdef __GNUC__ + #pragma GCC diagnostic pop + #endif + } + { + #ifdef __GNUC__ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #endif + // std::ptrdiff_t const* ib = MA.index_bases(); + // BOOST_REQUIRE(ib); + // BOOST_REQUIRE(ib[0] == 0); // dangles + // BOOST_REQUIRE(ib[1] == 0); + #ifdef __GNUC__ + #pragma GCC diagnostic pop + #endif + } + { + #ifdef __GNUC__ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #endif + std::vector const ib(2); (void)ib; // NOLINT(fuchsia-default-arguments-calls) + // std::copy_n(static_cast(MA.index_bases()), 2, ib.begin()); + // BOOST_REQUIRE(ib[0] == 0); // dangles + // BOOST_REQUIRE(ib[1] == 0); + + #ifdef __GNUC__ + #pragma GCC diagnostic pop + #endif + } + // { + // #pragma GCC diagnostic push + // #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + // std::vector ib(2); // NOLINT(fuchsia-default-arguments-calls) + // std::copy_n(MA.index_bases().to_array().data(), 2, ib.begin()); + // 
BOOST_REQUIRE(ib[0] == 0); + // BOOST_REQUIRE(ib[1] == 0); + // #pragma GCC diagnostic pop + // } +} + +BOOST_AUTO_TEST_CASE(concepts_array) { + using MA = multi::array; + + // BOOST_CONCEPT_ASSERT((boost::multi_array_concepts::ConstMultiArrayConcept)); + // BOOST_CONCEPT_ASSERT((boost::multi_array_concepts::MutableMultiArrayConcept)); + + BOOST_CONCEPT_ASSERT((boost::Assignable)); + BOOST_CONCEPT_ASSERT((boost::SGIAssignable)); + BOOST_CONCEPT_ASSERT((boost::DefaultConstructible)); + BOOST_CONCEPT_ASSERT((boost::CopyConstructible)); + BOOST_CONCEPT_ASSERT((boost::EqualityComparable)); + BOOST_CONCEPT_ASSERT((boost::LessThanComparable)); + // BOOST_CONCEPT_ASSERT((boost::Comparable)); + + // Function Object Concept Checking Classes + BOOST_CONCEPT_ASSERT((boost::Generator>)); + BOOST_CONCEPT_ASSERT((boost::UnaryFunction)); + BOOST_CONCEPT_ASSERT((boost::BinaryFunction)); + + // BOOST_CONCEPT_ASSERT((boost::AdaptableGenerator>)); // needs result_type TODO(correaa) add to array?, should result_type be array? or subarray? 
+ // BOOST_CONCEPT_ASSERT((boost::AdaptableUnaryFunction)); + // BOOST_CONCEPT_ASSERT((boost::AdaptableBinaryFunction)); + + // Container Concept Checking Classes + BOOST_CONCEPT_ASSERT((boost::Container)); + BOOST_CONCEPT_ASSERT((boost::Mutable_Container)); + BOOST_CONCEPT_ASSERT((boost::ForwardContainer)); + BOOST_CONCEPT_ASSERT((boost::Mutable_ForwardContainer)); + // BOOST_CONCEPT_ASSERT((boost::ReversibleContainer)); // TODO(correaa) make it reversible, `const_reverse_iterator _i = cc.rbegin();` + // BOOST_CONCEPT_ASSERT((boost::Mutable_ReversibleContainer)); + // BOOST_CONCEPT_ASSERT((boost::RandomAccessContainer)); + // BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessContainer)); + // BOOST_CONCEPT_ASSERT((boost::Sequence)); // TODO(correaa) needs insert and erase, which will not be provided + BOOST_CONCEPT_ASSERT((boost::Collection)); +} + +BOOST_AUTO_TEST_CASE(concepts_array_1D) { + using MA = multi::array; + + BOOST_CONCEPT_ASSERT((boost::multi_array_concepts::ConstMultiArrayConcept)); + // BOOST_CONCEPT_ASSERT((boost::multi_array_concepts::MutableMultiArrayConcept)); + + BOOST_CONCEPT_ASSERT((boost::Assignable)); + BOOST_CONCEPT_ASSERT((boost::SGIAssignable)); + BOOST_CONCEPT_ASSERT((boost::DefaultConstructible)); + BOOST_CONCEPT_ASSERT((boost::CopyConstructible)); + BOOST_CONCEPT_ASSERT((boost::EqualityComparable)); + BOOST_CONCEPT_ASSERT((boost::LessThanComparable)); + // BOOST_CONCEPT_ASSERT((boost::Comparable)); + + // Function Object Concept Checking Classes + BOOST_CONCEPT_ASSERT((boost::Generator>)); + BOOST_CONCEPT_ASSERT((boost::UnaryFunction)); + // BOOST_CONCEPT_ASSERT((boost::BinaryFunction)); + + // BOOST_CONCEPT_ASSERT((boost::AdaptableGenerator>)); // needs result_type TODO(correaa) add to array?, should result_type be array? or subarray? 
+ // BOOST_CONCEPT_ASSERT((boost::AdaptableUnaryFunction)); + // BOOST_CONCEPT_ASSERT((boost::AdaptableBinaryFunction)); + + // Container Concept Checking Classes + BOOST_CONCEPT_ASSERT((boost::Container)); + BOOST_CONCEPT_ASSERT((boost::Mutable_Container)); + BOOST_CONCEPT_ASSERT((boost::ForwardContainer)); + BOOST_CONCEPT_ASSERT((boost::Mutable_ForwardContainer)); + // BOOST_CONCEPT_ASSERT((boost::ReversibleContainer)); // TODO(correaa) make it reversible, `const_reverse_iterator _i = cc.rbegin();` + // BOOST_CONCEPT_ASSERT((boost::Mutable_ReversibleContainer)); + // BOOST_CONCEPT_ASSERT((boost::RandomAccessContainer)); + // BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessContainer)); + // BOOST_CONCEPT_ASSERT((boost::Sequence)); // TODO(correaa) needs insert and erase, which will not be provided + BOOST_CONCEPT_ASSERT((boost::Collection)); +} + +BOOST_AUTO_TEST_CASE(concepts_iterator) { + using MAIt = multi::array::iterator; + + BOOST_CONCEPT_ASSERT((boost::Assignable)); + BOOST_CONCEPT_ASSERT((boost::SGIAssignable)); + BOOST_CONCEPT_ASSERT((boost::DefaultConstructible)); + BOOST_CONCEPT_ASSERT((boost::CopyConstructible)); + BOOST_CONCEPT_ASSERT((boost::EqualityComparable)); + BOOST_CONCEPT_ASSERT((boost::LessThanComparable)); + + BOOST_CONCEPT_ASSERT((boost::InputIterator)); + BOOST_CONCEPT_ASSERT((boost::OutputIterator)); + BOOST_CONCEPT_ASSERT((boost::OutputIterator)); + + // Iterator Concept Checking Classes + BOOST_CONCEPT_ASSERT((boost::ForwardIterator)); + BOOST_CONCEPT_ASSERT((boost::Mutable_ForwardIterator)); + BOOST_CONCEPT_ASSERT((boost::BidirectionalIterator)); + BOOST_CONCEPT_ASSERT((boost::Mutable_BidirectionalIterator)); + BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator)); + BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator)); +} + +BOOST_AUTO_TEST_CASE(concepts_const_iterator) { + using MAIt [[maybe_unused]] = multi::array::const_iterator; // maybe_unused for bug in nvcc 11.8 + + BOOST_CONCEPT_ASSERT((boost::Assignable)); + 
BOOST_CONCEPT_ASSERT((boost::SGIAssignable)); + BOOST_CONCEPT_ASSERT((boost::DefaultConstructible)); + BOOST_CONCEPT_ASSERT((boost::CopyConstructible)); + BOOST_CONCEPT_ASSERT((boost::EqualityComparable)); + BOOST_CONCEPT_ASSERT((boost::LessThanComparable)); + + BOOST_CONCEPT_ASSERT((boost::InputIterator)); + // BOOST_CONCEPT_ASSERT((boost::OutputIterator)); + // BOOST_CONCEPT_ASSERT((boost::OutputIterator)); + + BOOST_CONCEPT_ASSERT((boost::ForwardIterator)); +// BOOST_CONCEPT_ASSERT((boost::Mutable_ForwardIterator)); + BOOST_CONCEPT_ASSERT((boost::BidirectionalIterator)); +// BOOST_CONCEPT_ASSERT((boost::Mutable_BidirectionalIterator)); + BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator)); +// BOOST_CONCEPT_ASSERT((boost::Mutable_RandomAccessIterator)); +} diff --git a/external_codes/boost_multi/multi/test/comparisons.cpp b/external_codes/boost_multi/multi/test/comparisons.cpp index ea687cdf9c..1c281f6a21 100644 --- a/external_codes/boost_multi/multi/test/comparisons.cpp +++ b/external_codes/boost_multi/multi/test/comparisons.cpp @@ -1,99 +1,141 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi comparisons" -#define BOOST_TEST_DYN_LINK -#include - -#include "multi/array.hpp" - -#include +// Copyright 2018-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wundef" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(comparison_complex) { -using complex = std::complex; -{ - multi::array arr = {1., 2., 3.}; - multi::array arr2 = {1., 2., 3.}; - BOOST_REQUIRE( arr[1] == arr2[1] ); - BOOST_REQUIRE( arr == arr2 ); BOOST_REQUIRE( not (arr != arr2) ); - BOOST_REQUIRE( arr2 == arr ); BOOST_REQUIRE( not (arr2 != arr) ); -} -{ - multi::array const arr = {{1., 2., 3.}, {4., 5., 6.}}; - multi::array const arr2 = {{1., 2., 3.}, {4., 5., 6.}}; - BOOST_REQUIRE( arr[1][1] == arr2[1][1] ); - BOOST_REQUIRE( arr == arr2 ); BOOST_REQUIRE( not (arr != arr2) ); - BOOST_REQUIRE( arr2 == arr ); BOOST_REQUIRE( not (arr2 != arr) ); - BOOST_REQUIRE( std::equal(arr[1].begin(), arr[1].end(), begin(arr2[1]), end(arr2[1])) ); -} + using complex = std::complex; + { + multi::array arr = {1.0, 2.0, 3.0}; + multi::array arr2 = { + {1.0, 0.0}, + {2.0, 0.0}, + {3.0, 0.0}, + }; + + BOOST_REQUIRE( arr[1] == arr2[1] ); + BOOST_REQUIRE( arr == arr2 ); + BOOST_REQUIRE( ! (arr != arr2) ); + BOOST_REQUIRE( arr2 == arr ); + BOOST_REQUIRE( ! 
(arr2 != arr) ); + } + { + multi::array const arr = { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + }; + + multi::array const arr2 = { + {{1.0, 0.0}, {2.0, 0.0}, {3.0, 0.0}}, + {{4.0, 0.0}, {5.0, 0.0}, {6.0, 0.0}}, + }; + + BOOST_REQUIRE( arr[1][1] == arr2[1][1] ); + BOOST_REQUIRE( arr == arr2 ); + BOOST_REQUIRE( ! (arr != arr2) ); + BOOST_REQUIRE( arr2 == arr ); + BOOST_REQUIRE( ! (arr2 != arr) ); + BOOST_REQUIRE( std::equal(arr[1].begin(), arr[1].end(), begin(arr2[1]), end(arr2[1])) ); + } } BOOST_AUTO_TEST_CASE(multi_comparisons_swap) { multi::array arr = { - {{ 1.2, 1.1}, { 2.4, 1.}}, - {{11.2, 3.0}, {34.4, 4.}}, - {{ 1.2, 1.1}, { 2.4, 1.}} + { {1.2, 1.1}, {2.4, 1.0}}, + {{11.2, 3.0}, {34.4, 4.0}}, + { {1.2, 1.1}, {2.4, 1.0}}, }; BOOST_REQUIRE( arr[0] < arr[1] ); - swap( arr[0], arr[1] ); + swap(arr[0], arr[1]); BOOST_REQUIRE( arr[1] < arr[0] ); - swap( arr[0], arr[1] ); + swap(arr[0], arr[1]); BOOST_REQUIRE( arr[0] < arr[1] ); } BOOST_AUTO_TEST_CASE(comparisons_equality) { multi::array arr = { - {{ 1.2, 1.1}, { 2.4, 1.}}, - {{11.2, 3.0}, {34.4, 4.}}, - {{ 1.2, 1.1}, { 2.4, 1.}} + { {1.2, 1.1}, {2.4, 1.0}}, + {{11.2, 3.0}, {34.4, 4.0}}, + { {1.2, 1.1}, {2.4, 1.0}}, }; - multi::array_ref ref(arr.data_elements(), extensions(arr)); - multi::array_cref cref(data_elements(arr) , extensions(arr)); + multi::array_ref ref(arr.data_elements(), extensions(arr)); + multi::array_cref cref(data_elements(arr), extensions(arr)); - BOOST_REQUIRE( arr == arr ); BOOST_REQUIRE( not (arr != arr) ); - BOOST_REQUIRE( ref == arr ); BOOST_REQUIRE( not (ref != arr) ); - BOOST_REQUIRE( ref == cref ); BOOST_REQUIRE( not (ref != cref) ); + BOOST_REQUIRE( arr == arr ); + BOOST_REQUIRE( ! (arr != arr) ); + BOOST_REQUIRE( ref == arr ); + BOOST_REQUIRE( ! (ref != arr) ); + BOOST_REQUIRE( ref == cref ); + BOOST_REQUIRE( ! 
(ref != cref) ); BOOST_REQUIRE( arr[0] == arr[2] ); BOOST_REQUIRE( ref[0] == arr[2] ); BOOST_REQUIRE( ref[0] == cref[2] ); - BOOST_REQUIRE( not ( arr[0] != arr[2]) ); - BOOST_REQUIRE( not ( ref[0] != ref[2]) ); + BOOST_REQUIRE( ! ( arr[0] != arr[2]) ); + BOOST_REQUIRE( ! ( ref[0] != ref[2]) ); - BOOST_REQUIRE( not ( arr[0] != arr[2]) ); - BOOST_REQUIRE( not ( ref[0] != ref[2]) ); + BOOST_REQUIRE( ! ( arr[0] != arr[2]) ); + BOOST_REQUIRE( ! ( ref[0] != ref[2]) ); } BOOST_AUTO_TEST_CASE(comparisons_ordering) { multi::array arr = { - {{ 1.2, 1.1}, { 2.4, 1.}}, - {{11.2, 3.0}, {34.4, 4.}}, - {{ 1.2, 1.1}, { 2.4, 1.}} + { {12, 11}, {24, 10}}, + {{112, 30}, {344, 40}}, + { {12, 11}, {24, 10}}, }; - multi::array_ref ref(arr.data_elements(), extensions(arr)); - multi::array_cref cref(data_elements(arr) , extensions(arr)); + multi::array_ref ref(arr.data_elements(), extensions(arr)); + + multi::array_cref cref(data_elements(arr), extensions(arr)); BOOST_REQUIRE( arr[0] <= arr[1] ); BOOST_REQUIRE( ref[0] <= arr[1] ); BOOST_REQUIRE( cref[0] <= cref[1] ); BOOST_REQUIRE( arr[0][0] <= arr[0][1] ); - BOOST_REQUIRE( ref[0][0] <= arr[0][1] ); + BOOST_REQUIRE( ref[0][0] <= arr[0][1] ); - BOOST_REQUIRE( arr[1][0][0] == 11.2 ); - BOOST_REQUIRE( ref[1][0][0] == 11.2 ); - BOOST_REQUIRE( cref[1][0][0] == 11.2 ); + BOOST_REQUIRE( arr[1][0][0] == 112 ); + BOOST_REQUIRE( ref[1][0][0] == 112 ); + BOOST_REQUIRE( cref[1][0][0] == 112 ); - BOOST_REQUIRE( arr[0][0][0] == 1.2 ); - BOOST_REQUIRE( ref[0][0][0] == 1.2 ); - BOOST_REQUIRE( cref[0][0][0] == 1.2 ); + BOOST_REQUIRE( arr[0][0][0] == 12 ); + BOOST_REQUIRE( ref[0][0][0] == 12 ); + BOOST_REQUIRE( cref[0][0][0] == 12 ); swap(ref[0], ref[1]); diff --git a/external_codes/boost_multi/multi/test/concepts.cpp b/external_codes/boost_multi/multi/test/concepts.cpp index 1a86c49a05..faf72923a9 100644 --- a/external_codes/boost_multi/multi/test/concepts.cpp +++ b/external_codes/boost_multi/multi/test/concepts.cpp @@ -1,16 +1,40 @@ -// 
-*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// © Alfredo Correa 2022 - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi concepts" -#include - -#include "multi/array.hpp" - -#include +// Copyright 2022-2023 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +// #include +#include namespace multi = boost::multi; -using NDArrays = std::tuple< +// using NDArrays = boost::mp11::mp_list< // fails with Boost.Test 1.67 +using NDArrays = boost::mpl::list< multi::array, multi::array, multi::array @@ -34,4 +58,3 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(convertibles, NDArray, NDArrays) static_assert( std::is_same_v::value_type> ); static_assert( std::is_same_v::reference > ); } - diff --git a/external_codes/boost_multi/multi/test/constructors.cpp b/external_codes/boost_multi/multi/test/constructors.cpp index aff228cd43..1f3453d4d0 100644 --- a/external_codes/boost_multi/multi/test/constructors.cpp +++ b/external_codes/boost_multi/multi/test/constructors.cpp @@ -1,38 +1,61 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2021 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. 
Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi constructors" -#include +#include -#include "multi/array.hpp" +#include -#include +// Suppress warnings from boost.test +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wundef" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#pragma GCC diagnostic ignored "-Wfloat-equal" +#endif -namespace multi = boost::multi; +#ifndef BOOST_TEST_MODULE +#define BOOST_TEST_MAIN +#endif -using complex = std::complex; +#include -struct multiplies_bind1st{ - explicit multiplies_bind1st(multi::array&& marr) : m_(std::move(marr)) {} // this produces a bug in nvcc11.0 +namespace multi = boost::multi; + +struct multiplies_bind1st { + using complex = std::complex; + explicit multiplies_bind1st(multi::array&& marr) : m_(std::move(marr)) {} // this produces a bug in nvcc11.0 private: multi::array m_; }; BOOST_AUTO_TEST_CASE(multi_construct_1d) { - multi::static_array arr(multi::extensions_t<1>{multi::iextension{10}}, 1.); -// multi::static_array arr(multi::array::extensions_type{10}, 1.); + multi::static_array arr(multi::extensions_t<1>{multi::iextension{10}}, 1.0); + // multi::static_array arr(multi::array::extensions_type{10}, 1.0); BOOST_REQUIRE( size(arr) == 10 ); - BOOST_REQUIRE( arr[1] == 1. 
); + BOOST_REQUIRE( arr[1] == 1.0 ); } BOOST_AUTO_TEST_CASE(multi_constructors_inqnvcc_bug) { + using complex = std::complex; + multi::array marr({10, 10}); multiplies_bind1st(std::move(marr)); } BOOST_AUTO_TEST_CASE(multi_constructors_1d) { { - multi::array arr(multi::extensions_t<1>{multi::iextension{10}}); + multi::array const arr(multi::extensions_t<1>{multi::iextension{10}}); BOOST_REQUIRE( size(arr)==10 ); } { @@ -45,14 +68,16 @@ BOOST_AUTO_TEST_CASE(multi_constructors_1d) { BOOST_REQUIRE( size(arr)==10 ); BOOST_REQUIRE( arr[5]== double{} ); } - #if defined(__cpp_deduction_guides) and not defined(__NVCC__) and not defined(__circle_build__) // circle 170 crashes +#if defined(__cpp_deduction_guides) && !defined(__NVCC__) { - multi::array arr(multi::extensions_t<1>{{0, 10}}, double{}); + multi::array arr(multi::extensions_t<1>({0, 10}), double{}); BOOST_REQUIRE( size(arr)==10 ); BOOST_REQUIRE( arr[5]== double{} ); } { + // clang-format off multi::array arr({{0, 10}}, double{}); + // clang-format on BOOST_REQUIRE( size(arr)==10 ); BOOST_REQUIRE( arr[5]== double{} ); } @@ -66,11 +91,11 @@ BOOST_AUTO_TEST_CASE(multi_constructors_1d) { BOOST_REQUIRE( size(arr)==10 ); BOOST_REQUIRE( arr[5]== double{} ); } - #endif +#endif } BOOST_AUTO_TEST_CASE(multi_constructors_2d_ctad) { -#if defined(__cpp_deduction_guides) and not defined(__NVCC__) and not defined(__circle_build__) // circle 170 crashes +#if defined(__cpp_deduction_guides) && !defined(__NVCC__) multi::array arr({10, 20}, double{}); BOOST_REQUIRE( size(arr)==10 ); BOOST_REQUIRE( arr[5][6] == double{} ); @@ -78,22 +103,126 @@ BOOST_AUTO_TEST_CASE(multi_constructors_2d_ctad) { } BOOST_AUTO_TEST_CASE(multi_constructors) { -{//multi::array arr({10}); assert(size(A)==1); // warning in clang -}{//multi::array arr({10}, double{}); assert(size(arr)==10); // warning in clang -}{//multi::array arr({10}, double{}); assert(size(arr)==10); // warning in clang -}{//multi::array arr({10}, 0.); assert(size(arr)==10); // warning 
in clang -}{//multi::array arr({10}, {}); assert(size(arr)==10); // error ambiguous -}{ multi::array arr = {10} ; BOOST_REQUIRE( size(arr)==1 and arr[0]==10 ); -}{ multi::array arr = {10} ; BOOST_REQUIRE( size(arr)==1 and arr[0]==10 ); -}{ multi::array arr = {10} ; BOOST_REQUIRE( size(arr)==1 and arr[0]==10 ); -}{ multi::array arr({10}) ; BOOST_REQUIRE( size(arr)==1 and arr[0]==10 ); -}{ multi::array arr({10}) ; BOOST_REQUIRE( size(arr)==1 and arr[0]==10 ); -}{ multi::array arr({10}) ; BOOST_REQUIRE( size(arr)==1 and arr[0]==10 ); -//}{ multi::array arr({{10}}) ; assert( size(arr)==1 and arr[0]==10 ); // clang warns about double bracked -//}{ multi::array arr({{10}}) ; assert( size(arr)==1 and arr[0]==10 ); // clang warns about double bracked -//}{ multi::array arr({{10}}) ; assert( size(arr)==1 and arr[0]==10 ); // clang warns about double bracked -}{ multi::array arr({0, 10}) ; BOOST_REQUIRE( size(arr)==2 ); -}{ multi::array arr({0, 10}) ; BOOST_REQUIRE( size(arr)==2 ); -} { multi::array arr({0, 10}) ; BOOST_REQUIRE( size(arr)==2 ); + { + // multi::array arr({10}); assert(size(A)==1); // warning in clang + } { + // multi::array arr({10}, double{}); assert(size(arr)==10); // warning in clang + } { + // multi::array arr({10}, double{}); assert(size(arr)==10); // warning in clang + } { + // multi::array arr({10}, 0.); assert(size(arr)==10); // warning in clang + } { + // multi::array arr({10}, {}); assert(size(arr)==10); // error ambiguous + } { + multi::array arr = {10}; + BOOST_REQUIRE( size(arr)==1 && arr[0]==10 ); + } + { + multi::array arr = {10}; + BOOST_REQUIRE( size(arr)==1 && arr[0]==10 ); + } + { + multi::array arr = {10}; + BOOST_REQUIRE( size(arr)==1 && arr[0]==10 ); + } + { + multi::array arr({10}); + BOOST_REQUIRE( size(arr)==1 && arr[0]==10 ); + } + { + multi::array arr({10}); + BOOST_REQUIRE( size(arr)==1 && arr[0]==10 ); + } + { + multi::array arr({10}); + BOOST_REQUIRE( size(arr)==1 && arr[0]==10 ); + //}{ multi::array arr({{10}}) ; assert( 
size(arr)==1 and arr[0]==10 ); // clang warns about double bracked + //}{ multi::array arr({{10}}) ; assert( size(arr)==1 and arr[0]==10 ); // clang warns about double bracked + //}{ multi::array arr({{10}}) ; assert( size(arr)==1 and arr[0]==10 ); // clang warns about double bracked + } + { + multi::array const arr({0, 10}); + BOOST_REQUIRE( size(arr)==2 ); + } + { + multi::array const arr({0, 10}); + BOOST_REQUIRE( size(arr)==2 ); + } + { + multi::array const arr({0, 10}); + BOOST_REQUIRE( size(arr)==2 ); + } + { + using T = multi::array; + + static_assert(std::is_nothrow_destructible_v); + static_assert(std::is_default_constructible_v); + static_assert(std::is_nothrow_default_constructible_v); + + static_assert(std::is_copy_constructible_v); + static_assert(std::is_copy_assignable_v); + + // static_assert( std::is_nothrow_copy_constructible_v ); + // static_assert( std::is_nothrow_copy_assignable_v ); + + static_assert(std::is_move_constructible_v); + static_assert(std::is_move_assignable_v); + + static_assert(std::is_nothrow_move_constructible_v); + static_assert(std::is_nothrow_move_assignable_v); + } } + +BOOST_AUTO_TEST_CASE(views_are_not_allocable) { + // multi::array const AA = {{1.0, 2.0}, {3.0, 4.0}}; + // [[maybe_unused]] decltype(AA[0])* pp = new decltype(AA[0]){AA[0]}; + // delete pp; } + +BOOST_AUTO_TEST_CASE(views_are_not_placeable) { + // multi::array const AA = {{1.0, 2.0}, {3.0, 4.0}}; + // auto&& A0 = AA[0]; + // new(std::addressof(A0)) decltype(AA[0]){AA[1]}; +} + +BOOST_AUTO_TEST_CASE(views_cannot_be_elements) { + multi::array const AA = { + {1.0, 2.0}, + {3.0, 4.0}, + }; + std::vector vv; + vv.emplace_back(AA[0]); + vv.push_back(AA[0]); + // auto&& A0 = AA[0]; + // vv.push_back(A0); +} + +BOOST_AUTO_TEST_CASE(views_cannot_be_elements2) { + // multi::array const AA = {{1.0, 2.0}, {3.0, 4.0}}; + // std::vector vv(3, AA[0]); +} + +// vvv this test gives an error with Windows' GCC +// BOOST_AUTO_TEST_CASE(submultis_are_allocable) { +// 
multi::array const AA = { +// {1.0, 2.0}, +// {3.0, 4.0}, +// }; +// [[maybe_unused]] auto pp = std::unique_ptr>(new multi::array{AA[0]}); // NOLINT(modernize-make-unique) testing new +// BOOST_REQUIRE(pp); +// } + +// vvv this test gives an error with Windows' GCC +// BOOST_AUTO_TEST_CASE(submultis_are_placeable) { +// multi::array const AA = { +// {1.0, 2.0}, +// {3.0, 4.0}, +// }; + +// using D1 = multi::array; + +// void* buf = ::operator new(sizeof(D1)); +// D1* pd1 = new (buf) D1{AA[0]}; +// pd1->~D1(); // NOSONAR(cpp:S3432) testing placement new +// ::operator delete(buf); +// } diff --git a/external_codes/boost_multi/multi/test/conversions.cpp b/external_codes/boost_multi/multi/test/conversions.cpp new file mode 100644 index 0000000000..c711b4a2ac --- /dev/null +++ b/external_codes/boost_multi/multi/test/conversions.cpp @@ -0,0 +1,157 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +// # pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +// # pragma GCC diagnostic ignored "-Wfloat-equal" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4244) +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop 
+#elif defined(_MSC_VER) +# pragma warning(pop) +# pragma warning(disable : 4244) +#endif + +namespace multi = boost::multi; + +// NOLINTBEGIN(fuchsia-default-arguments-calls) // this is a defect in std::complex, not in the library +BOOST_AUTO_TEST_CASE(complex_conversion_float_to_double) { + std::complex const cee{1.0, 2.0}; + + std::complex const zee = cee; + + static_assert(multi::detail::is_explicitly_convertible_v, std::complex>); + static_assert(multi::detail::is_implicitly_convertible_v, std::complex>); + + BOOST_CHECK_CLOSE( cee.real(), static_cast(zee.real()), 1E-6 ); + + multi::static_array, 1> const CEE1(10, std::complex{}); // NOLINT(fuchsia-default-arguments-calls) + multi::static_array, 1> const ZEE1 = CEE1; +} + +BOOST_AUTO_TEST_CASE(complex_conversion_double_to_float) { + std::complex const zee{1.0, 2.0}; + + static_assert( multi::detail::is_explicitly_convertible_v, std::complex>); + static_assert(!multi::detail::is_implicitly_convertible_v, std::complex>); + + std::complex const cee{zee}; + + BOOST_CHECK_CLOSE( cee.real(), static_cast(zee.real()) , 1E-6); + + multi::static_array, 1> const ZEE1(10, std::complex{}); + multi::static_array, 1> const CEE1{ZEE1}; +} + +BOOST_AUTO_TEST_CASE(double_to_complex_conversion_documentation) { + // conversions from real to complex is implicit ... + double const dee = 5.0; + std::complex const zee = dee; + + BOOST_REQUIRE_CLOSE( zee.real(), 5.0, 1E-6 ); + BOOST_REQUIRE_CLOSE( zee.imag(), 0.0, 1E-6 ); + + // ... 
therefore from array of reals to arrays of complex is also + multi::array DEE({10, 10}, dee); + multi::array, 2> ZEE = DEE; + + BOOST_REQUIRE_CLOSE( ZEE[3][4].real(), 5.0, 1E-6 ); + BOOST_REQUIRE_CLOSE( ZEE[3][4].imag(), 0.0, 1E-6 ); + + multi::array, 2> ZEE2{DEE}; + + BOOST_REQUIRE_CLOSE( ZEE2[3][4].real(), 5.0, 1E-6); + BOOST_REQUIRE_CLOSE( ZEE2[3][4].imag(), 0.0, 1E-6 ); + + // multi::array DEE2{ZEE}; // compilation error, good +} + +void fun(multi::array, 2> arr); +void fun(multi::array, 2> arr) { arr.clear(); } + +void gun(multi::array, 2> const& /*unused*/); +void gun(multi::array, 2> const& /*unused*/) { + /* no-op */ +} + +BOOST_AUTO_TEST_CASE(conversion_in_function_call) { + multi::array, 2> ZEE({10, 10}); + fun(multi::array, 2>{ZEE}); + gun(multi::array, 2>{ZEE}); +} + +BOOST_AUTO_TEST_CASE(double_to_float) { + double const dee = 5.0; + // float const eff{dee}; // -Wc++11-narrowing // NOLINT(bugprone-narrowing-conversions) + // float const eff = dee; // NOLINT(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) + // float const eff(dee); // NOLINT(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) + auto const eff = static_cast(dee); + + // BOOST_REQUIRE( eff == 5.0 ); // -Wdouble-promotion + BOOST_REQUIRE_CLOSE( eff, 5.0F, 1E-6 ); + + multi::array const DEE({10, 10}, dee); + // multi::array const EFF(DEE); + auto const EFF = static_cast>(DEE); // TODO(correaa) investigate producing intermediate types accessible through interminediate types + + BOOST_REQUIRE_CLOSE( EFF[3][4], 5.0F, 1E-6 ); + + // multi::array const EFF = DEE; +} + +BOOST_AUTO_TEST_CASE(complex_to_complex_conversion) { + std::complex const cee{1.0, 2.0}; + std::complex const zee = cee; + + BOOST_REQUIRE_CLOSE( zee.real(), 1.0, 1E-6 ); + BOOST_REQUIRE_CLOSE( zee.imag(), 2.0, 1E-6 ); + + // std::complex cee2 = zee; // implicit conversion, compilation error + std::complex const cee2{zee}; + + BOOST_REQUIRE_CLOSE( cee2.real(), 1.0F, 1E-6 ); + 
BOOST_REQUIRE_CLOSE( cee2.imag(), 2.0F, 1E-6 ); + + multi::array, 2> const CEE({10, 10}, cee); + multi::array, 2> const ZEE = CEE; + + BOOST_REQUIRE_CLOSE( ZEE[3][4].real(), 1.0, 1E-6); + BOOST_REQUIRE_CLOSE( ZEE[3][4].imag(), 2.0, 1E-6); + + // multi::array, 2> const CEE2 = ZEE; // implicit conversion, compilation error + multi::array, 2> const CEE2{ZEE}; + + BOOST_REQUIRE_CLOSE( CEE2[3][4].real(), 1.0F, 1E-6 ); + BOOST_REQUIRE_CLOSE( CEE2[3][4].imag(), 2.0F, 1E-6 ); +} +// NOLINTEND(fuchsia-default-arguments-calls) diff --git a/external_codes/boost_multi/multi/test/diagonal.cpp b/external_codes/boost_multi/multi/test/diagonal.cpp new file mode 100644 index 0000000000..41aadfe768 --- /dev/null +++ b/external_codes/boost_multi/multi/test/diagonal.cpp @@ -0,0 +1,145 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +namespace multi = boost::multi; + +template +auto trace_with_indices(Array2D const& arr) { + typename Array2D::element_type sum{0}; + for(auto i : extension(arr)) { // NOLINT(altera-unroll-loops) testing loops + sum += arr[i][i]; + } + return sum; +} + +template +auto 
trace_with_diagonal(Array2D const& arr) { + typename Array2D::element_type sum{0}; + for(auto aii : arr.diagonal()) { // NOLINT(altera-unroll-loops) testing loops + sum += aii; + } + return sum; +} + +template +auto trace_with_accumulate(Array2D const& arr) { + return std::accumulate(arr.diagonal().begin(), arr.diagonal().end(), static_cast(0)); +} + +BOOST_AUTO_TEST_CASE(trace_test) { + using int_element = multi::index; + multi::array arr({5, 5}, 0); + + auto [is, js] = extensions(arr); + for(auto i : is) { // NOLINT(altera-unroll-loops) testing loops + for(auto j : js) { // NOLINT(altera-unroll-loops) testing loops + arr[i][j] = 10 * i + j; + } + } + + auto tr = trace_with_diagonal(arr); + + BOOST_REQUIRE( tr == 00 + 11 + 22 + 33 + 44 ); + + BOOST_REQUIRE( trace_with_diagonal(arr) == trace_with_indices(arr) ); + BOOST_REQUIRE( trace_with_diagonal(arr) == trace_with_accumulate(arr) ); +// BOOST_REQUIRE( trace_with_diagonal(arr) == trace_with_reduce(arr) ); +} + +BOOST_AUTO_TEST_CASE(broadcasted) { + multi::array const arr = { + {0, 1, 2, 3}, + {4, 5, 6, 7}, + {8, 9, 10, 11}, + }; + + auto const& a3D = arr.broadcasted(); + + BOOST_TEST( &a3D[0][2][1] == &arr[2][1] ); + BOOST_TEST( &a3D[1][2][1] == &arr[2][1] ); + + { + auto const& arr_instance = a3D[0]; + BOOST_REQUIRE( &arr_instance[3][1] == &arr[3][1] ); + } + { + auto const& arr_instance = a3D[99]; + BOOST_REQUIRE( &arr_instance[3][1] == &arr[3][1] ); + } + { + auto const& arr_instance = a3D[-99]; + BOOST_REQUIRE( &arr_instance[3][1] == &arr[3][1] ); + } + { + auto const& a3D_self = a3D(); + BOOST_TEST( &a3D_self[ 4][3][1] == &arr[3][1] ); + BOOST_TEST( &a3D_self[99][3][1] == &arr[3][1] ); + } + { + // [[maybe_unused]] auto const& a3D_finite = a3D({0, 9}); + // BOOST_TEST( &a3D_finite[ 4][3][1] == &arr[3][1] ); + // BOOST_TEST( &a3D_finite[99][3][1] == &arr[3][1] ); + } + +// BOOST_REQUIRE( a3D_finite.size() == 5 ); +// BOOST_REQUIRE( a3D_finite.begin() + 5 == a3D_finite.end() ); +} + 
+BOOST_AUTO_TEST_CASE(broadcast_1D) { + multi::array const arr = {0, 1, 2, 3}; + + auto const& a2D = arr.broadcasted(); + + BOOST_TEST( &a2D[0][2] == &arr[2] ); + BOOST_TEST( &a2D[1][2] == &arr[2] ); +} + +BOOST_AUTO_TEST_CASE(broadcast_0D) { + multi::array arr = {0, 1, 2, 3}; + multi::array const vv(2); + + auto const& v1D = vv.broadcasted(); + + BOOST_TEST( &v1D[0] == vv.base() ); + BOOST_TEST( &v1D[1] == vv.base() ); + + multi::array r1D({4}, 0); + std::transform(arr.begin(), arr.end(), v1D.begin(), r1D.begin(), std::plus<>{}); + + BOOST_TEST( r1D[3] == arr[3] + 2 ); + + std::transform(arr.begin(), arr.end(), v1D.begin(), arr.begin(), [](auto, auto ve) {return ve;}); + BOOST_TEST( arr[3] == 2 ); + + // std::copy_n(v1D.begin(), arr.size(), arr.begin()); + // BOOST_TEST( arr[3] == 2 ); +} diff --git a/external_codes/boost_multi/multi/test/element_access.cpp b/external_codes/boost_multi/multi/test/element_access.cpp index 74ad68308c..8b471bfff8 100644 --- a/external_codes/boost_multi/multi/test/element_access.cpp +++ b/external_codes/boost_multi/multi/test/element_access.cpp @@ -1,31 +1,41 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa +// Copyright 2018-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi element access" -#include - -#include "multi/array.hpp" +#include +#include #include -#include // for iota - -namespace multi = boost::multi; - -template void what(T&&) = delete; +#include // for std::iota + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +// # pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +// # pragma GCC diagnostic ignored "-Wfloat-equal" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4244) +#endif -namespace test_bee { - struct bee{}; +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif - template auto paren(Array&& arr, bee const&/*unused*/) -> decltype(auto) { - return std::forward(arr)(0); - } -} // end namespace test_bee +#include -BOOST_AUTO_TEST_CASE(overload_paren) { - multi::array arr({10}); - test_bee::bee zero; - BOOST_REQUIRE( &arr(0) == &arr(zero) ); -} +namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(empty_intersection) { multi::array arr({10}); @@ -39,8 +49,11 @@ BOOST_AUTO_TEST_CASE(empty_intersection) { } BOOST_AUTO_TEST_CASE(multi_tests_element_access_with_tuple) { - multi::array arr({3, 3}, 44.); - std::array point = {{1, 2}}; + multi::array arr({3, 3}, 'k'); + + std::array point = { + {1, 2} + }; BOOST_REQUIRE( arr[point[0]][point[1]] == arr(1, 2) ); BOOST_REQUIRE( &arr(point[0], point[1]) == &arr[point[0]][point[1]] ); @@ -48,25 +61,25 @@ BOOST_AUTO_TEST_CASE(multi_tests_element_access_with_tuple) 
{ BOOST_REQUIRE( &arr[point[0]][point[1]] == &arr(point[0], point[1]) ); BOOST_REQUIRE( &arr(point[0], point[1]) == &arr.apply(point) ); -#if not defined(__circle_build__) BOOST_REQUIRE( &arr[point[0]][point[1]] == &std::apply(arr, point) ); BOOST_REQUIRE( &arr[point[0]][point[1]] == & apply(arr, point) ); -#endif } BOOST_AUTO_TEST_CASE(multi_tests_extension_with_tuple) { { - multi::array::extensions_type ext = {3, 4}; - multi::array arr(ext, 44.); + multi::array::extensions_type const ext = {3, 4}; + + multi::array const arr(ext, 44.0); + BOOST_REQUIRE( size(arr) == 3 ); } { auto const [en, em] = std::make_tuple(3, 4); - multi::array arr({en, em}, 44.); + multi::array const arr({en, em}, 44.0); BOOST_REQUIRE( size(arr) == 3 ); } { - auto arr = std::apply([](auto const&... szs) {return multi::array({szs...}, 55.);}, std::make_tuple(3, 4)); + auto arr = std::apply([](auto const&... szs) { return multi::array({szs...}, 55.0); }, std::make_tuple(3, 4)); BOOST_REQUIRE( size(arr) == 3 ); BOOST_REQUIRE( std::get<0>(sizes(arr)) == 3 ); BOOST_REQUIRE( std::get<1>(sizes(arr)) == 4 ); @@ -74,48 +87,43 @@ BOOST_AUTO_TEST_CASE(multi_tests_extension_with_tuple) { } BOOST_AUTO_TEST_CASE(multi_test_constness_reference) { - multi::array const carr({10, 10}, 99.); + multi::array const carr({10, 10}, '9'); BOOST_REQUIRE( size( carr(1, {0, 3}) ) == 3 ); - BOOST_REQUIRE( carr(1, {0, 3})[1] == 99. ); - static_assert( decltype( carr({0, 3}, 1) )::rank_v == 1 , "!"); + BOOST_REQUIRE( carr(1, {0, 3})[1] == '9' ); + static_assert(decltype(carr({0, 3}, 1))::rank_v == 1); BOOST_REQUIRE( size(carr.sliced(0, 3)) == 3 ); BOOST_REQUIRE( carr.range({0, 3}).rotated()[1].unrotated().size() == 3 ); - BOOST_REQUIRE( carr({0, 3}, {0, 3})[1][1] == 99. 
); - - static_assert(not std::is_assignable_v, "!"); + BOOST_REQUIRE( carr({0, 3}, {0, 3})[1][1] == '9' ); -// none of these lines should compile because m is read-only -// m(1, {0, 3})[1] = 88.; -// m({0, 3}, 1)[1] = 77.; -// m({0, 3}, {0, 3})[1][1] = 66.; + static_assert(! std::is_assignable_v); } -#if 1 - BOOST_AUTO_TEST_CASE(multi_test_stencil) { - multi::array arr = - {{"a", "b", "c", "d", "e"}, - {"f", "g", "h", "f", "g"}, - {"h", "i", "j", "k", "l"}} - ; + using namespace std::string_literals; // NOLINT(build/namespaces) ""s + + multi::array arr = { + {"a"s, "b"s, "c"s, "d"s, "e"s}, + {"f"s, "g"s, "h"s, "f"s, "g"s}, + {"h"s, "i"s, "j"s, "k"s, "l"s}, + }; BOOST_REQUIRE( size(arr) == 3 ); BOOST_REQUIRE( arr.num_elements() == 3*5L ); BOOST_REQUIRE( arr[1][2] == "h" ); BOOST_REQUIRE( size(arr ({1, 3}, {2, 5})) == 2 ); - BOOST_REQUIRE( extension(arr ({1, 3}, {2, 5})).start() == 0 ); + BOOST_REQUIRE( extension(arr ({1, 3}, {2, 5})).first() == 0 ); BOOST_REQUIRE( arr ({1, 3}, {2, 5}).num_elements() == 2*3L ); BOOST_REQUIRE( arr ({1, 3}, {2, 5}).num_elements() == 2*3L ); BOOST_REQUIRE( arr ({1, 3}, {2, 5})[0][0] == "h" ); BOOST_REQUIRE( &arr ({1, 3}, {2, 5})[0][0] == &arr[1][2] ); BOOST_REQUIRE( size(arr.stenciled({1, 3}, {2, 5})) == 2 ); - BOOST_REQUIRE( extension(arr.stenciled({1, 3}, {2, 5})).start() == 1 ); + BOOST_REQUIRE( extension(arr.stenciled({1, 3}, {2, 5})).first() == 1 ); BOOST_REQUIRE( arr.stenciled({1, 3}, {2, 5}).num_elements() == 2*3L ); BOOST_REQUIRE( arr.stenciled({1, 3}, {2, 5}) [1][2] == "h" ); BOOST_REQUIRE( &arr.stenciled({1, 3}, {2, 5}) [1][2] == &arr[1][2] ); @@ -129,54 +137,64 @@ BOOST_AUTO_TEST_CASE(multi_test_stencil) { BOOST_REQUIRE( &arr({1, 3}, {2, 5}).elements().back() == &arr(2, 4) ); } +BOOST_AUTO_TEST_CASE(empty_elements) { + multi::array arr1; + multi::array arr2; + + BOOST_REQUIRE( arr1.elements().size() == 0 ); + BOOST_REQUIRE( arr2.elements().size() == 0 ); + BOOST_REQUIRE( arr1.elements() == arr2.elements() ); + BOOST_REQUIRE( 
!(arr1.elements() != arr2.elements()) ); +} + BOOST_AUTO_TEST_CASE(multi_test_elements_1D) { - multi::array arr = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9.}; + multi::array arr = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; BOOST_REQUIRE( arr.size() == 10 ); BOOST_REQUIRE( arr.elements().size() == 10 ); BOOST_REQUIRE( &arr.elements()[0] == &arr[0] ); BOOST_REQUIRE( &arr.elements()[9] == &arr[9] ); - BOOST_REQUIRE( arr.elements().begin() < arr.elements().end() ); - BOOST_REQUIRE( arr.elements().end() > arr.elements().begin() ); - BOOST_REQUIRE( arr.elements().begin() != arr.elements().end() ); - BOOST_REQUIRE( not( arr.elements().begin() == arr.elements().end() ) ); + BOOST_REQUIRE( arr.elements().begin() < arr.elements().end() ); + BOOST_REQUIRE( arr.elements().end() > arr.elements().begin() ); + BOOST_REQUIRE( arr.elements().begin() != arr.elements().end() ); + BOOST_REQUIRE( !( arr.elements().begin() == arr.elements().end() ) ); BOOST_REQUIRE( arr().elements().begin() < arr().elements().end() ); BOOST_REQUIRE( arr().elements().begin() == arr().elements().begin() ); - BOOST_REQUIRE( arr().elements().begin() < arr().elements().end() or arr().elements().begin() == arr().elements().end() ); + BOOST_REQUIRE( arr().elements().begin() < arr().elements().end() || arr().elements().begin() == arr().elements().end() ); BOOST_REQUIRE( arr().elements().begin() <= arr().elements().end() ); BOOST_REQUIRE( arr().elements().end() > arr().elements().begin() ); BOOST_REQUIRE( arr().elements().end() >= arr().elements().begin() ); - arr.elements() = {9., 8., 7., 6., 5., 4., 3., 2., 1., 0.}; - BOOST_REQUIRE( arr[2] == 7. ); - BOOST_REQUIRE( arr.elements()[2] == 7. 
); + arr.elements() = {9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + BOOST_REQUIRE( arr[2] == 7 ); + BOOST_REQUIRE( arr.elements()[2] == 7 ); } BOOST_AUTO_TEST_CASE(multi_test_elements_1D_as_range) { - multi::array arr = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9.}; + multi::array arr = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; BOOST_REQUIRE( arr.size() == 10 ); - arr().elements() = {9., 8., 7., 6., 5., 4., 3., 2., 1., 0.}; - BOOST_REQUIRE( arr[2] == 7. ); - BOOST_REQUIRE( arr.elements()[2] == 7. ); + arr().elements() = {9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + BOOST_REQUIRE( arr[2] == 7 ); + BOOST_REQUIRE( arr.elements()[2] == 7 ); } BOOST_AUTO_TEST_CASE(elements_from_init_list_2D) { - multi::array arr({3, 2}); - arr().elements() = {1., 2., 3., 4., 5., 6.}; - BOOST_REQUIRE(arr[1][0] == 3.); + multi::array arr({3, 2}); + arr().elements() = {1, 2, 3, 4, 5, 6}; + BOOST_REQUIRE(arr[1][0] == 3); - arr.elements() = {10., 20., 30., 40., 50., 60.}; - BOOST_REQUIRE(arr[1][0] == 30.); + arr.elements() = {10, 20, 30, 40, 50, 60}; + BOOST_REQUIRE(arr[1][0] == 30); } BOOST_AUTO_TEST_CASE(front_back_2D) { - multi::array arr({3, 4}); - std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 0.); + multi::array arr({3, 4}); + std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 0); BOOST_REQUIRE( arr.front()[2] == arr[0][2] ); BOOST_REQUIRE( &arr.front()[2] == &arr[0][2] ); @@ -186,8 +204,8 @@ BOOST_AUTO_TEST_CASE(front_back_2D) { } BOOST_AUTO_TEST_CASE(front_back_1D) { - multi::array arr({30}, double{}); - std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 0.); + multi::array arr({30}, double{}); + std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 0); BOOST_REQUIRE( arr.front() == arr[ 0] ); BOOST_REQUIRE( &arr.front() == &arr[ 0] ); @@ -197,13 +215,13 @@ BOOST_AUTO_TEST_CASE(front_back_1D) { } BOOST_AUTO_TEST_CASE(elements_rvalues) { - using movable_type = std::vector; - movable_type movable_value(5., 99.); + using movable_type = 
std::vector; + movable_type const movable_value(5, 99); // NOLINT(fuchsia-default-arguments-calls) multi::array arr = {movable_value, movable_value, movable_value}; BOOST_REQUIRE( arr.size() == 3 ); - movable_type front = std::move(arr)[0]; + movable_type const front = std::move(arr)[0]; BOOST_REQUIRE( front == movable_value ); BOOST_REQUIRE( arr[0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing purposes @@ -213,13 +231,13 @@ BOOST_AUTO_TEST_CASE(elements_rvalues) { } template -void assign_elements_from_to(Array1D&& arr, std::deque>& dest) { +void assign_elements_from_to(Array1D&& arr, std::deque>& dest) { // NOLINT(google-runtime-references) dest is mutated std::copy(std::forward(arr).begin(), std::forward(arr).end(), std::back_inserter(dest)); } BOOST_AUTO_TEST_CASE(elements_rvalues_nomove) { using movable_type = std::vector; - movable_type movable_value(5., 99.); + movable_type const movable_value(5., 99.0); // NOLINT(fuchsia-default-arguments-calls) multi::array arr = {movable_value, movable_value, movable_value}; BOOST_REQUIRE( arr.size() == 3 ); @@ -234,24 +252,20 @@ BOOST_AUTO_TEST_CASE(elements_rvalues_nomove) { assign_elements_from_to(std::move(arr), q2); - BOOST_REQUIRE( arr[0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing purposes + // BOOST_REQUIRE( arr[0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing purposes BOOST_REQUIRE( q1 == q2 ); } BOOST_AUTO_TEST_CASE(elements_rvalues_assignment) { - std::vector vec = {1., 2., 3.}; - std::move(vec) = std::vector{3., 4., 5.}; - std::move(vec)[1] = 99.; // it compiles // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing purposes -// std::move(v[1]) = 99.; // does not compile + std::vector vec = {1.0, 2.0, 3.0}; // NOLINT(fuchsia-default-arguments-calls) + + std::move(vec) = std::vector{3.0, 4.0, 5.0}; // NOLINT(fuchsia-default-arguments-calls) -// double a = 5.; -// std::move(a) 
= 9.; // does not compile -// BOOST_REQUIRE( a == 9. ); + std::move(vec)[1] = 99.0; // it compiles // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing purposes + + multi::array arr1 = {1.0, 2.0, 3.0}; + multi::array const arr2 = {1.0, 2.0, 3.0}; - multi::array arr1 = {1., 2., 3.}; - multi::array arr2 = {1., 2., 3.}; std::move(arr1) = arr2; // this compiles TODO(correaa) should it? } - -#endif diff --git a/external_codes/boost_multi/multi/test/element_transformed.cpp b/external_codes/boost_multi/multi/test/element_transformed.cpp index e4fc7aa3e1..612dafb518 100644 --- a/external_codes/boost_multi/multi/test/element_transformed.cpp +++ b/external_codes/boost_multi/multi/test/element_transformed.cpp @@ -1,21 +1,47 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi element transformed" -#include - -#include "multi/array.hpp" - -#include -#include +// Copyright 2022-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +# pragma clang diagnostic ignored "-Wignored-qualifiers" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4244) +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; -using complex = std::complex; -constexpr complex I{0, 1}; // NOLINT(readability-identifier-length) I imaginary unit - BOOST_AUTO_TEST_CASE(element_transformed_1D_conj_using_function_reference) { - multi::array arr = { 1. + 2.*I, 3. + 4.*I}; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) I imaginary unit + + multi::array arr = {1.0 + 2.0 * I, 3.0 + 4.0 * I}; constexpr auto conj = static_cast(std::conj); @@ -23,185 +49,223 @@ BOOST_AUTO_TEST_CASE(element_transformed_1D_conj_using_function_reference) { BOOST_REQUIRE( conjd_arr[0] == conj(arr[0]) ); BOOST_REQUIRE( conjd_arr[1] == conj(arr[1]) ); -// Ac[0] = 5. + 4.*I; // this doesn't compile, good! + // Ac[0] = 5. + 4.*I; // this doesn't compile, good! BOOST_REQUIRE( conjd_arr[0] == 1. 
- 2.*I ); - BOOST_REQUIRE( real(std::inner_product(arr.begin(), arr.end(), conjd_arr.begin(), complex{0.})) == std::norm(arr[0]) + std::norm(arr[1]) ); - BOOST_REQUIRE( imag(std::inner_product(arr.begin(), arr.end(), conjd_arr.begin(), complex{0.})) == 0. ); + BOOST_TEST_REQUIRE( real(std::inner_product(arr.begin(), arr.end(), conjd_arr.begin(), complex{0.0, 0.0})) == std::norm(arr[0]) + std::norm(arr[1]) ); + BOOST_REQUIRE( imag(std::inner_product(arr.begin(), arr.end(), conjd_arr.begin(), complex{0.0, 0.0})) == 0. ); - BOOST_REQUIRE( std::inner_product(arr.begin(), arr.end(), conjd_arr.begin(), complex{0.}) == std::norm(arr[0]) + std::norm(arr[1]) ); + BOOST_TEST_REQUIRE( std::inner_product(arr.begin(), arr.end(), conjd_arr.begin(), complex{0.0, 0.0}) == std::norm(arr[0]) + std::norm(arr[1]) ); } BOOST_AUTO_TEST_CASE(element_transformed_1D_conj_using_lambda) { - multi::array arr = { 1. + 2.*I, 3. + 4.*I}; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) I imaginary unit - auto const& conjd_arr = arr.element_transformed([](auto const& cee) {return std::conj(cee);}); + multi::array arr = {1.0 + 2.0 * I, 3.0 + 4.0 * I}; + + // g++ -std=20 needs the transformation (lambda) to be noexcept + auto const& conjd_arr = arr.element_transformed([](auto const& cee) noexcept { return std::conj(cee); }); BOOST_REQUIRE( conjd_arr[0] == std::conj(arr[0]) ); BOOST_REQUIRE( conjd_arr[1] == std::conj(arr[1]) ); -// Ac[0] = 5. + 4.*I; // this doesn't compile, good! - BOOST_REQUIRE( conjd_arr[0] == 1. - 2.*I ); + // Ac[0] = 5. + 4.*I; // this doesn't compile, good! + BOOST_REQUIRE( conjd_arr[0] == 1.0 - 2.0*I ); } BOOST_AUTO_TEST_CASE(element_transformed_1D_conj_using_lambda_with_const_return) { - multi::array arr = { 1. + 2.*I, 3. 
+ 4.*I}; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) I imaginary unit + + multi::array arr = {1.0 + 2.0 * I, 3.0 + 4.0 * I}; - auto const& conjd_arr = arr.element_transformed([](auto const& cee) -> auto const {return std::conj(cee);}); // NOLINT(readability-const-return-type) to disable assignment + // g++ -std=20 needs the transformation (lambda) to be noexcept + // NOLINTNEXTLINE(readability-const-return-type) a way to disable assignment + auto&& conjd_arr = arr.element_transformed([](auto const& cee) noexcept -> auto const { return std::conj(cee); }); // `const` allows this idiom. it needs -Wno-nonportable-cfstrings and -Wignored-qualifiers in clang BOOST_REQUIRE( conjd_arr[0] == std::conj(arr[0]) ); BOOST_REQUIRE( conjd_arr[1] == std::conj(arr[1]) ); -// Ac[0] = 5. + 4.*I; // this doesn't compile, good! - BOOST_REQUIRE( conjd_arr[0] == 1. - 2.*I ); + // conjd_arr[0] = 5.0 + 4.0*I; // this doesn't compile, good! otherwise it would be misleading (see above) + BOOST_REQUIRE( conjd_arr[0] == 1.0 - 2.0*I ); } -template struct Conjd; +template struct Conjd; // NOLINT(readability-identifier-naming) for testing -constexpr struct Conj_t { // NOLINT(readability-identifier-naming) for testing - template constexpr auto operator()(ComplexRef&& zee) const {return Conjd{zee};} - template constexpr auto operator()(Conjd const&) const = delete; - template constexpr auto operator()(Conjd &&) const = delete; - template constexpr auto operator()(Conjd &) const = delete; -} Conj; +struct Conj_t { // NOLINT(readability-identifier-naming) for testing + template constexpr auto operator()(ComplexRef&& zee) const { return Conjd{std::forward(zee)}; } + template constexpr auto operator()(Conjd const&) const = delete; + template constexpr auto operator()(Conjd&&) const = delete; + template constexpr auto operator()(Conjd&) const = delete; +}; +inline constexpr Conj_t Conj; template struct Conjd { // 
NOLINT(readability-identifier-naming) for testing - using decay_type = decltype( + std::declval() ); + using decay_type = decltype(+std::declval()); - constexpr operator decay_type() const {return std::conj(c_);} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) + // explicit constexpr operator decay_type() const { return std::conj(c_); } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) - friend constexpr auto operator==(decay_type const& other, Conjd const& self) -> bool {return std::conj(self.c_) == other;} - friend constexpr auto operator!=(decay_type const& other, Conjd const& self) -> bool {return std::conj(self.c_) != other;} + friend constexpr auto operator==(decay_type const& other, Conjd const& self) -> bool { return std::conj(self.c_) == other; } + friend constexpr auto operator!=(decay_type const& other, Conjd const& self) -> bool { return std::conj(self.c_) != other; } - friend constexpr auto operator==(Conjd const& self, decay_type const& other) -> bool {return other == std::conj(self.c_);} - friend constexpr auto operator!=(Conjd const& self, decay_type const& other) -> bool {return other != std::conj(self.c_);} + friend constexpr auto operator==(Conjd const& self, decay_type const& other) -> bool { return other == std::conj(self.c_); } + friend constexpr auto operator!=(Conjd const& self, decay_type const& other) -> bool { return other != std::conj(self.c_); } - friend constexpr auto operator==(Conjd const& self, Conjd const& other) -> bool {return other.c_ == self.c_;} - friend constexpr auto operator!=(Conjd const& self, Conjd const& other) -> bool {return other.c_ != self.c_;} + friend constexpr auto operator==(Conjd const& self, Conjd const& other) -> bool { return other.c_ == self.c_; } + friend constexpr auto operator!=(Conjd const& self, Conjd const& other) -> bool { return other.c_ != self.c_; } - constexpr auto operator=(decay_type const& other) && -> Conjd& {c_ = std::conj(other); 
return *this;} + constexpr auto operator=(decay_type const& other) && -> Conjd& { + c_ = std::conj(other); + return *this; + } private: - constexpr explicit Conjd(ComplexRef cee) : c_{cee} {} - ComplexRef c_; + constexpr explicit Conjd(ComplexRef& cee) : c_{cee} {} + ComplexRef& c_; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) can be a reference friend decltype(Conj); }; BOOST_AUTO_TEST_CASE(element_transformed_1D_conj_using_proxy) { - multi::array const arr = { 1. + 2.*I, 3. + 4.*I}; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) I imaginary unit + + multi::array const arr = {1.0 + 2.0 * I, 3.0 + 4.0 * I}; auto const& conj_arr = arr.element_transformed(Conj); BOOST_REQUIRE( std::conj(arr[0]) == conj_arr[0] ); BOOST_REQUIRE( std::conj(arr[1]) == conj_arr[1] ); -// Ac[0] = 5. + 4.*I; // not allowed, compile error, Ac is const - BOOST_REQUIRE( conj_arr[0] == 1. - 2.*I ); + // Ac[0] = 5. + 4.*I; // not allowed, compile error, Ac is const + BOOST_REQUIRE( conj_arr[0] == 1.0 - 2.0*I ); } BOOST_AUTO_TEST_CASE(element_transformed_1D_conj_using_mutable_proxy) { - multi::array arr = { 1. + 2.*I, 3. + 4.*I}; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) I imaginary unit + + multi::array arr = {1.0 + 2.0 * I, 3.0 + 4.0 * I}; auto&& conj_arr = arr.element_transformed(Conj); // NOLINT(readability-const-return-type) to disable assignment BOOST_REQUIRE( std::conj(arr[0]) == conj_arr[0] ); BOOST_REQUIRE( std::conj(arr[1]) == conj_arr[1] ); - conj_arr[0] = 5. + 4.*I; - BOOST_REQUIRE( conj_arr[0] == 5. + 4.*I ); - BOOST_REQUIRE( arr[0] == 5. - 4.*I ); + conj_arr[0] = 5.0 + 4.0 * I; + BOOST_REQUIRE( conj_arr[0] == 5.0 + 4.0*I ); + BOOST_REQUIRE( arr[0] == 5.0 - 4.0*I ); } BOOST_AUTO_TEST_CASE(transform_ptr_single_value) { - complex cee = 1. 
+ 2.*I; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) I imaginary unit + + complex cee = 1.0 + 2.0 * I; - constexpr auto conj_ro = [](auto const& zee) {return std::conj(zee);}; // NOLINT(readability-const-return-type,clang-diagnostic-ignored-qualifiers) to prevent assignment + // NOLINTNEXTLINE(readability-const-return-type,clang-diagnostic-ignored-qualifiers) to prevent assignment + constexpr auto conj_ro = [](auto const& zee) noexcept { return std::conj(zee); }; // g++ -std=20 needs the transformation (lambda) to be noexcept - multi::transform_ptr conjd_ceeP{&cee, conj_ro}; - BOOST_REQUIRE( *conjd_ceeP == std::conj(1. + 2.*I) ); + multi::transform_ptr const conjd_ceeP{&cee, conj_ro}; + BOOST_REQUIRE( *conjd_ceeP == std::conj(1.0 + 2.0*I) ); } BOOST_AUTO_TEST_CASE(transform_ptr_1D_array) { - multi::array arr = { 1. + 2.*I, 3. + 4.*I}; + using complex = std::complex; + auto const I = complex{0.0, 1.0}; // NOLINT(readability-identifier-length) I imaginary unit - constexpr auto conj_ro = [](auto const& zee) {return std::conj(zee);}; // NOLINT(readability-const-return-type,clang-diagnostic-ignored-qualifiers) to prevent assignment + multi::array arr = {1.0 + 2.0 * I, 3.0 + 4.0 * I}; + + // NOLINT(readability-const-return-type,clang-diagnostic-ignored-qualifiers) to prevent assignment + constexpr auto conj_ro = [](auto const& zee) noexcept { return std::conj(zee); }; // g++ -std=20 needs the transformation (lambda) to be noexcept auto const& conjd_arr = arr.element_transformed(conj_ro); BOOST_REQUIRE( conjd_arr[0] == conj_ro(arr[0]) ); BOOST_REQUIRE( conjd_arr[1] == conj_ro(arr[1]) ); -// Ac[0] = 5. + 4.i; // doesn't compile thanks to the `auto const` in the `conj` def + // Ac[0] = 5. 
+ 4.i; // doesn't compile thanks to the `auto const` in the `conj` def } BOOST_AUTO_TEST_CASE(arthur_odwyer_array_transform_int) { struct S { // NOLINT(readability-identifier-naming) - int a; - int b; + int a; + int b; }; multi::array arr({2}, S{}); - auto&& ref = arr.element_transformed(&S::a); - ref[0] = 99.; + auto&& ref = arr.element_transformed(&S::a); + ref[0] = 99.0; - BOOST_REQUIRE( arr[0].a == 99. ); + BOOST_REQUIRE( arr[0].a == 99.0 ); auto const& cref = arr.element_transformed(&S::a); - BOOST_REQUIRE( cref[0] == 99. ); -// cr[0] = 99.; // compile error "assignment of read-only location" + BOOST_REQUIRE( cref[0] == 99.0 ); + // cr[0] = 99.; // compile error "assignment of read-only location" } BOOST_AUTO_TEST_CASE(arthur_odwyer_array_transform_int_array) { struct S { // NOLINT(readability-identifier-naming) - int a[10]; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) testing - int b; + int a[10]; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) testing + int b; }; multi::array vec({2}, S{}); + auto&& ref = vec.element_transformed(&S::a); - ref[0][1] = 99.; - BOOST_REQUIRE( ref[0][1] == 99. ); - BOOST_REQUIRE( vec[0].a[1] == 99. ); + ref[0][1] = 99.0; + + BOOST_REQUIRE( ref[0][1] == 99.0 ); + BOOST_REQUIRE( vec[0].a[1] == 99.0 ); auto const& cref = vec.element_transformed(&S::a); - BOOST_REQUIRE( cref[0][1] == 99. 
); -// cref[0][1] = 99.; // compile error "assignment of read-only location" + BOOST_REQUIRE( cref[0][1] == 99.0 ); + // cref[0][1] = 99.0; // compile error "assignment of read-only location" } BOOST_AUTO_TEST_CASE(indirect_transformed) { - std::vector vec = {0., 1.1, 2.2, 3.3, 4.4, 5.5}; + std::vector vec = {0.0, 1.1, 2.2, 3.3, 4.4, 5.5}; // std::vector NOLINT(fuchsia-default-arguments-calls) using index_t = std::vector::size_type; multi::array const arr = {4, 3, 2, 1, 0}; - auto&& indirect_v = arr.element_transformed([&vec](index_t idx) noexcept -> double& {return vec[idx];}); + auto&& indirect_v = arr.element_transformed([&vec](index_t idx) noexcept -> double& { return vec[idx]; }); BOOST_REQUIRE( indirect_v[1] == vec[3] ); BOOST_REQUIRE( &indirect_v[1] == &vec[3] ); - indirect_v[1] = 99.; - BOOST_REQUIRE( vec[3] == 99. ); + indirect_v[1] = 99.0; + BOOST_REQUIRE( vec[3] == 99.0 ); + + // for(auto&& elem : indirect_v) {elem = 88.;} + // std::fill(indirect_v.begin(), indirect_v.end(), 88.0); - for(auto&& elem : indirect_v) {elem = 88.;} - BOOST_REQUIRE( vec[3] == 88. ); +#if !defined(_MSC_VER) + indirect_v.fill(88.0); + BOOST_REQUIRE( vec[3] == 88.0 ); - auto const& const_indirect_v = indirect_v; (void)const_indirect_v; -// const_indirect_v[1] = 999.; // does not compile, good! - BOOST_REQUIRE( const_indirect_v[3] == 88. ); + auto const& const_indirect_v = indirect_v; + (void)const_indirect_v; + // const_indirect_v[1] = 999.; // does not compile, good! 
+ BOOST_REQUIRE(const_indirect_v[3] == 88.0); +#endif } BOOST_AUTO_TEST_CASE(indirect_transformed_carray) { - double carr[5][3] = { // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) testing legacy types + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) testing legacy types + double carr[5][3] = { { 0.0, 1.0, 2.0}, {10.0, 11.0, 12.0}, {20.0, 21.0, 22.0}, {30.0, 31.0, 32.0}, - {40.0, 41.0, 42.0} + {40.0, 41.0, 42.0}, }; using index_t = std::vector::size_type; + multi::array const arr = {4, 3, 2, 1, 0}; - auto&& indirect_v = arr.element_transformed([&carr](index_t idx) noexcept -> double(&)[3] {return carr[idx];}); // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + auto&& indirect_v = arr.element_transformed([&carr](index_t idx) noexcept -> double(&)[3] { return carr[idx]; }); BOOST_REQUIRE( &indirect_v[1][2] == &carr[3][2] ); BOOST_REQUIRE( indirect_v[1][2] == 32.0 ); @@ -211,6 +275,6 @@ BOOST_AUTO_TEST_CASE(indirect_transformed_carray) { auto const& const_indirect_v = indirect_v; - BOOST_TEST( const_indirect_v[1][2] == 11111.0 ); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) testing legacy type -// const_indirect_v[1][2] = 999.; // doesn't compile, good! + BOOST_REQUIRE( const_indirect_v[1][2] == 11111.0 ); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) testing legacy type + // const_indirect_v[1][2] = 999.; // doesn't compile, good! } diff --git a/external_codes/boost_multi/multi/test/execution_policy.disable_cpp b/external_codes/boost_multi/multi/test/execution_policy.disable_cpp new file mode 100644 index 0000000000..52972f0d8b --- /dev/null +++ b/external_codes/boost_multi/multi/test/execution_policy.disable_cpp @@ -0,0 +1,323 @@ +// Copyright 2024 Alfredo A. 
Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include // NOLINT(build/c++11) +#include +#include +#include +#include // NOLINT(build/c++11) + +#if defined(TBB_FOUND) || (defined(__GNUC__) && !defined(__clang__) && !defined(__NVCOMPILER) && (__GLIBCXX__ >= 20190502)) +#if !defined(__NVCC__) && !(defined(__clang__) && defined(__CUDA__)) +#if !defined(PSTL_USE_PARALLEL_POLICIES) || !(PSTL_USE_PARALLEL_POLICIES == 0) +#include +#endif +#endif +#endif + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +namespace multi = boost::multi; + +BOOST_AUTO_TEST_CASE(dummy_test) { + multi::static_array const arr(multi::extensions_t<1>{multi::iextension{10}}, 1.0); + BOOST_REQUIRE( arr[0] == 1.0 ); +} + +// BOOST_AUTO_TEST_CASE(multi_par_construct_1d) { +// multi::static_array const arr(multi::extensions_t<1>{multi::iextension{10}}, 1.0); +// // multi::static_array arr(multi::array::extensions_type{10}, 1.0); +// BOOST_REQUIRE( size(arr) == 10 ); +// BOOST_REQUIRE( arr[1] == 1.0 ); + +// #if defined(TBB_FOUND) || (defined(__GNUC__) && !defined(__clang__) && !defined(__NVCOMPILER) && (__GLIBCXX__ >= 20190502)) +// #if !defined(__NVCC__) && !(defined(__clang__) && defined(__CUDA__)) +// #if 
!defined(PSTL_USE_PARALLEL_POLICIES) || !(PSTL_USE_PARALLEL_POLICIES == 0) +// multi::static_array const arr2(std::execution::par, arr); + +// BOOST_REQUIRE( arr2 == arr ); +// #endif +// #endif +// #endif +// } + +// BOOST_AUTO_TEST_CASE(copy_par_1d) { +// multi::array const arr(1000000, 1.0); +// BOOST_REQUIRE( size(arr) == 1000000 ); +// BOOST_REQUIRE( arr[1] == 1.0 ); + +// #if defined(TBB_FOUND) || (defined(__GNUC__) && !defined(__clang__) && !defined(__NVCOMPILER) && (__GLIBCXX__ >= 20190502)) +// #if !defined(__NVCC__) && !(defined(__clang__) && defined(__CUDA__)) +// #if !defined(PSTL_USE_PARALLEL_POLICIES) || !(PSTL_USE_PARALLEL_POLICIES == 0) +// #if defined(__cpp_lib_execution) && (__cpp_lib_execution >= 201603L) +// multi::array arr2(arr.extensions()); + +// std::copy(std::execution::par, arr.begin(), arr.end(), arr2.begin()); + +// BOOST_REQUIRE( arr2 == arr ); +// #endif +// #endif +// #endif +// #endif +// } + +// class watch // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) // NOSONAR +// : private std::chrono::high_resolution_clock { +// std::string label_; +// time_point start_ = now(); + +// public: +// explicit watch(std::string label) : label_{std::move(label)} {} + +// ~watch() { +// std::cerr << label_ << ": " << std::chrono::duration(now() - start_).count() << " sec" << std::endl; +// } +// }; + +class slow_assign { + double val_; + + public: + constexpr explicit slow_assign(double const& vv) noexcept : val_{vv} {} + ~slow_assign() = default; + + slow_assign(slow_assign&& other) noexcept = default; + + slow_assign(slow_assign const& other) : val_{other.val_} { + using namespace std::chrono_literals; // NOLINT(build/namespaces) + std::this_thread::sleep_for(10ms); + } + auto operator=(slow_assign const& other) -> slow_assign& { + if(this == &other) { + return *this; + } + val_ = other.val_; + using namespace std::chrono_literals; // NOLINT(build/namespaces) + std::this_thread::sleep_for(10ms); + return 
*this; + } + auto operator=(slow_assign&& other) noexcept -> slow_assign& = default; + + auto operator==(slow_assign const& other) const noexcept { return val_ == other.val_; } + auto operator!=(slow_assign const& other) const noexcept { return val_ != other.val_; } +}; + +#if defined(TBB_FOUND) || (defined(__GNUC__) && !defined(__clang__) && !defined(__NVCOMPILER) && (__GLIBCXX__ >= 20190502)) +#if !defined(__NVCC__) && !(defined(__clang__) && defined(__CUDA__)) +#if !defined(PSTL_USE_PARALLEL_POLICIES) || !(PSTL_USE_PARALLEL_POLICIES == 0) +#if defined(__cpp_lib_execution) && (__cpp_lib_execution >= 201603L) + +// BOOST_AUTO_TEST_CASE(reduce_row_random) { +// std::random_device r; + +// std::seed_seq seed2{r(), r(), r(), r(), r(), r(), r(), r()}; +// std::mt19937 e2(seed2); // NOLINT(cpp:S2245) +// std::normal_distribution<> normal_dist{}; + +// multi::array arr({10000, 10000}); +// std::generate(arr.elements().begin(), arr.elements().end(), [&]() { return normal_dist(e2); }); + +// { +// multi::array vec(size(arr)); +// watch const _("reduce"); // NOLINT(fuchsia-default-arguments-calls) +// std::transform(arr.begin(), arr.end(), vec.begin(), [](auto const& row) {return std::reduce(row.begin(), row.end());} ); +// } + +// { +// multi::array vec(size(arr)); +// watch const _("par reduce"); // NOLINT(fuchsia-default-arguments-calls) +// std::transform(std::execution::par, arr.begin(), arr.end(), vec.begin(), [](auto const& row) {return std::reduce(row.begin(), row.end());} ); +// } + +// { +// multi::array vec(size(arr)); +// watch const _("par reduce"); // NOLINT(fuchsia-default-arguments-calls) +// std::transform(arr.begin(), arr.end(), vec.begin(), [](auto const& row) {return std::reduce(std::execution::par_unseq, row.begin(), row.end());} ); +// } +// } + +// BOOST_AUTO_TEST_CASE(sort_random) { +// std::random_device r; + +// std::seed_seq seed2{r(), r(), r(), r(), r(), r(), r(), r()}; +// std::mt19937 e2(seed2); // NOLINT(cpp:S2245) +// 
std::normal_distribution<> normal_dist{}; + +// multi::array arr({10000, 10000}, 0.0); +// std::generate(arr.elements().begin(), arr.elements().end(), [&]() { return normal_dist(e2); }); + +// auto arr_seq = arr; +// { +// watch const _("sort"); // NOLINT(fuchsia-default-arguments-calls) +// std::sort(arr_seq.begin(), arr_seq.end()); +// } + +// auto arr_par = arr; +// { +// watch const _("par sort"); // NOLINT(fuchsia-default-arguments-calls) +// std::sort(std::execution::par, arr_par.begin(), arr_par.end()); +// } + +// BOOST_REQUIRE( arr_seq == arr_par ); +// } + +// using T = slow_assign; +// auto const nelem = 8; + +// BOOST_AUTO_TEST_CASE(timing_copy_par_1d) { +// T const val{1.0}; +// T const val2{99.9}; + +// multi::array const arr(nelem, val); +// BOOST_REQUIRE( size(arr) == nelem ); +// BOOST_REQUIRE( arr[1] == val ); + +// { +// multi::array arr2(arr.extensions(), val2); +// BOOST_REQUIRE( arr2.num_elements() == arr.num_elements() ); +// { +// watch const _("normal copy"); // NOLINT(fuchsia-default-arguments-calls) +// std::copy(arr.begin(), arr.end(), arr2.begin()); +// } +// BOOST_REQUIRE( arr2 == arr ); +// } +// { +// multi::array arr2(arr.extensions(), val2); +// BOOST_REQUIRE( arr2.num_elements() == arr.num_elements() ); +// { +// watch const _("par copy"); // NOLINT(fuchsia-default-arguments-calls) +// std::copy(std::execution::par, arr.begin(), arr.end(), arr2.begin()); +// } +// BOOST_REQUIRE( arr2 == arr ); +// } +// } + +// BOOST_AUTO_TEST_CASE(timing_copy_par_2d_warm) { +// T const val{1.0}; + +// multi::array const arr({8, nelem / 8}, val); +// BOOST_REQUIRE( arr.num_elements() == nelem ); +// BOOST_REQUIRE( arr[1][1] == val ); +// } + +// BOOST_AUTO_TEST_CASE(timing_copy_par_2d) { +// for(auto factor : {8, 4, 2}) { +// T const val{1.0}; +// T const val2{99.9}; + +// multi::array const arr({factor, nelem / factor}, val); +// BOOST_REQUIRE( arr.num_elements() == nelem ); +// BOOST_REQUIRE( arr[1][1] == val ); + +// { +// multi::array 
arr2(arr.extensions(), val2); +// BOOST_REQUIRE( arr2.num_elements() == arr.num_elements() ); +// { +// watch const _("normal copy"); // NOLINT(fuchsia-default-arguments-calls) +// std::copy(arr.begin(), arr.end(), arr2.begin()); +// } +// BOOST_REQUIRE( arr2 == arr ); +// } +// { +// multi::array arr2(arr.extensions(), val2); +// BOOST_REQUIRE( arr2.num_elements() == arr.num_elements() ); +// { +// watch const _("par copy"); // NOLINT(fuchsia-default-arguments-calls) +// std::copy(std::execution::par, arr.begin(), arr.end(), arr2.begin()); +// } +// BOOST_REQUIRE( arr2 == arr ); +// } +// } +// } + +// BOOST_AUTO_TEST_CASE(timing_copy_par_2d_ultra_skinny) { +// T const val{1.0}; +// T const val2{99.9}; + +// multi::array const arr({2, nelem / 2}, val); +// BOOST_REQUIRE( arr.num_elements() == nelem ); +// BOOST_REQUIRE( arr[1][1] == val ); + +// { +// multi::array arr2(arr.extensions(), val2); +// BOOST_REQUIRE( arr2.num_elements() == arr.num_elements() ); +// { +// watch const _("~copy"); // NOLINT(fuchsia-default-arguments-calls) +// std::copy((~arr).begin(), (~arr).end(), (~arr2).begin()); +// } +// BOOST_REQUIRE( arr2 == arr ); +// } +// { +// multi::array arr2(arr.extensions(), val2); +// BOOST_REQUIRE( arr2.num_elements() == arr.num_elements() ); +// { +// watch const _("~par copy"); // NOLINT(fuchsia-default-arguments-calls) +// std::copy(std::execution::par, (~arr).begin(), (~arr).end(), (~arr2).begin()); +// } +// BOOST_REQUIRE( arr2 == arr ); +// } +// { +// multi::array arr2(arr.extensions(), val2); +// BOOST_REQUIRE( arr2.num_elements() == arr.num_elements() ); +// { +// watch const _("elements copy"); // NOLINT(fuchsia-default-arguments-calls) +// std::copy(arr.elements().begin(), arr.elements().end(), arr2.elements().begin()); +// } +// BOOST_REQUIRE( arr2 == arr ); +// } +// { +// multi::array arr2(arr.extensions(), val2); +// BOOST_REQUIRE( arr2.num_elements() == arr.num_elements() ); +// { +// watch const _("par elements copy"); // 
NOLINT(fuchsia-default-arguments-calls) +// std::copy(std::execution::par, arr.elements().begin(), arr.elements().end(), arr2.elements().begin()); +// } +// BOOST_REQUIRE( arr2 == arr ); +// } +// { +// { +// watch const _("constructor"); // NOLINT(fuchsia-default-arguments-calls) +// multi::array arr2(arr); // same as ...= arr; +// BOOST_REQUIRE( arr2.num_elements() == arr.num_elements() ); +// BOOST_REQUIRE( arr2 == arr ); +// arr2.clear(); +// } +// } +// { +// { +// watch const _("par constructor"); // NOLINT(fuchsia-default-arguments-calls) +// multi::array const arr2(std::execution::par, arr); +// BOOST_REQUIRE( arr2.num_elements() == arr.num_elements() ); +// BOOST_REQUIRE( arr2 == arr ); +// } +// } +// } +#endif +#endif +#endif +#endif diff --git a/external_codes/boost_multi/multi/test/fill.cpp b/external_codes/boost_multi/multi/test/fill.cpp index f91783dadf..b431923929 100644 --- a/external_codes/boost_multi/multi/test/fill.cpp +++ b/external_codes/boost_multi/multi/test/fill.cpp @@ -1,110 +1,148 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi fill" -#include - -#include "../include/multi/array.hpp" - -#include // for transform -#include -#include -#include // enable_if_t +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include // for std::transform +#include +#include // for std::accumulate +#include +#include // for std::enable_if_t + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4244) +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +namespace { + +using fnv1a_size = std::uint64_t; // from Howard Hinnart hash -static constexpr auto fnv1a(void const* key, std::size_t len, std::size_t hash) noexcept { // NOLINT(bugprone-easily-swappable-parameters) - auto const *first = static_cast(key); - unsigned char const* const last = first + len; // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic): low level - for(; first < last; ++first) { // NOLINT(altera-id-dependent-backward-branch,cppcoreguidelines-pro-bounds-pointer-arithmetic): low level - hash = (hash ^ *first) * 1099511628211U; // prime - } - return hash; +auto fnv1a(unsigned char const* first, std::ptrdiff_t len, fnv1a_size hash) noexcept { // NOLINT(bugprone-easily-swappable-parameters) + return std::accumulate( + first, std::next(first, len), hash, + [prime = 1099511628211U](auto acc, auto elem) { return (acc ^ elem) * prime; } + ); } - -// static constexpr auto fnv1a(void const* key, std::size_t len) noexcept { -// return fnv1a(key, len, 14695981039346656037U); 
-// } +} // namespace class fnv1a_t { - std::size_t h = 14695981039346656037U; // offset + fnv1a_size h_ = 14695981039346656037U; // offset public: - using result_type = std::size_t; - static constexpr auto min() {return std::numeric_limits::min();} - static constexpr auto max() {return std::numeric_limits::max();} - void operator()(void const* key, std::size_t len) noexcept {h = fnv1a(key, len, h);} + using result_type = fnv1a_size; + static constexpr auto min() { return std::numeric_limits::min(); } + static constexpr auto max() { return std::numeric_limits::max(); } + void operator()(unsigned char const* key, std::ptrdiff_t len) noexcept { h_ = fnv1a(key, len, h_); } template, int> = 0> - auto operator()(T const& value) noexcept -> decltype(auto) {operator()(&value, sizeof(value)); return *this;} -// result_type operator()() && noexcept{return h;} - auto operator()() const& noexcept {return h;} -// explicit operator result_type() && noexcept {return h;} - explicit operator result_type() const& noexcept {return h;} + auto operator()(T const& value) noexcept -> decltype(auto) { + operator()(&value, sizeof(value)); + return *this; + } + // result_type operator()() && noexcept{return h;} + auto operator()() const& noexcept { return h_; } + // explicit operator result_type() && noexcept {return h;} + explicit operator result_type() const& noexcept { return h_; } }; -BOOST_AUTO_TEST_CASE(fill_1d) { +BOOST_AUTO_TEST_CASE(fill_1d_a) { namespace multi = boost::multi; - { - multi::array d1D(multi::extensions_t<1>{multi::iextension{10}}); - static_assert( std::is_same_v::value_type, multi::index>, "!"); - - using std::copy; - copy(begin(extension(d1D)), end(extension(d1D)), begin(d1D)); - BOOST_REQUIRE( d1D[0] == 0 ); - BOOST_REQUIRE( d1D[1] == 1 ); - BOOST_REQUIRE( d1D[9] == 9 ); - - d1D.assign(extension(d1D)); - BOOST_REQUIRE( d1D[0] == 0 ); - BOOST_REQUIRE( d1D[1] == 1 ); - BOOST_REQUIRE( d1D[9] == 9 ); - } - { - multi::array d1D(begin(multi::index_extension(10)), 
end(multi::index_extension(10))); - BOOST_REQUIRE( size(d1D) == 10 ); - BOOST_REQUIRE( d1D[0] == 0 ); - BOOST_REQUIRE( d1D[1] == 1 ); - BOOST_REQUIRE( d1D[9] == 9 ); - } - { - multi::array d1D(multi::extensions_t<1>{multi::iextension{10}}); - BOOST_REQUIRE( size(d1D) == 10 ); - - d1D.assign(begin(extension(d1D)), end(extension(d1D))); - BOOST_REQUIRE( d1D[0] == 0 ); - BOOST_REQUIRE( d1D[1] == 1 ); - BOOST_REQUIRE( d1D[9] == 9 ); - } - { - multi::array d1D(multi::extensions_t<1>{multi::iextension{10}}); - d1D.assign(extension(d1D)); - BOOST_REQUIRE( d1D[0] == 0 ); - BOOST_REQUIRE( d1D[1] == 1 ); - BOOST_REQUIRE( d1D[9] == 9 ); - } + + multi::array d1D(multi::extensions_t<1>{multi::iextension{10}}); + static_assert(std::is_same_v::value_type, multi::index>, "!"); + + using std::copy; + copy(begin(extension(d1D)), end(extension(d1D)), begin(d1D)); + BOOST_REQUIRE( d1D[0] == 0 ); + BOOST_REQUIRE( d1D[1] == 1 ); + BOOST_REQUIRE( d1D[9] == 9 ); + + d1D.assign(extension(d1D)); + BOOST_REQUIRE( d1D[0] == 0 ); + BOOST_REQUIRE( d1D[1] == 1 ); + BOOST_REQUIRE( d1D[9] == 9 ); +} + +BOOST_AUTO_TEST_CASE(fill_1d_b) { + namespace multi = boost::multi; + + multi::array d1D(begin(multi::index_extension(10)), end(multi::index_extension(10))); + BOOST_REQUIRE( size(d1D) == 10 ); + BOOST_REQUIRE( d1D[0] == 0 ); + BOOST_REQUIRE( d1D[1] == 1 ); + BOOST_REQUIRE( d1D[9] == 9 ); +} + +BOOST_AUTO_TEST_CASE(fill_1d_c) { + namespace multi = boost::multi; + + multi::array d1D(multi::extensions_t<1>{multi::iextension{10}}); + BOOST_REQUIRE( size(d1D) == 10 ); + + d1D.assign(begin(extension(d1D)), end(extension(d1D))); + BOOST_REQUIRE( d1D[0] == 0 ); + BOOST_REQUIRE( d1D[1] == 1 ); + BOOST_REQUIRE( d1D[9] == 9 ); +} + +BOOST_AUTO_TEST_CASE(fill_1d_d) { + namespace multi = boost::multi; + + multi::array d1D(multi::extensions_t<1>{multi::iextension{10}}); + d1D.assign(extension(d1D)); + BOOST_REQUIRE( d1D[0] == 0 ); + BOOST_REQUIRE( d1D[1] == 1 ); + BOOST_REQUIRE( d1D[9] == 9 ); } 
BOOST_AUTO_TEST_CASE(fill_member) { namespace multi = boost::multi; - multi::array d1D = {1., 2., 3., 4.}; - d1D.fill(42.); + + multi::array d1D = {1.0, 2.0, 3.0, 4.0}; + d1D.fill(42.0); multi::array d2D = { - {150., 16., 17., 18., 19.}, - { 5., 5., 5., 5., 5.}, - {100., 11., 12., 13., 14.}, - { 50., 6., 7., 8., 9.} + {150.0, 16.0, 17.0, 18.0, 19.0}, + { 5.0, 5.0, 5.0, 5.0, 5.0}, + {100.0, 11.0, 12.0, 13.0, 14.0}, + { 50.0, 6.0, 7.0, 8.0, 9.0}, }; BOOST_REQUIRE( d2D.elements().size() == d2D.num_elements() ); BOOST_REQUIRE( d2D.elements().base() == d2D.base() ); - BOOST_REQUIRE( d2D.elements()[3] == 18. ); + BOOST_REQUIRE( d2D.elements()[3] == 18.0 ); BOOST_REQUIRE( &*d2D.elements().begin() == d2D.data_elements() ); BOOST_REQUIRE( &*d2D.elements().end() == d2D.data_elements() + d2D.num_elements() ); -// std::fill( d2D.elements().begin(), d2D.elements().end() , 99. ); -// multi::adl_fill_n( d2D.elements().begin(), d2D.elements().size(), 99. ); - d2D.elements().fill(99.); - BOOST_REQUIRE( d2D[1][1] == 99. ); + // std::fill( d2D.elements().begin(), d2D.elements().end() , 99. ); + // multi::adl_fill_n( d2D.elements().begin(), d2D.elements().size(), 99. 
); + d2D.elements().fill(99.0); + + BOOST_REQUIRE( d2D[1][1] == 99.0 ); } BOOST_AUTO_TEST_CASE(fill) { @@ -113,86 +151,98 @@ BOOST_AUTO_TEST_CASE(fill) { namespace multi = boost::multi; multi::array d2D = { - {150., 16., 17., 18., 19.}, - { 5., 5., 5., 5., 5.}, - {100., 11., 12., 13., 14.}, - { 50., 6., 7., 8., 9.} + {150.0, 16.0, 17.0, 18.0, 19.0}, + { 5.0, 5.0, 5.0, 5.0, 5.0}, + {100.0, 11.0, 12.0, 13.0, 14.0}, + { 50.0, 6.0, 7.0, 8.0, 9.0}, }; using std::all_of; - BOOST_REQUIRE( all_of(begin(d2D[1]), end(d2D[1]), [](auto const& elem) {return elem == 5.;}) ); + BOOST_REQUIRE( all_of(begin(d2D[1]), end(d2D[1]), [](auto const& elem) { return elem == 5.0;}) ); using std::fill; - fill(d2D[1].begin(), d2D[1].end(), 8.); + fill(d2D[1].begin(), d2D[1].end(), 8.0); - BOOST_REQUIRE( all_of(begin(d2D[1]), end(d2D[1]), [](auto const& elem) {return elem == 8.;}) ); + BOOST_REQUIRE( all_of(begin(d2D[1]), end(d2D[1]), [](auto const& elem) { return elem == 8.0;}) ); - fill(begin(rotated(d2D)[1]), end(rotated(d2D)[1]), 8.); - BOOST_REQUIRE( all_of(begin(rotated(d2D)[1]), end(rotated(d2D)[1]), [](auto&& elem) {return elem == 8.;}) ); + fill(begin(rotated(d2D)[1]), end(rotated(d2D)[1]), 8.0); + BOOST_REQUIRE( all_of(begin(rotated(d2D)[1]), end(rotated(d2D)[1]), [](auto&& elem) { return elem == 8.0;}) ); - fill(begin((d2D.rotated())[1]), end((d2D.rotated())[1]), 8.); - BOOST_REQUIRE( all_of(begin((d2D.rotated())[1]), end((d2D.rotated())[1]), [](auto&& elem) {return elem == 8.;}) ); + fill(begin((d2D.rotated())[1]), end((d2D.rotated())[1]), 8.0); + BOOST_REQUIRE( all_of(begin((d2D.rotated())[1]), end((d2D.rotated())[1]), [](auto&& elem) { return elem == 8.0;}) ); + + auto rand = [gauss = std::normal_distribution<>{}, gen = std::mt19937_64(randdev())]() mutable { return gauss(gen); }; // NOSONAR - auto rand = [gauss = std::normal_distribution<>{}, gen = std::mt19937{randdev()}]() mutable {return gauss(gen);}; multi::array r2D({5, 5}); - std::for_each(begin(r2D), end(r2D), [&](auto&& 
elem) {std::generate(begin(elem), end(elem), rand);}); + std::for_each(begin(r2D), end(r2D), [&](decltype(r2D)::reference elem) { std::generate(begin(elem), end(elem), rand); }); } namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(fill_1D) { - multi::array arr = {1., 2., 3.}; + multi::array const arr = {1.0, 2.0, 3.0}; + multi::array arr2({10, 3}); - std::fill( begin(arr2), end(arr2), arr ); + std::fill(begin(arr2), end(arr2), arr); BOOST_REQUIRE( arr2[0] == arr ); BOOST_REQUIRE( arr2[1] == arr ); - // ... + BOOST_REQUIRE( arr2[9] == arr ); } -#define FWD(a) std::forward(a) - template auto broadcast(BinaryOp op, Column const& col, Array const& in, Out&& out) -> Out&& { // NOLINT(readability-identifier-length) clang-tidy 14 bug std::transform( begin(~in), end(~in), begin(~out), begin(~out), [acol = (~col)[0], &op](auto const& Acol, auto&& Bcol) { std::transform(begin(Acol), end(Acol), begin(acol), begin(Bcol), op); - return FWD(Bcol); + return std::forward(Bcol); } ); return std::forward(out); } -BOOST_AUTO_TEST_CASE (julia_broadcast, *boost::unit_test::tolerance(0.00001) ) { - multi::array col = { +BOOST_AUTO_TEST_CASE(julia_broadcast, *boost::unit_test::tolerance(0.00001)) { + multi::array const col = { {0.1}, - {0.2} + {0.2}, }; multi::array arr = { {1.10813, 1.72068, 1.15387}, - {1.36851, 1.66401, 1.47846} + {1.36851, 1.66401, 1.47846}, }; - { // "broadcast" - multi::array arr2(extensions(arr)); - broadcast(std::plus<>{}, col, arr, arr2); - BOOST_TEST( arr2[0][0] == 1.20813 ); BOOST_TEST( arr2[0][1] == 1.82068 ); BOOST_TEST( arr2[0][2] == 1.25387 ); - BOOST_TEST( arr2[1][0] == 1.56851 ); BOOST_TEST( arr2[1][1] == 1.86401 ); BOOST_TEST( arr2[1][2] == 1.67846 ); - } - { // inefficient: replicate the vector before summing elementwise - multi::array ax3({2, 3}); - - std::fill( begin(~ax3), end(~ax3), (~col)[0] ); - BOOST_TEST( ax3[0][0] == 0.1 ); BOOST_TEST( ax3[0][1] == 0.1 ); BOOST_TEST( ax3[0][2] == 0.1 ); - BOOST_TEST( ax3[1][0] == 0.2 ); BOOST_TEST( 
ax3[1][1] == 0.2 ); BOOST_TEST( ax3[1][2] == 0.2 ); - - multi::array Ap(extensions(arr)); - std::transform(begin(arr.elements()), end(arr.elements()), begin(ax3.elements()), begin(Ap.elements()), std::plus<>{}); - - BOOST_TEST( Ap[0][0] == 1.20813 ); BOOST_TEST( Ap[0][1] == 1.82068 ); BOOST_TEST( Ap[0][2] == 1.25387 ); - BOOST_TEST( Ap[1][0] == 1.56851 ); BOOST_TEST( Ap[1][1] == 1.86401 ); BOOST_TEST( Ap[1][2] == 1.67846 ); - } + // "broadcast" + multi::array arr2(extensions(arr)); + broadcast(std::plus<>{}, col, arr, arr2); + + BOOST_TEST( arr2[0][0] == 1.20813 ); + BOOST_TEST( arr2[0][1] == 1.82068 ); + BOOST_TEST( arr2[0][2] == 1.25387 ); + BOOST_TEST( arr2[1][0] == 1.56851 ); + BOOST_TEST( arr2[1][1] == 1.86401 ); + BOOST_TEST( arr2[1][2] == 1.67846 ); + + // inefficient: replicate the vector before summing elementwise + multi::array ax3({2, 3}); + + std::fill(begin(~ax3), end(~ax3), (~col)[0]); + BOOST_TEST( ax3[0][0] == 0.1 ); + BOOST_TEST( ax3[0][1] == 0.1 ); + BOOST_TEST( ax3[0][2] == 0.1 ); + BOOST_TEST( ax3[1][0] == 0.2 ); + BOOST_TEST( ax3[1][1] == 0.2 ); + BOOST_TEST( ax3[1][2] == 0.2 ); + + multi::array Ap(extensions(arr)); + std::transform(begin(arr.elements()), end(arr.elements()), begin(ax3.elements()), begin(Ap.elements()), std::plus<>{}); + + BOOST_TEST( Ap[0][0] == 1.20813 ); + BOOST_TEST( Ap[0][1] == 1.82068 ); + BOOST_TEST( Ap[0][2] == 1.25387 ); + BOOST_TEST( Ap[1][0] == 1.56851 ); + BOOST_TEST( Ap[1][1] == 1.86401 ); + BOOST_TEST( Ap[1][2] == 1.67846 ); } diff --git a/external_codes/boost_multi/multi/test/fix_complex.cpp b/external_codes/boost_multi/multi/test/fix_complex.cpp new file mode 100644 index 0000000000..46a2f00647 --- /dev/null +++ b/external_codes/boost_multi/multi/test/fix_complex.cpp @@ -0,0 +1,184 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include +// #include + +#include +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +namespace multi = boost::multi; + +#ifdef __NVCC__ +template<> +inline constexpr bool multi::force_element_trivial_default_construction> = true; +template<> +inline constexpr bool multi::force_element_trivial_default_construction> = true; +#else +// vvv nvcc (12.1?) 
doesn't tolerate this kind of customization: "error: expected initializer before ‘<’" +template +inline constexpr bool multi::force_element_trivial_default_construction> = std::is_trivially_default_constructible_v; +#endif + +BOOST_AUTO_TEST_CASE(pmr_double) { + multi::array, 2> Aarr({2, 2}, std::complex(4.0, 5.0)); + BOOST_REQUIRE(Aarr[0][0] == std::complex(4.0, 5.0) ); +} + +#ifdef BOOST_MULTI_HAS_MEMORY_RESOURCE +BOOST_AUTO_TEST_CASE(pmr_double_uninitialized) { + { + std::array buffer = {{4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.00, 11.0, 996.0, 997.0, 998.0, 999.0}}; + std::pmr::monotonic_buffer_resource pool(static_cast(std::data(buffer)), 12*sizeof(double)); + + multi::pmr::array Aarr({2, 2}, &pool); + + BOOST_TEST( buffer[0] == 4.0 ); + BOOST_TEST( buffer[1] == 5.0 ); + + #if defined(__GLIBCXX__) + BOOST_TEST( &Aarr[0][0] == buffer.data() ); + BOOST_TEST( Aarr[0][0] == 4.0); + #elif defined(_LIBCPP_VERSION) + BOOST_TEST( &Aarr[0][0] == buffer.data() + (buffer.size() - 4) ); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + BOOST_TEST( Aarr[0][0] == 996.0); + #endif + } + { + std::array buffer = {{4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.00, 11.0, 996.0, 997.0, 998.0, 999.0}}; + std::pmr::monotonic_buffer_resource pool(static_cast(std::data(buffer)), 12*sizeof(double)); + + multi::pmr::array Aarr({2, 2}, double{}, &pool); + + #if defined(__GLIBCXX__) + BOOST_TEST( buffer[0] == 0.0 ); + BOOST_TEST( buffer[1] == 0.0 ); + BOOST_TEST( &Aarr[0][0] == buffer.data() ); + #elif defined(_LIBCPP_VERSION) + BOOST_TEST( buffer[0] == 4.0 ); + BOOST_TEST( buffer[1] == 5.0 ); + BOOST_TEST( buffer[buffer.size()-4] == 0.0 ); + BOOST_TEST( buffer[buffer.size()-3] == 0.0 ); + BOOST_TEST( buffer[buffer.size()-5] == 11.0 ); + BOOST_TEST( &Aarr[0][0] == buffer.data() + (buffer.size() - 4) ); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) + #endif + + BOOST_TEST( Aarr[0][0] == 0.0); + } +} + +BOOST_AUTO_TEST_CASE(pmr_complex_initialized_2) { + std::array buffer = {{4.0, 5.0, 
6.0, 7.0, 8.0, 9.0, 10.00, 11.0, 996.0, 997.0, 998.0, 999.0}}; + std::pmr::monotonic_buffer_resource pool{static_cast(std::data(buffer)), 12*sizeof(double)}; + + multi::pmr::array, 2> Aarr({2, 2}, &pool); + +#if defined(__GLIBCXX__) + BOOST_TEST( buffer[0] == 4.0 ); + BOOST_TEST( buffer[1] == 5.0 ); + BOOST_REQUIRE(Aarr[0][0] == std::complex(4.0, 5.0) ); +#elif defined(_LIBCPP_VERSION) + BOOST_TEST( buffer[buffer.size() - 4] == 996.0 ); + BOOST_TEST( buffer[buffer.size() - 3] == 997.0 ); + BOOST_TEST(Aarr[0][0].real() == 8.0 ); + BOOST_TEST(Aarr[0][0].imag() == 9.0 ); +#endif + Aarr[0][0] = std::complex{40.0, 50.0}; + +#if defined(__GLIBCXX__) + BOOST_TEST( buffer[0] == 40.0 ); + BOOST_TEST( buffer[1] == 50.0 ); +#elif defined(_LIBCPP_VERSION) + BOOST_TEST( buffer[buffer.size() - 4] == 996.0 ); + BOOST_TEST( buffer[buffer.size() - 3] == 997.0 ); +#endif +} + +BOOST_AUTO_TEST_CASE(pmr_complex_initialized_4) { + std::array buffer = {{4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.00, 11.0, 999.9, 999.9, 999.9, 999.9}}; + std::pmr::monotonic_buffer_resource pool{static_cast(std::data(buffer)), 12*sizeof(double)}; + + multi::pmr::array, 2> Aarr({2, 2}, &pool); + +#if defined(__GLIBCXX__) + BOOST_REQUIRE(Aarr[0][0] == std::complex(4.0, 5.0) ); +#elif defined(_LIBCPP_VERSION) + BOOST_REQUIRE(Aarr[0][0] == std::complex(8.0, 9.0) ); +#endif + + BOOST_TEST( buffer[0] == 4.0 ); + BOOST_TEST( buffer[1] == 5.0 ); + +#if defined(__GLIBCXX__) + BOOST_TEST( static_cast(buffer.data()) == static_cast(&Aarr[0][0]) ); +#elif defined(_LIBCPP_VERSION) + BOOST_TEST( static_cast(buffer.data() + 4) == static_cast(&Aarr[0][0]) ); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) +#endif +} + +BOOST_AUTO_TEST_CASE(pmr_complex_initialized_3) { + std::array buffer = {{4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.00, 11.0, 996.0, 997.0, 998.0, 999.0}}; + std::pmr::monotonic_buffer_resource pool{static_cast(std::data(buffer)), 12*sizeof(double)}; + + multi::pmr::array, 2> const Aarr({2, 2}, std::complex{40.0, 
50.0}, &pool); + + BOOST_TEST( Aarr[0][0] == (std::complex{40.0, 50.0}) ); + +#if defined(__GLIBCXX__) + BOOST_TEST( buffer[0] == 40.0 ); + BOOST_TEST( buffer[1] == 50.0 ); +#elif defined(_LIBCPP_VERSION) + BOOST_TEST( buffer[buffer.size() - 4] == 40.0 ); + BOOST_TEST( buffer[buffer.size() - 3] == 50.0 ); +#endif +} + +BOOST_AUTO_TEST_CASE(pmr_complex_initialized) { + std::array buffer = {{4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.00, 11.0, 996.0, 997.0, 998.0, 999.0}}; + std::pmr::monotonic_buffer_resource pool{static_cast(std::data(buffer)), 12*sizeof(double)}; + + multi::pmr::array, 2> Aarr({2, 2}, &pool); + + if constexpr(multi::force_element_trivial_default_construction>) { + BOOST_TEST( buffer[0] == 4.0 ); + BOOST_TEST( buffer[1] == 5.0 ); + + #if defined(__GLIBCXX__) + BOOST_REQUIRE(Aarr[0][0] == std::complex(4.0, 5.0) ); + #elif defined(_LIBCPP_VERSION) + BOOST_TEST(Aarr[0][0].real() == 8.0 ); + BOOST_TEST(Aarr[0][0].imag() == 9.0 ); + #endif + } else { + BOOST_TEST( buffer[0] == 0.0 ); + BOOST_TEST( buffer[1] == 0.0 ); + + BOOST_REQUIRE(Aarr[0][0] == 0.0); + } +} +#endif diff --git a/external_codes/boost_multi/multi/test/flatted.cpp b/external_codes/boost_multi/multi/test/flatted.cpp index 91e0b6221f..9438a3c50d 100644 --- a/external_codes/boost_multi/multi/test/flatted.cpp +++ b/external_codes/boost_multi/multi/test/flatted.cpp @@ -1,30 +1,66 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -// © Alfredo Correa 2018-2021 +// Copyright 2018-2023 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi flattened operation" -#define BOOST_TEST_DYN_LINK -#include +#include -#include "multi/array.hpp" +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(array_flatted_3d) { - multi::array arr({13, 4, 5}); + multi::array arr({13, 4, 5}); - BOOST_REQUIRE( arr.rotated().is_flattable() ); + BOOST_REQUIRE( arr.size() == 13 ); + // BOOST_REQUIRE( arr.rotated().is_flattable() ); { - auto&& arrRFU = arr.rotated().flatted().unrotated(); + auto&& arrRFU = arr.rotated().flatted().unrotated(); // TODO(correaa) remove flatted? 
BOOST_REQUIRE( &arrRFU[11][7] == &arr[11][1][2] ); } - { + { auto&& arrRFU = (arr.rotated()).flatted().unrotated(); BOOST_REQUIRE( &arrRFU[11][7] == &arr[11][7/5][7%5] ); } } BOOST_AUTO_TEST_CASE(array_flatted_3d_bis) { - multi::array arr({13, 4, 5}); + multi::array const arr({13, 4, 5}); BOOST_REQUIRE( arr.size() == 13 ); + // BOOST_REQUIRE( arr.is_flattable() ); + BOOST_REQUIRE( arr.flatted().size() == 52 ); +} + +BOOST_AUTO_TEST_CASE(empty_array_3D_flatted) { + multi::array const arr; + // BOOST_REQUIRE( arr.is_flattable() ); + BOOST_REQUIRE( arr.flatted().size() == 0 ); +} + +BOOST_AUTO_TEST_CASE(empty_array_2D_flatted) { + multi::array const arr; + // BOOST_REQUIRE( arr.is_flattable() ); + BOOST_REQUIRE( arr.flatted().size() == 0 ); } diff --git a/external_codes/boost_multi/multi/test/index_range.cpp b/external_codes/boost_multi/multi/test/index_range.cpp index df5b07e08d..2ff1d6f7de 100644 --- a/external_codes/boost_multi/multi/test/index_range.cpp +++ b/external_codes/boost_multi/multi/test/index_range.cpp @@ -1,69 +1,89 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// © Alfredo A. Correa 2021-2022 +// Copyright 2021-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include // for accumulate + +// Suppress warnings from boost +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4244) +#endif -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi index range" -#include +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif -#include "multi/array_ref.hpp" +#include #include -// #include - -// #include -// #include - -#include // for accumulate - namespace multi = boost::multi; -#if 0 -BOOST_AUTO_TEST_CASE(xml_serialization_index_range) { - std::stringstream ss; - multi::range const rg{5, 10}; - { - boost::archive::xml_oarchive oa{ss}; - oa<< ::boost::serialization::make_nvp("rg", rg); - } - { - boost::archive::xml_iarchive ia{ss}; - multi::range rg2; - ia>> ::boost::serialization::make_nvp("rg2", rg2); - BOOST_REQUIRE( rg == rg2 ); - } -} -#endif - BOOST_AUTO_TEST_CASE(multi_range) { -#if defined(__cpp_deduction_guides) and __cpp_deduction_guides and not defined(__NVCC__) +#if defined(__cpp_deduction_guides) && __cpp_deduction_guides && ! 
defined(__NVCC__) BOOST_REQUIRE(( multi::range{5, 5}.empty() )); #else BOOST_REQUIRE(( multi::range{5, 5}.empty() )); #endif -{ - auto drng = multi::range{5, 10}; - std::vector vec(drng.begin(), drng.end()); - BOOST_REQUIRE( vec[1] == 6 ); -} -{ - auto drng = multi::range{5, 10}; - auto fun = [](auto idx) {return idx + 1;}; - std::vector vec( - boost::make_transform_iterator(drng.begin(), fun), - boost::make_transform_iterator(drng.end() , fun) - ); - BOOST_REQUIRE( vec[1] == 7 ); + { + auto drng = multi::range{5, 10}; + std::vector vec(drng.begin(), drng.end()); // testing std::vector NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( vec[1] == 6 ); + } + { + auto drng = multi::range{5, 10}; + + auto fun = [](auto idx) { return idx + 1; }; + + std::vector vec( // testing std::vector NOLINT(fuchsia-default-arguments-calls) + boost::make_transform_iterator(drng.begin(), fun), + boost::make_transform_iterator(drng.end(), fun) + ); + BOOST_REQUIRE( vec[1] == 7 ); + } } + +BOOST_AUTO_TEST_CASE(crazy_range) { + // auto trng = multi::range( + // multi::detail::tuple{5, 3}, + // multi::detail::tuple{5, 9}, + // [](auto t , int d) {return std::get<1>(t) + d;} + // [](auto t1, auto t2) {return std::get<1>(t1) - std::get<1>(t2);} + // ); + + // BOOST_REQUIRE( trng[0] == (std::tuple{5, 3}) ); + // BOOST_REQUIRE( trng[1] == (std::tuple{5, 4}) ); + + // BOOST_REQUIRE( *trng.begin() == (std::tuple{5, 3}) ); + // BOOST_REQUIRE( *(trng.begin() + 1) == (std::tuple{5, 4}) ); } BOOST_AUTO_TEST_CASE(multi_range_in_constexpr) { - BOOST_REQUIRE( multi::extension_t{5} == 5 ); // this is not a constexpr in cuda 10 + // BOOST_REQUIRE( multi::extension_t{5} == 5 ); BOOST_REQUIRE(( multi::extension_t{5, 12}.contains(10) )); - multi::range irng{5, 12}; + multi::range const irng{5, 12}; BOOST_REQUIRE( irng.contains(6) ); - BOOST_REQUIRE( not irng.contains(12) ); + BOOST_REQUIRE( ! 
irng.contains(12) ); BOOST_REQUIRE( * irng.begin() == 5 ); BOOST_REQUIRE( *(irng.begin() + 1) == 6 ); @@ -74,20 +94,16 @@ BOOST_AUTO_TEST_CASE(multi_range_in_constexpr) { BOOST_REQUIRE( irng.front() == 5 ); BOOST_REQUIRE( irng.back () == 11 ); - std::vector vec = {5, 6, 7, 8, 9, 10, 11}; + std::vector vec = {5, 6, 7, 8, 9, 10, 11}; // testing std::vector of multi:array NOLINT(fuchsia-default-arguments-calls) - assert( std::equal( irng.begin(), irng.end(), vec.begin(), vec.end() ) ); + BOOST_REQUIRE(std::equal(irng.begin(), irng.end(), vec.begin(), vec.end())); // testing std::vector of multi:array NOLINT(fuchsia-default-arguments-calls) - auto sum = 0; - for(auto elem : irng) { - sum += elem; - } + auto sum = std::accumulate(irng.begin(), irng.end(), 0); BOOST_REQUIRE( sum == 5 + 6 + 7 + 8 + 9 + 10 + 11 ); - } BOOST_AUTO_TEST_CASE(multi_range2) { - multi::index_extension iex(10); + multi::index_extension const iex(10); BOOST_REQUIRE( *begin(iex) == 0 ); BOOST_REQUIRE( size(iex) == 10 ); @@ -99,16 +115,32 @@ BOOST_AUTO_TEST_CASE(multi_range2) { BOOST_REQUIRE( xbeg[0] == iex[0] ); BOOST_REQUIRE( xbeg[1] == iex[1] ); - BOOST_REQUIRE( std::accumulate( begin(iex), end(iex), 0) == 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 ); + BOOST_REQUIRE( std::accumulate( begin(iex), end(iex), static_cast(0U)) == 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 ); -// #if(__cpp_structured_bindings >= 201606) { - multi::iextensions<3> ies({{0, 3}, {0, 4}, {0, 5}}); + multi::iextensions<3> const ies({ + {0, 3}, + {0, 4}, + {0, 5}, + }); + + auto const ies0 = std::get<0>(ies); + auto const ies1 = std::get<1>(ies); + auto const ies2 = std::get<2>(ies); + + BOOST_REQUIRE( ies0.size() == 3 ); + BOOST_REQUIRE( ies1.size() == 4 ); + BOOST_REQUIRE( ies2.size() == 5 ); + + BOOST_REQUIRE( std::get<0>(ies).size() == 3 ); BOOST_REQUIRE( std::get<1>(ies).size() == 4 ); + BOOST_REQUIRE( std::get<2>(ies).size() == 5 ); + + #ifndef _MSC_VER // doesn't work in MSVC 14.3 in c++17 mode auto [eyes, jays, kays] = ies; 
BOOST_REQUIRE( eyes.size() == 3 ); BOOST_REQUIRE( jays.size() == 4 ); BOOST_REQUIRE( kays.size() == 5 ); + #endif } -// #endif } diff --git a/external_codes/boost_multi/multi/test/initializer_list.cpp b/external_codes/boost_multi/multi/test/initializer_list.cpp index e0ceb46f10..75026c93a7 100644 --- a/external_codes/boost_multi/multi/test/initializer_list.cpp +++ b/external_codes/boost_multi/multi/test/initializer_list.cpp @@ -1,122 +1,158 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -// Copyright 2019-2021 Alfredo A. Correa +// Copyright 2019-2023 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi initializer_list" -#include +#include -#include "multi/array.hpp" +#include +#include -#include +// Suppress warnings from boost.test +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wundef" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +#define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(multi_tests_initializer_list_1d) { { - std::vector const vec = {1., 2., 3.}; + std::vector const vec = {1.0, 2.0, 3.0}; // NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( vec[1] == 2. 
); } { - multi::static_array arr = {1.2, 3.4, 5.6}; + multi::static_array arr = {12, 34, 56}; BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( arr[2] == 5.6 ); + BOOST_REQUIRE( arr[2] == 56 ); } { - multi::static_array const arr = {1.2, 3.4, 5.6}; + multi::static_array const arr = {12, 34, 56}; BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( arr[2] == 5.6 ); + BOOST_REQUIRE( arr[2] == 56 ); } { - auto il = {1.2, 3.4, 5.6}; + auto const il = {1.2, 3.4, 5.6}; + multi::static_array const arr(il); BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( arr[2] == il.begin()[2] ); + BOOST_REQUIRE( arr[2] == il.begin()[2] ); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } { - auto il = {1.2, 3.4, 5.6}; + auto const il = {1.2, 3.4, 5.6}; + multi::static_array const arr(begin(il), end(il)); BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( arr[2] == il.begin()[2] ); + BOOST_REQUIRE( arr[2] == il.begin()[2] ); // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic) } { - multi::static_array const arr = {1.2, 3.4, 5.6}; - BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( arr[2] == 5.6 ); - BOOST_REQUIRE(( arr == multi::static_array{1.2, 3.4, 5.6} )); - BOOST_REQUIRE(( arr == decltype(arr){1.2, 3.4, 5.6} )); + multi::static_array const arr = {12, 34, 56}; + BOOST_TEST_REQUIRE( size(arr) == 3 ); + BOOST_TEST_REQUIRE( arr[2] == 56 ); + BOOST_TEST_REQUIRE(( arr == multi::static_array{12, 34, 56} )); + BOOST_TEST_REQUIRE(( arr == decltype(arr){12, 34, 56} )); } { - auto values = {1.2, 3.4, 5.6}; - multi::array const arr(values.begin(), values.end()); - BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( arr[2] == 5.6 ); + auto const values = {12, 34, 56}; + + multi::array const arr(values.begin(), values.end()); + BOOST_TEST_REQUIRE( size(arr) == 3 ); + BOOST_TEST_REQUIRE( arr[2] == 56 ); } { - multi::array const arr = {1.2, 3.4, 5.6}; - BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( arr[2] == 5.6 ); - BOOST_REQUIRE(( arr == multi::array{1.2, 3.4, 5.6} )); - 
BOOST_REQUIRE(( arr == decltype(arr){1.2, 3.4, 5.6} )); - BOOST_REQUIRE(( arr == decltype(arr)::decay_type({1.2, 3.4, 5.6}) )); + multi::array const arr = {12, 34, 56}; + + BOOST_TEST_REQUIRE( size(arr) == 3 ); + BOOST_TEST_REQUIRE( arr[2] == 56 ); + + BOOST_TEST_REQUIRE(( arr == multi::array{12, 34, 56} )); + BOOST_TEST_REQUIRE(( arr == decltype(arr){12, 34, 56} )); + BOOST_TEST_REQUIRE(( arr == decltype(arr)::decay_type({12, 34, 56}) )); } { - std::array const stdarr = {{1.1, 2.2, 3.3}}; + std::array const stdarr = { + {11, 22, 33}, + }; using multi::num_elements; - BOOST_REQUIRE( num_elements(stdarr) == 3 ); + BOOST_TEST_REQUIRE( num_elements(stdarr) == 3 ); - using std::begin; using std::end; + using std::begin; + using std::end; multi::static_array const arr(begin(stdarr), end(stdarr)); - BOOST_REQUIRE( size(arr) == 3 ); + BOOST_TEST_REQUIRE( size(arr) == 3 ); } } BOOST_AUTO_TEST_CASE(multi_tests_initializer_list_1d_ctad) { - #if defined(__cpp_deduction_guides) and not defined(__NVCC__) and not defined(__circle_build__) // circle 170 crashes +#if defined(__cpp_deduction_guides) && !defined(__NVCC__) +#if !defined(__circle_build__) || (__circle_build__ > 200 ) // crashes circle 187-200 in docker { - multi::static_array const arr = {1.2, 3.4, 5.6}; - BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( arr[2] == 5.6 ); - BOOST_REQUIRE(( arr == multi::static_array{1.2, 3.4, 5.6} )); + multi::static_array const arr = {12, 34, 56}; + BOOST_TEST_REQUIRE( size(arr) == 3 ); + BOOST_TEST_REQUIRE( arr[2] == 56 ); + BOOST_TEST_REQUIRE(( arr == multi::static_array{12, 34, 56} )); } +#endif { - multi::array arr({1.2, 3.4, 5.6}); - BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( arr[2] == 5.6 ); - BOOST_REQUIRE(( arr == multi::array({1.2, 3.4, 5.6}) )); + multi::array arr({12, 34, 56}); + BOOST_TEST_REQUIRE( size(arr) == 3 ); + BOOST_TEST_REQUIRE( arr[2] == 56 ); + BOOST_TEST_REQUIRE(( arr == multi::array({12, 34, 56}) )); } - #endif +#endif } 
BOOST_AUTO_TEST_CASE(multi_tests_initializer_list_array) { -//#if not defined (__GNUG__) -#if defined(__INTEL_COMPILER) or (defined(__clang__) and (__clang_major__ >= 10)) // doesn't work on gcc - { +#if defined(__INTEL_COMPILER) || (defined(__clang__) && (__clang_major__ >= 10)) // doesn't work on gcc + { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wc99-designator" -// double const a[] = { [8] = 8., 9., 10. }; - std::array const stdarr = {{ [8] = 8., 9., 10. }}; + // double const a[] = { [8] = 8.0, 9.0, 10.0 }; + std::array const stdarr = { + {[8] = 8.0, 9.0, 10.0}, + }; #pragma GCC diagnostic pop multi::array arr = stdarr; BOOST_REQUIRE( arr.size() == 11 ); - BOOST_REQUIRE( arr[9] == 9. ); + BOOST_REQUIRE( arr[9] == 9.0 ); } #endif } BOOST_AUTO_TEST_CASE(multi_initialize_from_carray_1d) { - { - multi::static_array const arr = {1.1, 2.2, 3.3}; + { + multi::static_array const arr = {11, 22, 33}; BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( arr[1] == 2.2 ); + BOOST_REQUIRE( arr[1] == 22 ); } { -#if defined(__cpp_deduction_guides) and not defined(__NVCC__) -// multi::array arr = {{1.1, 2.2, 3.3}}; -// static_assert( decltype(arr)::dimensionality == 1 , "!"); -// BOOST_REQUIRE( size(arr)==3 and arr[1] == 2.2 ); +#if defined(__cpp_deduction_guides) && ! 
defined(__NVCC__) +// multi::array arr = {{1.1, 2.2, 3.3}}; +// static_assert( decltype(arr)::dimensionality == 1 , "!"); +// BOOST_REQUIRE( size(arr)==3 && arr[1] == 2.2 ); #endif } { - std::array stdarr = {{1.1, 2.2, 3.3}}; + std::array stdarr = { + {1.1, 2.2, 3.3} + }; multi::array const arr(begin(stdarr), end(stdarr)); BOOST_REQUIRE(( arr == decltype(arr){1.1, 2.2, 3.3} )); } @@ -127,187 +163,247 @@ BOOST_AUTO_TEST_CASE(multi_tests_initializer_list_2d) { multi::static_array const arr = { { 1.2, 2.4, 3.6, 8.9}, {11.2, 34.4, 5.6, 1.1}, - {15.2, 32.4, 5.6, 3.4} + {15.2, 32.4, 5.6, 3.4}, }; BOOST_REQUIRE( size(arr) == 3 ); BOOST_REQUIRE( size(arr[0]) == 4 ); BOOST_REQUIRE(( arr == decltype(arr){ { 1.2, 2.4, 3.6, 8.9}, {11.2, 34.4, 5.6, 1.1}, - {15.2, 32.4, 5.6, 3.4} + {15.2, 32.4, 5.6, 3.4}, })); } { - multi::array const arr = { - { 1.2, 2.4, 3.6}, - {11.2, 34.4, 5.6}, - {15.2, 32.4, 5.6} + multi::array const arr = { + { 12, 24, 36}, + {112, 344, 56}, + {152, 324, 56}, }; - BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( size(arr) == 3 and size(arr[0]) == 3 ); - BOOST_REQUIRE( arr[1][1] == 34.4 ); + BOOST_TEST( size(arr) == 3 ); + BOOST_TEST( size(arr[0]) == 3 ); + BOOST_TEST( arr[1][1] == 344 ); } { - multi::array arr = { - { 1.2, 2.4, 3.6}, - {11.2, 34.4, 5.6}, - {15.2, 32.4, 5.6} + multi::array arr = { + { 12, 24, 36}, + {112, 344, 56}, + {152, 324, 56}, }; - BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( size(arr) == 3 and size(arr[0]) == 3 ); - BOOST_REQUIRE( arr[1][1] == 34.4 ); + + BOOST_TEST( size(arr) == 3 ); + BOOST_TEST( size(arr) == 3 ); + BOOST_TEST( size(arr[0]) == 3 ); + BOOST_TEST( arr[1][1] == 344 ); + arr = { - { 00., 01., 02.}, - { 10., 11., 12.}, - { 20., 21., 22.} + {100, 10, 20}, + {100, 110, 120}, + {200, 210, 220}, }; - BOOST_REQUIRE( arr[1][2] == 12. 
); + BOOST_TEST( arr[1][2] == 120 ); } { - multi::array vec; - vec = {4.0, 5.5}; - BOOST_REQUIRE( size(vec) == 2 ); - BOOST_REQUIRE( vec[1] == 5.5 ); + multi::array vec; + vec = {40, 55}; + BOOST_TEST( size(vec) == 2 ); + BOOST_TEST( vec[1] == 55 ); } { - std::array, 3> const nested = {{ - {{ 1.2, 2.4}}, - {{11.2, 34.4}}, - {{15.2, 32.4}} - }}; - using std::begin; using std::end; + std::array, 3> const nested = { + {{{12, 24}}, {{112, 344}}, {{152, 324}}} + }; + + using std::begin; + using std::end; + multi::static_array arr(begin(nested), end(nested)); - BOOST_REQUIRE( size(arr) == 3 ); - BOOST_REQUIRE( size(arr[0]) == 2 ); - BOOST_REQUIRE( arr[1][0] == 11.2 ); - } - { - std::array, 3> const nested = {{ - {{ 1.2, 2.4}}, - {{11.2, 34.4}}, - {{15.2, 32.4}} - }}; - multi::static_array const arr(std::begin(nested), std::end(nested)); - - BOOST_REQUIRE(( - arr == multi::array {{ - {{ 1.2, 2.4}}, - {{11.2, 34.4}}, - {{15.2, 32.4}} + BOOST_TEST( size(arr) == 3 ); + BOOST_TEST( size(arr[0]) == 2 ); + BOOST_TEST( arr[1][0] == 112 ); + } + { + std::array, 3> const nested = { + {{{12, 24}}, {{112, 344}}, {{152, 324}}} + }; + multi::static_array const arr(std::begin(nested), std::end(nested)); + + BOOST_TEST(( + arr == multi::array {{ + {{ 12, 24}}, + {{112, 344}}, + {{152, 324}} }} )); - BOOST_REQUIRE(not( arr != multi::array{ - { 1.2, 2.4}, - {11.2, 34.4}, - {15.2, 32.4} + + BOOST_TEST(!( arr != multi::array{ + { 12, 24}, + {112, 344}, + {152, 324}, } )); - BOOST_REQUIRE(( + BOOST_TEST(( arr == decltype(arr){ - { 1.2, 2.4}, - {11.2, 34.4}, - {15.2, 32.4} + { 12, 24}, + {112, 344}, + {152, 324}, } )); } { - std::array, 3> nested = {{ - {{1., 2.}}, - {{2., 4.}}, - {{3., 6.}} - }}; - multi::array arr(begin(nested), end(nested)); - BOOST_REQUIRE( num_elements(arr) == 6 and arr[2][1] == 6. 
); + std::array, 3> nested = { + {{{10, 20}}, + {{20, 40}}, + {{30, 60}}}, + }; + multi::array arr(begin(nested), end(nested)); + BOOST_TEST( num_elements(arr) == 6 ); + BOOST_TEST( arr[2][1] == 60 ); } { - using complex = std::complex; complex const I{0., 1.}; // NOLINT(readability-identifier-length) imaginary unit + using complex = std::complex; + + complex const I{0.0, 1.0}; // NOLINT(readability-identifier-length) imaginary unit + multi::array arr = { - {2. + 1.*I, 1. + 3.*I, 1. + 7.*I}, - {3. + 4.*I, 4. + 2.*I, 0. + 0.*I} + {2.0 + 1.0 * I, 1.0 + 3.0 * I, 1.0 + 7.0 * I}, + {3.0 + 4.0 * I, 4.0 + 2.0 * I, 0.0 + 0.0 * I}, }; - BOOST_REQUIRE( arr[1][1] == 4. + 2.*I ); + BOOST_REQUIRE( arr[1][1] == 4.0 + 2.0*I ); } } BOOST_AUTO_TEST_CASE(multi_tests_static_array_initializer_list) { multi::static_array, 2> SA = { - {1. , 2.}, - {3. , 4.}, + {{1.0, 0.0}, {2.0, 0.0}}, + {{3.0, 0.0}, {4.0, 0.0}}, }; - BOOST_REQUIRE( SA[1][1] == 4. ); + BOOST_REQUIRE( SA[1][1] == 4.0 ); } BOOST_AUTO_TEST_CASE(multi_tests_initializer_list_3d) { - multi::array const arr = { - { - { 1.2, 0.}, - { 2.4, 1.} - }, - { - {11.2, 3.}, - {34.4, 4.} - }, - { - {15.2, 99.}, - {32.4, 2.} - } + multi::array const arr = { + { {12, 100}, {24, 10}}, + { {112, 30}, {344, 40}}, + {{152, 990}, {324, 20}}, }; - BOOST_REQUIRE( arr[1][1][0] == 34.4 and arr[1][1][1] == 4. 
); + BOOST_REQUIRE( arr[1][1][0] == 344 ); + BOOST_REQUIRE( arr[1][1][1] == 40 ); } BOOST_AUTO_TEST_CASE(multi_tests_initializer_list_3d_string) { - { + { using std::string; + + // NOLINTBEGIN(fuchsia-default-arguments-calls) multi::array B3 = { - { {"000", "001", "002"}, - {"010", "011", "012"} }, - { {"100", "101", "102"}, - {"110", "111", "112"} } + {{"000", "001", "002"}, {"010", "011", "012"}}, + {{"100", "101", "102"}, {"110", "111", "112"}}, }; - BOOST_REQUIRE( num_elements(B3)==12 and B3[1][0][1] == "101" ); + // NOLINTEND(fuchsia-default-arguments-calls) + + BOOST_REQUIRE( num_elements(B3)==12 && B3[1][0][1] == "101" ); + } +} + +#if defined(__cpp_deduction_guides) && ! defined(__NVCC__) +BOOST_AUTO_TEST_CASE(initializer_list_1d_static) { +#if !defined(__circle_build__) || (__circle_build__ > 200 ) // crashes circle 187-200 in docker + { + multi::static_array arr({1.0, 2.0, 3.0}); + static_assert(std::is_same_v); + BOOST_REQUIRE( size(arr) == 3 && num_elements(arr) == 3 ); + BOOST_REQUIRE( multi::rank{}==1 && num_elements(arr)==3 && arr[1] == 2.0 ); + static_assert(typename decltype(arr)::rank{} == 1); } +#endif } -BOOST_AUTO_TEST_CASE(multi_tests_initializer_list_3d_string_ctad) { - #if defined(__cpp_deduction_guides) and not defined(__NVCC__) and not defined(__circle_build__) // circle 170 crashes +BOOST_AUTO_TEST_CASE(initializer_list_1d) { { - multi::array arr({1., 2., 3.}); - static_assert( std::is_same{}, "!"); - BOOST_REQUIRE( size(arr) == 3 and num_elements(arr) == 3 ); - BOOST_REQUIRE( multi::rank{}==1 and num_elements(arr)==3 and arr[1]==2. 
); - static_assert( typename decltype(arr)::rank {}==1 ); + multi::array arr({1.0, 2.0, 3.0}); + static_assert(std::is_same_v); + BOOST_REQUIRE( size(arr) == 3 && num_elements(arr) == 3 ); + BOOST_REQUIRE( multi::rank{}==1 && num_elements(arr)==3 && arr[1] == 2.0 ); + static_assert(typename decltype(arr)::rank{} == 1); } { - multi::array arr({1., 2.}); - static_assert( std::is_same{}, "!"); - BOOST_REQUIRE( size(arr) == 2 and num_elements(arr) == 2 ); - BOOST_REQUIRE( multi::rank{}==1 and num_elements(arr)==2 and arr[1]==2. ); BOOST_REQUIRE( multi::rank{}==1 ); + multi::array arr({1.0, 2.0}); + static_assert(std::is_same_v); + BOOST_REQUIRE( size(arr) == 2 && num_elements(arr) == 2 ); + BOOST_REQUIRE( multi::rank{}==1 && num_elements(arr) == 2 && arr[1] == 2.0 ); + BOOST_REQUIRE( multi::rank{} == 1 ); } { - multi::array arr({0, 2}); // multi::array arr = {0, 2}; not working with CTAD - static_assert( std::is_same_v, "!" ); - BOOST_REQUIRE( size(arr) == 2 and num_elements(arr) == 2 ); - BOOST_REQUIRE( multi::rank{}==1 and num_elements(arr)==2 and arr[1]==2. ); BOOST_REQUIRE( multi::rank{}==1 ); + multi::array arr({0, 2}); // multi::array arr = {0, 2}; not working with CTAD + static_assert(std::is_same_v); + BOOST_REQUIRE( size(arr) == 2 && num_elements(arr) == 2 ); + BOOST_REQUIRE( multi::rank{} == 1 && num_elements(arr) == 2 && arr[1] == 2.0 ); + BOOST_REQUIRE( multi::rank{} == 1 ); } { - multi::array arr({9.}); // multi::array arr = {9.}; not working with CTAD - static_assert( std::is_same{}, "!" ); - BOOST_REQUIRE( multi::rank{}==1 and num_elements(arr)==1 and arr[0]==9. ); BOOST_REQUIRE( multi::rank{}==1 ); + multi::array arr({9.0}); // multi::array arr = {9.0}; not working with CTAD + static_assert(std::is_same_v); + BOOST_REQUIRE( multi::rank{}==1 && num_elements(arr)==1 && arr[0]==9.0 ); + BOOST_REQUIRE( multi::rank{}==1 ); } { - multi::array arr({9}); // multi::array arr = {9}; not working with CTAD - static_assert( std::is_same{}, "!" 
); - BOOST_REQUIRE( size(arr) == 1 and num_elements(arr) == 1 ); + multi::array arr({9}); // multi::array arr = {9}; not working with CTAD + static_assert(std::is_same_v); + BOOST_REQUIRE( size(arr) == 1 && num_elements(arr) == 1 ); BOOST_REQUIRE( multi::rank{} == 1 ); - BOOST_REQUIRE( num_elements(arr) == 1 and arr[0] == 9. ); + BOOST_REQUIRE( num_elements(arr) == 1 && arr[0] == 9.0 ); + } +} + +BOOST_AUTO_TEST_CASE(initializer_list_2d) { +#if !defined(__circle_build__) || (__circle_build__ > 200 ) // crashes circle 187-200 in docker + { + multi::static_array const arr({ + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + }); + BOOST_TEST_REQUIRE( multi::rank{} == 2 ); + BOOST_TEST_REQUIRE( num_elements(arr) == 6 ); } { - multi::array arr({ - {1., 2., 3.}, - {4., 5., 6.} + multi::array const arr({ + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, }); - BOOST_REQUIRE( multi::rank{} == 2 and num_elements(arr) == 6 ); + BOOST_TEST_REQUIRE( multi::rank{} == 2 ); + BOOST_TEST_REQUIRE( num_elements(arr) == 6 ); } - #endif +#endif +} +#endif + +BOOST_AUTO_TEST_CASE(partially_formed) { + multi::array arr1({10, 10}, double{}); + multi::array arr2({10, 10}, {}); + multi::array arr3({10, 10}, 0.0); + + BOOST_REQUIRE( arr1[0][0] == 0.0); + BOOST_REQUIRE( arr2[0][0] == 0.0); + BOOST_REQUIRE( arr3[0][0] == 0.0); +} + +BOOST_AUTO_TEST_CASE(partially_formed_int_1) { + multi::array arr1({10, 10}, static_cast(1U)); + multi::array arr2({10, 10}, {1}); + multi::array arr3({10, 10}, 1); + + BOOST_REQUIRE( arr1[0][0] == 1); + BOOST_REQUIRE( arr2[0][0] == 1); + BOOST_REQUIRE( arr3[0][0] == 1); +} + +BOOST_AUTO_TEST_CASE(partially_formed_int_0) { + multi::array arr1({10, 10}, int{}); + multi::array arr2({10, 10}, {}); + multi::array arr3({10, 10}, 0); + + BOOST_REQUIRE( arr1[0][0] == 0); + BOOST_REQUIRE( arr2[0][0] == 0); + BOOST_REQUIRE( arr3[0][0] == 0); } diff --git a/external_codes/boost_multi/multi/test/iterator.cpp b/external_codes/boost_multi/multi/test/iterator.cpp index 847ddffc53..146617571c 100644 
--- a/external_codes/boost_multi/multi/test/iterator.cpp +++ b/external_codes/boost_multi/multi/test/iterator.cpp @@ -1,92 +1,138 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi iterators" -#include +// Copyright 2018-2023 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4244) +#endif -#include "multi/array.hpp" +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif -#include -#include +#include namespace multi = boost::multi; -template auto take(Array&& array) -> decltype(array[0]) {return array[0];} +template auto take(Array&& array) -> auto& {return std::forward(array)[0];} BOOST_AUTO_TEST_CASE(iterator_1d) { { - multi::array arr(multi::extensions_t<1>{multi::iextension{100}}, 99.); + multi::array arr(multi::extensions_t<1>{multi::iextension{100}}, 99.0); BOOST_REQUIRE( size(arr) == 100 ); BOOST_REQUIRE( begin(arr) < end(arr) ); BOOST_REQUIRE( end(arr) - begin(arr) == size(arr) ); - multi::array::const_iterator cbarr = cbegin(arr); + multi::array::const_iterator const cbarr = 
cbegin(arr); multi::array::iterator barr = begin(arr); + + [[maybe_unused]] multi::array::const_iterator const cbarr3{barr}; + + BOOST_REQUIRE( barr == cbarr ); + BOOST_REQUIRE( cbarr == barr ); + + barr += 1; + barr -= 1; BOOST_REQUIRE( cbarr == barr ); - multi::array::const_iterator cbarr2 = begin(arr); + multi::array::const_iterator const cbarr2 = begin(arr); BOOST_REQUIRE( cbarr2 == cbarr ); } { - multi::array arr(multi::extensions_t<1>{multi::iextension{100}}, 99.); + multi::array arr(multi::extensions_t<1>{multi::iextension{100}}, 99.0); BOOST_REQUIRE( size(arr) == 100 ); BOOST_REQUIRE( begin(arr) < end(arr) ); auto arr2 = arr.begin(); - multi::array::const_iterator cbb = arr2; + multi::array::const_iterator const cbb = arr2; BOOST_REQUIRE( cbb == arr2 ); BOOST_REQUIRE( arr2 == cbb ); } + { + multi::array arr(multi::extensions_t<1>{multi::iextension{100}}, 99.0); + BOOST_REQUIRE( size(arr) == 100 ); + BOOST_REQUIRE( begin(arr) < end(arr) ); + + auto const arrend = arr.end(); + auto const arrlast = arrend - 1; + + BOOST_REQUIRE( arrlast + 1 == arrend ); + } } BOOST_AUTO_TEST_CASE(iterator_2d) { { - multi::array arr({120, 140}, 99.); + multi::array const arr({120, 140}, 99.0); + BOOST_REQUIRE( arr.size() == 120 ); - #if not defined(__circle_build__) // circle 170 crashes BOOST_REQUIRE( size(arr) == 120 ); - #endif - #if not defined(__circle_build__) // circle 170 crashes BOOST_REQUIRE( arr.cbegin() < arr.cend() ); - #endif - #if not defined(__circle_build__) // circle 170 crashes BOOST_REQUIRE( arr.cend() - arr.cbegin() == arr.size() ); - #endif + using iter = multi::array::iterator; - static_assert( std::is_same_v< iter::element , double >, "!"); - static_assert( std::is_same_v< iter::value_type, multi::array >, "!"); - static_assert( std::is_same_v< iter::reference, multi::basic_array>, "!"); - static_assert( std::is_same_v< iter::element_ptr, double*>, "!"); + static_assert( std::is_same_v< iter::element , double > ); + static_assert( std::is_same_v< 
iter::value_type, multi::array > ); + static_assert( std::is_same_v< iter::reference, multi::subarray> ); + static_assert( std::is_same_v< iter::element_ptr, double*> ); using citer = multi::array::const_iterator; - static_assert( std::is_same_v< citer::element , double >, "!"); - static_assert( std::is_same_v< citer::value_type, multi::array >, "!"); - static_assert( std::is_same_v< citer::reference, multi::basic_array>, "!"); - static_assert( std::is_same_v< citer::element_ptr, double const* >, "!"); + static_assert( std::is_same_v< citer::element , double > ); + static_assert( std::is_same_v< citer::value_type, multi::array > ); + static_assert( std::is_same_v< citer::reference, multi::subarray> ); + static_assert( std::is_same_v< citer::element_ptr, double const* > ); + + auto const arrend = arr.end(); + auto const arrlast = arrend - 1; + + BOOST_REQUIRE( arrlast + 1 == arrend ); } { - std::vector vec(10000); + std::vector vec(10000); // std::vector NOLINT(fuchsia-default-arguments-calls) multi::array_ref arr(vec.data(), {100, 100}); BOOST_REQUIRE(size(arr) == 100); - begin(arr)[4][3] = 2.; + begin(arr)[4][3] = 2.0; } } BOOST_AUTO_TEST_CASE(iterator_interface ) { - multi::array arr = { + multi::array arr = { { - { 1.2, 1.1}, { 2.4, 1.} + { 12, 11}, { 24, 10} }, { - {11.2, 3.0}, {34.4, 4.} + {112, 30}, {344, 40} }, { - { 1.2, 1.1}, { 2.4, 1.} + { 12, 11}, { 24, 10} } }; - BOOST_REQUIRE( size(arr)==3 and size(arr[0])==2 and size(arr[0][0])==2); - BOOST_REQUIRE( arr[0][0][1] == 1.1 ); + BOOST_REQUIRE( size(arr) == 3 ); + BOOST_REQUIRE( size(arr[0]) == 2 ); + BOOST_REQUIRE( size(arr[0][0]) == 2 ); + BOOST_REQUIRE( arr[0][0][1] == 11 ); BOOST_REQUIRE( begin(arr) < end(arr) ); BOOST_REQUIRE( cbegin(arr) < cend(arr) ); @@ -100,8 +146,8 @@ BOOST_AUTO_TEST_CASE(iterator_interface ) { BOOST_REQUIRE( end(arr) - begin(arr) == size(arr) ); // BOOST_REQUIRE( rend(A) - rbegin(A) == size(A) ); - BOOST_REQUIRE( size(*begin(arr)) == 2 ); - BOOST_REQUIRE( size(begin(arr)[1]) == 2 
); + BOOST_REQUIRE( size(*begin(arr) ) == 2 ); + BOOST_REQUIRE( size( begin(arr)[1]) == 2 ); BOOST_REQUIRE( &(arr[1][1].begin()[0]) == &arr[1][1][0] ); // NOLINT(readability-container-data-pointer) test access BOOST_REQUIRE( &arr[0][1][0] == &arr[0][1][0] ); @@ -116,9 +162,9 @@ BOOST_AUTO_TEST_CASE(iterator_interface ) { BOOST_AUTO_TEST_CASE(iterator_semantics) { multi::array arr = { - {{ 1.2, 1.1}, { 2.4, 1.}}, - {{11.2, 3.0}, {34.4, 4.}}, - {{ 1.2, 1.1}, { 2.4, 1.}} + {{ 1.2, 1.1}, { 2.4, 1.0}}, + {{11.2, 3.0}, {34.4, 4.0}}, + {{ 1.2, 1.1}, { 2.4, 1.0}} }; multi::array::iterator it; @@ -128,29 +174,45 @@ BOOST_AUTO_TEST_CASE(iterator_semantics) { it = begin(arr); BOOST_REQUIRE( it == begin(arr) ); - multi::array::iterator it2 = begin(arr); + it += 1; + it -= 1; + BOOST_REQUIRE( it == begin(arr) ); + + auto const& arrc = arr(); + BOOST_REQUIRE( &arrc[0][0][0] == &arr[0][0][0] ); + + auto const& arrc2 = arr(); + + BOOST_REQUIRE( arrc.addressof() == arrc2.addressof() ); // BOOST_REQUIRE( &arrc == &arrc2 ); + + multi::array::iterator const it2 = begin(arr); BOOST_REQUIRE(it == it2); it = end(arr); BOOST_REQUIRE(it != it2); BOOST_REQUIRE(it > it2); - multi::array::iterator it3{it}; + multi::array::iterator const it3{it}; BOOST_REQUIRE( it3 == it ); multi::array::const_iterator cit; static_assert( std::is_same::iterator::element_ptr, double*>{}, "!"); + [[maybe_unused]] multi::array::const_iterator const cit3{it3}; + cit = it3; BOOST_REQUIRE( cit == it3 ); + BOOST_REQUIRE( it3 == cit ); BOOST_REQUIRE( &arr[0][2][1] == &begin(arr)[0][2][1] ); + [[maybe_unused]] multi::array::const_iterator const cit2 = it3; + static_assert( decltype(begin(arr))::rank_v == 3 , "!" ); static_assert( decltype(begin(arr))::rank {} == 3 , "!" ); -#if not defined(__circle_build__) // circle 170 crashes auto&& ref = multi::ref(begin(arr), end(arr)); -// BOOST_TEST( arr.base() == ref.base() ); // fails in circle (?) 
+ + BOOST_TEST( arr.base() == ref.base() ); BOOST_TEST( arr[0][2][1] == ref[0][2][1] ); BOOST_TEST( &arr[0][2][1] == &ref[0][2][1] ); BOOST_TEST( arr.layout().stride() == ref.layout().stride()); @@ -162,20 +224,19 @@ BOOST_AUTO_TEST_CASE(iterator_semantics) { BOOST_REQUIRE( arr.layout() == ref.layout() ); BOOST_REQUIRE( &multi::ref(begin(arr), end(arr)) == &arr ); -#endif } BOOST_AUTO_TEST_CASE(iterator_arrow_operator) { multi::array arr = { - {"00", "01"}, - {"10", "11"}, - {"20", "21"} + {"00", "01"}, // std::string NOLINT(fuchsia-default-arguments-calls) std::string has a default constructor + {"10", "11"}, // std::string NOLINT(fuchsia-default-arguments-calls) + {"20", "21"} // std::string NOLINT(fuchsia-default-arguments-calls) }; BOOST_REQUIRE( arr[1][0] == "10" ); - BOOST_REQUIRE( std::is_sorted(begin(arr), end(arr)) ); // sorted by rows - BOOST_REQUIRE( std::is_sorted(begin(arr.rotated()), end(arr.rotated())) ); // sorted by cols + BOOST_REQUIRE( std::is_sorted(begin(arr), end(arr)) ); // sorted by rows + BOOST_REQUIRE( std::is_sorted(begin(arr.rotated()), end(arr.rotated())) ); // sorted by cols BOOST_REQUIRE( begin( arr )->size() == arr[0].size() ); BOOST_REQUIRE( begin( arr.rotated() )->size() == arr.size() ); @@ -185,34 +246,37 @@ BOOST_AUTO_TEST_CASE(iterator_arrow_operator) { } BOOST_AUTO_TEST_CASE(index_range_iteration) { - multi::index_range irng{0, 5}; // semiopen interval + multi::index_range irng{0, 5}; // semiopen interval std::ostringstream out; std::copy(begin(irng), end(irng), std::ostream_iterator{out, ","}); BOOST_REQUIRE( out.str() == "0,1,2,3,4," ); - BOOST_REQUIRE( std::accumulate(begin(irng), end(irng), 0) == irng.size()*(irng.size()-1)/2 ); + BOOST_REQUIRE( std::accumulate(begin(irng), end(irng), static_cast(0U)) == irng.size()*(irng.size()-1)/2 ); - BOOST_REQUIRE( std::accumulate(begin(irng), end(irng), 0, [](auto&& acc, auto const& elem) {return acc + elem*elem*elem;}) > 0 ); // sum of cubes + BOOST_REQUIRE( 
std::accumulate(begin(irng), end(irng), static_cast(0U), [](auto&& acc, auto const& elem) {return acc + elem*elem*elem;}) > 0 ); // sum of cubes } BOOST_AUTO_TEST_CASE(multi_reverse_iterator_1D) { - multi::array arr(100, 66.); + multi::array arr(100, 66.0); BOOST_REQUIRE( &arr[99] == &*std::make_reverse_iterator(arr.end()) ); auto rbegin = std::make_reverse_iterator(arr.end()); rbegin += 100; - multi::array::iterator begin{rbegin.base()}; + multi::array::iterator const begin{rbegin.base()}; BOOST_REQUIRE( begin == arr.begin() ); } BOOST_AUTO_TEST_CASE(multi_reverse_iterator_2D) { multi::array arr = { - { 1., 2.}, - { 10., 20.}, - {100., 200.} + { 1.0, 2.0}, + { 10.0, 20.0}, + {100.0, 200.0} }; - BOOST_REQUIRE( (*arr.begin())[1] == 2. ); + BOOST_REQUIRE( (*arr.begin())[1] == 2.0 ); auto rbegin = std::make_reverse_iterator(arr.end()); - BOOST_TEST( (*rbegin)[1] == 200. ); + BOOST_TEST( (*rbegin)[1] == 200.0 ); + + BOOST_REQUIRE( arr.begin() < arr.begin() + 1 ); + BOOST_REQUIRE( arr.end() - 1 < arr.end() ); } diff --git a/external_codes/boost_multi/multi/test/layout.cpp b/external_codes/boost_multi/multi/test/layout.cpp index 26a801be03..b1d3405ef7 100644 --- a/external_codes/boost_multi/multi/test/layout.cpp +++ b/external_codes/boost_multi/multi/test/layout.cpp @@ -1,56 +1,62 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2021 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi layout" -#include - -#include "multi/array.hpp" -#include "multi/utility.hpp" - -#include "multi/detail/tuple_zip.hpp" +// Copyright 2018-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +// See: https://github.com/llvm/llvm-project/issues/61415 +// Should be fixed in 18, but we know 16 and 17 are broken +#if !(defined(__clang__) && (__clang_major__ == 16 || __clang_major__ == 17) && __cplusplus > 202002L) + +#include +#include + +#include + +#include +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +// # pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +// # pragma GCC diagnostic ignored "-Wfloat-equal" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4244) // 'conversion' conversion from 'type1' to 'type2', possible loss of data +#endif -//#include -//#include +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif -#include +#include namespace multi = boost::multi; -static auto second_finish(multi::extensions_t<3> exts) { - return std::get<1>(exts).finish(); +namespace { +auto second_finish(multi::extensions_t<3> exts) { + return std::get<1>(exts).last(); } +} // namespace BOOST_AUTO_TEST_CASE(extensions_3D) { BOOST_REQUIRE( 20 == second_finish( multi::extensions_t<3> { {0, 10}, {0, 20}, {0, 30} } ) ); BOOST_REQUIRE( 20 == second_finish( multi::extensions_t<3>( { {0, 10}, {0, 20}, {0, 30} } )) ); BOOST_REQUIRE( 20 == second_finish( { {0, 10}, {0, 20}, {0, 30} } ) ); - multi::extensions_t<3> exts{ {0, 10}, {0, 20}, {0, 30} }; + multi::extensions_t<3> const exts({0, 10}, {0, 20}, {0, 30}); BOOST_REQUIRE( 20 == second_finish(exts ) ); } 
-//BOOST_AUTO_TEST_CASE(extensions_1D) { -// BOOST_REQUIRE( multi::extensions_t<1>( { {0, 10} } ) == multi::extensions_t<1>( { {0, 10} } ) ); -//} - -#if 0 -BOOST_AUTO_TEST_CASE(serialize_extensions) { - multi::extensions_t<3> x{51, 52, 53}; - std::stringstream ss; - { - boost::archive::xml_oarchive xoa{ss}; - xoa<< BOOST_SERIALIZATION_NVP(x); - } - { - std::cerr<< ss.str() << std::endl; - boost::archive::xml_iarchive xia{ss}; - multi::extensions_t<3> x2{51, 52, 53}; - xia>> BOOST_SERIALIZATION_NVP(x2); - BOOST_REQUIRE(x == x2); - } -} -#endif - BOOST_AUTO_TEST_CASE(extensions_to_linear) { multi::extensions_t<3> exts{4, 5, 3}; BOOST_REQUIRE( exts.to_linear(0, 0, 0) == 0 ); @@ -72,12 +78,18 @@ BOOST_AUTO_TEST_CASE(extensions_to_linear) { BOOST_REQUIRE( exts.to_linear(4, 0, 0) == exts.num_elements() ); for(int idx = 0; idx != exts.num_elements(); ++idx) { - BOOST_REQUIRE( std::apply([&](auto... indices) {return exts.to_linear(indices...);}, exts.from_linear(idx)) == idx ); + BOOST_REQUIRE( std::apply([&](auto... 
indices) { return exts.to_linear(indices...);}, exts.from_linear(idx)) == idx ); } } BOOST_AUTO_TEST_CASE(extensions_layout_to_linear) { - multi::array arr({40, 50, 80}); + multi::array arr( + #ifdef _MSC_VER // problem with MSVC 14.3 c++17 + multi::extensions_t<3> + #endif + {40, 50, 80} + ); + auto&& sub = arr({10, 30}, {20, 32}, {60, 75}); for(int i = 0; i != 10; ++i) { @@ -91,23 +103,41 @@ BOOST_AUTO_TEST_CASE(extensions_layout_to_linear) { } BOOST_AUTO_TEST_CASE(extensions_layout_to_linear_2) { - multi::array arr({40, 50, 80}); + multi::array arr( + #ifdef _MSC_VER // problem with MSVC 14.3 c++17 + multi::extensions_t<3> + #endif + {40, 50, 80} + ); + auto&& sub = arr({10, 30}, {20, 32}, {60, 75}); auto const& rot = sub.rotated(); - auto exts = rot.extensions(); - for(auto eye : std::get<0>(exts)) { - for(auto jay : std::get<1>(exts)) { - for(auto kay : std::get<2>(exts)) { - BOOST_REQUIRE( & rot.base() [rot.layout()(eye, jay, kay)] == &rot(eye, jay, kay) ); - BOOST_REQUIRE( &*(rot.base() + rot.layout()(eye, jay, kay)) == &rot(eye, jay, kay) ); + +#ifndef _MSC_VER + auto const [is, js, ks] = rot.extensions(); +#else + auto const is = std::get<0>(rot.extensions()); + auto const js = std::get<0>(rot.extensions()); + auto const ks = std::get<0>(rot.extensions()); +#endif + for(auto const i : is) { + for(auto const j : js) { + for(auto const k : ks) { + BOOST_REQUIRE( & rot.base() [rot.layout()(i, j, k)] == &rot(i, j, k) ); + BOOST_REQUIRE( &*(rot.base() + rot.layout()(i, j, k)) == &rot(i, j, k) ); } } } } BOOST_AUTO_TEST_CASE(linearize) { - multi::array arr({10, 20, 30}); + multi::array const arr( + #ifdef _MSC_VER // problem with MSVC 14.3 c++17 + multi::extensions_t<3> + #endif + {10, 20, 30} + ); BOOST_REQUIRE(( 25 % extensions(arr) == decltype( 25 % extensions(arr)){0, 0, 25} )); BOOST_REQUIRE(( 55 % extensions(arr) == decltype( 55 % extensions(arr))(0, 1, 25) )); @@ -115,27 +145,87 @@ BOOST_AUTO_TEST_CASE(linearize) { BOOST_REQUIRE((1255 % extensions(arr) == 
decltype(1255 % extensions(arr))(2, 1, 25) )); auto const point = arr.extensions().from_linear(655); -// BOOST_REQUIRE( p == std::make_tuple(1, 1, 25) ); + // BOOST_REQUIRE( p == std::make_tuple(1, 1, 25) ); using multi::detail::get; BOOST_REQUIRE( get<0>(point) == 1 ); BOOST_REQUIRE( get<1>(point) == 1 ); BOOST_REQUIRE( get<2>(point) == 25 ); } +BOOST_AUTO_TEST_CASE(layout_tuple_2d) { + multi::extensions_t<2> const x1({51, 52}); + multi::extensions_t<2> const x2({multi::iextension{0, 51}, multi::iextension{0, 52}}); + BOOST_REQUIRE( x1 == x2 ); + + multi::extensions_t<2> const x3(std::make_tuple(multi::iextension{0, 51}, multi::iextension{0, 52})); + BOOST_REQUIRE( x1 == x3 ); + + multi::extensions_t<2> const x4 = std::make_tuple(multi::iextension{0, 51}, multi::iextension{0, 52}); + BOOST_REQUIRE( x1 == x4 ); + + multi::extensions_t<2> const x5 = std::tuple{multi::iextension{0, 51}, multi::iextension{0, 52}}; + BOOST_REQUIRE( x1 == x5 ); + + multi::extensions_t<2> const x6 = std::tuple{51, 52}; + BOOST_REQUIRE( x1 == x6 ); + + multi::extensions_t<2> const x7{51, 52}; + BOOST_REQUIRE( x1 == x7 ); + + multi::extensions_t<2> const x8 = {51, 52}; + BOOST_REQUIRE( x1 == x8 ); + + auto const x9 = multi::extensions_t<2>{51, 52}; + BOOST_REQUIRE( x1 == x9 ); + + // multi::extensions_t x10{51, 52, 53}; // TODO(correaa) should it work? 
+ // BOOST_REQUIRE( x1 == x10 ); +} + +BOOST_AUTO_TEST_CASE(layout_tuple_3d) { + multi::extensions_t<3> const x1({51, 52, 53}); + multi::extensions_t<3> const x2({multi::iextension{0, 51}, multi::iextension{0, 52}, multi::iextension{0, 53}}); + BOOST_REQUIRE( x1 == x2 ); + + multi::extensions_t<3> const x3(std::make_tuple(multi::iextension{0, 51}, multi::iextension{0, 52}, multi::iextension{0, 53})); + BOOST_REQUIRE( x1 == x3 ); + + multi::extensions_t<3> const x4 = std::make_tuple(multi::iextension{0, 51}, multi::iextension{0, 52}, multi::iextension{0, 53}); + BOOST_REQUIRE( x1 == x4 ); + + multi::extensions_t<3> const x5 = std::tuple{multi::iextension{0, 51}, multi::iextension{0, 52}, multi::iextension{0, 53}}; + BOOST_REQUIRE( x1 == x5 ); + + multi::extensions_t<3> const x6 = std::tuple{51, 52, 53}; + BOOST_REQUIRE( x1 == x6 ); + + multi::extensions_t<3> const x7{51, 52, 53}; + BOOST_REQUIRE( x1 == x7 ); + + // multi::extensions_t x8{51, 52, 53}; // TODO(correaa) should it work? + // BOOST_REQUIRE( x1 == x8 ); +} + BOOST_AUTO_TEST_CASE(layout_0) { multi::array arr( -#if defined(__INTEL_COMPILER) or (defined(__GNUC__) and (__GNUC__ < 6)) + #ifdef _MSC_VER // problem with MSVC 14.3 c++17 multi::extensions_t<3> -#endif + #endif {51, 52, 53} ); - BOOST_REQUIRE( size(arr) == 51 ); BOOST_REQUIRE( arr.size() == 51 ); - BOOST_REQUIRE( size(arr[0]) == 52 ); BOOST_REQUIRE( arr[0].size() == 52 ); - BOOST_REQUIRE( size(arr[0][0]) == 53); BOOST_REQUIRE( arr[0][0].size() == 53 ); + + BOOST_REQUIRE( size(arr) == 51 ); + BOOST_REQUIRE( arr.size() == 51 ); + + BOOST_REQUIRE( size(arr[0]) == 52 ); + BOOST_REQUIRE( arr[0].size() == 52 ); + + BOOST_REQUIRE( size(arr[0][0]) == 53 ); + BOOST_REQUIRE( arr[0][0].size() == 53 ); } BOOST_AUTO_TEST_CASE(layout_1) { - //NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays): testing feature + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays): testing 
feature double arr[50][50][50]; using multi::size; BOOST_REQUIRE( size(arr) == 50 ); @@ -143,255 +233,291 @@ BOOST_AUTO_TEST_CASE(layout_1) { using multi::extension; BOOST_REQUIRE(( extension(arr) == multi::index_extension{0, 50} )); BOOST_REQUIRE(( extension(arr) == multi::iextension{0, 50} )); - BOOST_REQUIRE(( extension(arr) == multi::irange{0, 50} )); + // BOOST_REQUIRE(( extension(arr) == multi::irange{0, 50} )); } BOOST_AUTO_TEST_CASE(layout_2) { - std::array, 50>, 50> arr{}; + std::array, 50>, 50> const arr{}; using multi::size; BOOST_REQUIRE( size(arr) == 50 ); using multi::extension; BOOST_REQUIRE(( extension(arr) == multi::index_extension{0, 50} )); BOOST_REQUIRE(( extension(arr) == multi::iextension{0, 50} )); - BOOST_REQUIRE(( extension(arr) == multi::irange{0, 50} )); } BOOST_AUTO_TEST_CASE(layout_3) { multi::array arr( -//#if defined(__INTEL_COMPILER) or (defined(__GNUC__) and (__GNUC__ < 6)) -// multi::extensions_t<2> -//#endif + #ifdef _MSC_VER // problem with MSVC 14.3 c++17 + multi::extensions_t<2> + #endif {50, 50} ); - BOOST_REQUIRE( size(arr) == 50 ); BOOST_REQUIRE( arr.size() == 50 ); + BOOST_REQUIRE( size(arr) == 50 ); + BOOST_REQUIRE( arr.size() == 50 ); + BOOST_REQUIRE( arr[0].sliced(10, 20).size() == 10 ); BOOST_REQUIRE( size(arr[0].sliced(10, 20)) == 10 ); - static_assert( decltype(arr(0, {10, 20}))::rank_v == 1 , "!"); + static_assert(decltype(arr(0, {10, 20}))::rank_v == 1, "!"); BOOST_REQUIRE( size(arr(0, {10, 20})) == 10 ); BOOST_REQUIRE( arr.layout() == arr.layout() ); - BOOST_REQUIRE( not (arr.layout() < arr.layout()) ); + BOOST_REQUIRE( ! 
(arr.layout() < arr.layout()) ); } BOOST_AUTO_TEST_CASE(layout) { -{ - multi::array A2 = { - {1., 2., 3.}, - {4., 5., 6.}, - {7., 8., 9.} - }; + { + multi::array const A2 = { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + }; - BOOST_REQUIRE( size(A2) == 3 ); + BOOST_REQUIRE( size(A2) == 3 ); - multi::array B2( -#if defined(__INTEL_COMPILER) or (defined(__GNUC__) and (__GNUC__ < 6)) + multi::array B2( + #ifdef _MSC_VER // problem with MSVC 14.3 c++17 multi::extensions_t<2> -#endif - {4, 4} - ); - BOOST_REQUIRE( size(B2) == 4 ); - B2[3][3] = 99.; - - auto B2copy =+ B2({0, 2}, {0, 2}); - - BOOST_REQUIRE( &B2copy[1][1] != &B2({0, 2}, {0, 2})[1][1] ); - - std::array, 2> B2blk = {{ - {{ B2({0, 2}, {0, 2}), B2({0, 2}, {2, 4}) }}, - {{ B2({2, 4}, {0, 2}), B2({2, 4}, {2, 4}) }} - }}; - - BOOST_REQUIRE( &B2blk[1][1][1][1] == &B2[3][3] ); -} -{ - // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays, hicpp-avoid-c-arrays,-warnings-as-errors, modernize-avoid-c-arrays,-warnings-as-errors): test - double arr[3][4][5] = {}; - using multi::dimensionality; - static_assert(dimensionality(arr)==3, "!"); - using multi::extensions; - auto xA = extensions(arr); + #endif + {4, 4} + ); + BOOST_REQUIRE( size(B2) == 4 ); + B2[3][3] = 99.0; + + auto B2copy = +B2({0, 2}, {0, 2}); + + BOOST_REQUIRE( &B2copy[1][1] != &B2({0, 2}, {0, 2})[1][1] ); + + // clang-format off + std::array, 2> + B2blk = {{ + {{B2({0, 2}, {0, 2}), B2({0, 2}, {2, 4})}}, + {{B2({2, 4}, {0, 2}), B2({2, 4}, {2, 4})}}, + }} + ; + // clang-format on + + BOOST_REQUIRE( &B2blk[1][1][1][1] == &B2[3][3] ); + } + { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) test legacy type + double arr[3][4][5] = {}; + using multi::dimensionality; + static_assert( dimensionality(arr) == 3 ); + using multi::extensions; + auto xA = extensions(arr); - BOOST_REQUIRE( size(std::get<0>(xA)) == 3 ); - BOOST_REQUIRE( size(std::get<1>(xA)) == 4 ); - BOOST_REQUIRE( size(std::get<2>(xA)) == 5 
); + BOOST_REQUIRE( size(std::get<0>(xA)) == 3 ); + BOOST_REQUIRE( size(std::get<1>(xA)) == 4 ); + BOOST_REQUIRE( size(std::get<2>(xA)) == 5 ); - static_assert( multi::stride(arr) == 20 , "!" ); + static_assert(multi::stride(arr) == 20); -// static_assert( multi::stride(arr) == 5 , "!" ); - static_assert( multi::stride(arr[1]) == 5 , "!" ); - static_assert( multi::stride(arr[0][0]) == 1 , "!" ); + static_assert(multi::stride(arr[1]) == 5); + static_assert(multi::stride(arr[0][0]) == 1); - multi::array AA({3, 4, 5}); - using multi::layout; - BOOST_REQUIRE( layout(AA) == layout(arr) ); + multi::array AA({3, 4, 5}); + using multi::layout; + BOOST_REQUIRE( layout(AA) == layout(arr) ); - BOOST_REQUIRE( AA .stride() == 20 ); -} -{ - std::array, 4>, 3> arr = {}; -#if defined(__circle_build__) // circle doesn't see dimensionality as a constexpr "cannot access value of A at compile time;" - assert( multi::dimensionality(arr) == 3 ); -#else // other compilers ok - static_assert( multi::dimensionality(arr) == 3 ); -#endif + BOOST_REQUIRE( AA.stride() == 20 ); + } + { + std::array, 4>, 3> arr = {}; + static_assert(multi::dimensionality(arr) == 3); - using multi::extensions; - auto xA = extensions(arr); - using std::get; - BOOST_REQUIRE( size(std::get<0>(xA)) == 3 ); - BOOST_REQUIRE( size(std::get<1>(xA)) == 4 ); - BOOST_REQUIRE( size(std::get<2>(xA)) == 5 ); + using multi::extensions; + auto xA = extensions(arr); + using std::get; + BOOST_REQUIRE( size(std::get<0>(xA)) == 3 ); + BOOST_REQUIRE( size(std::get<1>(xA)) == 4 ); + BOOST_REQUIRE( size(std::get<2>(xA)) == 5 ); - multi::array AA({3, 4, 5}); - using multi::layout; - BOOST_REQUIRE( layout(AA) == layout(arr) ); + multi::array AA({3, 4, 5}); + using multi::layout; + BOOST_REQUIRE( layout(AA) == layout(arr) ); - BOOST_REQUIRE( AA.stride() == 20 ); + BOOST_REQUIRE( AA.stride() == 20 ); -#if defined(__circle_build__) // circle doesn't recognize this as a constexpr "cannot access value of A at compile time;" - assert( 
multi::stride(arr) == 20); -#else // other compilers ok - static_assert( multi::stride(arr) == 20); -#endif + static_assert(multi::stride(arr) == 20); - BOOST_REQUIRE( multi::stride(arr[0]) == 5 ); - BOOST_REQUIRE( multi::stride(arr[1]) == 5 ); - BOOST_REQUIRE( multi::stride(arr[0][0]) == 1 ); -// assert( stride(arr) == 20 ); -// assert( stride(arr[0]) == 20 ); -} -{ - multi::array B2 = { - {1.}, - {2.}, - {3.} - }; - BOOST_REQUIRE( size(B2) == 3 ); - BOOST_REQUIRE( size(rotated(B2)) == 1 ); BOOST_REQUIRE( size(B2[0]) == 1); - BOOST_REQUIRE( B2 .stride() == 1 ); - BOOST_REQUIRE( B2[0].stride() == 1 ); -} + BOOST_REQUIRE( multi::stride(arr[0]) == 5 ); + BOOST_REQUIRE( multi::stride(arr[1]) == 5 ); + BOOST_REQUIRE( multi::stride(arr[0][0]) == 1 ); + } + { + multi::array const B2 = { + {1.0}, + {2.0}, + {3.0}, + }; + BOOST_REQUIRE( size(B2) == 3 ); + BOOST_REQUIRE( size(rotated(B2)) == 1 ); + BOOST_REQUIRE( size(B2[0]) == 1); + BOOST_REQUIRE( B2 .stride() == 1 ); + BOOST_REQUIRE( B2[0].stride() == 1 ); + } } BOOST_AUTO_TEST_CASE(multi_layout_with_offset) { { - multi::layout_t<1> l1(multi::iextension(2, 5)); - BOOST_REQUIRE( l1.extension().start() == 2 ); - BOOST_REQUIRE( l1.extension().finish() == 5 ); + multi::layout_t<1> const l1(multi::iextension(2, 5)); + BOOST_REQUIRE( l1.extension().first() == 2 ); + BOOST_REQUIRE( l1.extension().last() == 5 ); } { - boost::multi::layout_t<2>::extensions_type exts{ + boost::multi::layout_t<2>::extensions_type const exts{ multi::iextension(2, 5), - multi::iextension(0, 5) - }; - multi::layout_t<2> l2(exts); - BOOST_REQUIRE( l2.extension().start() == std::get<0>(exts).start() ); - BOOST_REQUIRE( l2.extension().finish() == std::get<0>(exts).finish() ); + multi::iextension(0, 5)}; + multi::layout_t<2> const l2(exts); + BOOST_REQUIRE( l2.extension().first() == std::get<0>(exts).first() ); + BOOST_REQUIRE( l2.extension().last () == std::get<0>(exts).last() ); } - { - multi::layout_t<2> l2({multi::iextension(0, 3), multi::iextension(2, 
7)}); - BOOST_REQUIRE( std::get<1>(l2.extensions()).start() == 2 ); - BOOST_REQUIRE( std::get<1>(l2.extensions()).finish() == 7 ); + { + multi::layout_t<2> const l2({multi::iextension(0, 3), multi::iextension(2, 7)}); + BOOST_REQUIRE( std::get<1>(l2.extensions()).first() == 2 ); + BOOST_REQUIRE( std::get<1>(l2.extensions()).last() == 7 ); } } BOOST_AUTO_TEST_CASE(multi_layout_part1) { -{ - multi::layout_t<0> lyt; - static_assert( decltype(lyt)::rank_v==0 , "!"); - BOOST_REQUIRE( num_elements(lyt) == 1 ); -}{ - multi::iextensions<0> exts{}; - multi::layout_t<0> lyt(exts); - BOOST_REQUIRE(lyt.num_elements() == 1); -}{ multi::layout_t<1> lyt{}; - static_assert( decltype(lyt)::rank_v == 1 , "!"); - BOOST_REQUIRE( num_elements(lyt) == 0 ); - BOOST_REQUIRE( size(lyt) == 0 ); - BOOST_REQUIRE( size(extension(lyt))==0 ); - BOOST_REQUIRE( stride(lyt)!=0 ); - BOOST_REQUIRE( is_empty(lyt) ); -}{ - multi::layout_t<2> lyt({2, 10}); - static_assert( decltype(lyt)::rank_v == 2 , "!"); - BOOST_REQUIRE( num_elements(lyt) == 20 ); - BOOST_REQUIRE( size(lyt) == 2 ); - BOOST_REQUIRE( size(extension(lyt))==2 ); - BOOST_REQUIRE( stride(lyt)==10 ); - BOOST_REQUIRE( not is_empty(lyt) ); -} { - multi::layout_t<1> lyt(multi::iextensions<1>{20}); - static_assert( decltype(lyt)::rank_v == 1 , "!"); - BOOST_REQUIRE( num_elements(lyt) == 20 ); - BOOST_REQUIRE( size(lyt) == 20 ); - BOOST_REQUIRE( stride(lyt) == 1 ); -} + { + multi::layout_t<0> const lyt; + static_assert(decltype(lyt)::rank_v == 0); + BOOST_REQUIRE( num_elements(lyt) == 1 ); + } + { + multi::iextensions<0> const exts{}; + multi::layout_t<0> const lyt(exts); + BOOST_REQUIRE(lyt.num_elements() == 1); + } + { + multi::layout_t<1> const lyt{}; + static_assert(decltype(lyt)::rank_v == 1); + BOOST_REQUIRE( num_elements(lyt) == 0 ); + BOOST_REQUIRE( size(lyt) == 0 ); + BOOST_REQUIRE( size(extension(lyt))==0 ); + BOOST_REQUIRE( stride(lyt)!=0 ); + BOOST_REQUIRE( is_empty(lyt) ); + } + { + multi::layout_t<2> const lyt({2, 10}); + 
static_assert(decltype(lyt)::rank_v == 2); + BOOST_REQUIRE( num_elements(lyt) == 20 ); + BOOST_REQUIRE( size(lyt) == 2 ); + BOOST_REQUIRE( size(extension(lyt))==2 ); + BOOST_REQUIRE( stride(lyt)==10 ); + BOOST_REQUIRE( ! is_empty(lyt) ); + } + { + multi::layout_t<1> const lyt(multi::iextensions<1>{20}); + static_assert(decltype(lyt)::rank_v == 1, "!"); + BOOST_REQUIRE( num_elements(lyt) == 20 ); + BOOST_REQUIRE( size(lyt) == 20 ); + BOOST_REQUIRE( stride(lyt) == 1 ); + } } BOOST_AUTO_TEST_CASE(multi_layout_part2) { -{ - multi::layout_t<1> lyt(multi::iextensions<1>{1}); - static_assert( decltype(lyt)::rank_v ==1 , "!"); - BOOST_REQUIRE( num_elements(lyt) == 1 ); - BOOST_REQUIRE( size(lyt) == 1 ); - BOOST_REQUIRE( stride(lyt) == 1 ); -} { - multi::layout_t<2> lyt({1, 10}); - static_assert( decltype(lyt)::rank_v ==2 , "!"); - BOOST_REQUIRE( num_elements(lyt) == 10 ); - BOOST_REQUIRE( size(lyt) == 1); - BOOST_REQUIRE( not is_empty(lyt) ); - BOOST_REQUIRE( size(extension(lyt))==1 ); - BOOST_REQUIRE( stride(lyt)== 10 );//std::numeric_limits::max() ); - using std::get; - BOOST_REQUIRE( get<0>(strides(lyt)) == 10); - BOOST_REQUIRE( get<1>(strides(lyt)) == 1 ); -} + { + multi::layout_t<1> const lyt(multi::iextensions<1>{1}); + static_assert(decltype(lyt)::rank_v == 1); + BOOST_REQUIRE( num_elements(lyt) == 1 ); + BOOST_REQUIRE( size(lyt) == 1 ); + BOOST_REQUIRE( stride(lyt) == 1 ); + } + { + multi::layout_t<2> const lyt({1, 10}); + static_assert(decltype(lyt)::rank_v == 2); + BOOST_REQUIRE( num_elements(lyt) == 10 ); + BOOST_REQUIRE( size(lyt) == 1); + BOOST_REQUIRE( ! 
is_empty(lyt) ); + BOOST_REQUIRE( size(extension(lyt))==1 ); + BOOST_REQUIRE( stride(lyt)== 10 ); // std::numeric_limits::max() ); + + using std::get; + BOOST_REQUIRE( get<0>(strides(lyt)) == 10); + BOOST_REQUIRE( get<1>(strides(lyt)) == 1 ); + } } BOOST_AUTO_TEST_CASE(multi_layout_part3) { -{ - multi::layout_t<2> lyt({10, 1}); - static_assert( decltype(lyt)::rank_v ==2 , "!"); - BOOST_REQUIRE( num_elements(lyt) == 10 ); - BOOST_REQUIRE( size(lyt) == 10 ); - using std::get; - BOOST_REQUIRE( get<0>(strides(lyt)) == 1 ); - BOOST_REQUIRE( get<1>(strides(lyt)) == 1 ); -}{ multi::layout_t<2> lyt{}; - BOOST_REQUIRE( dimensionality(lyt)==2 ); - BOOST_REQUIRE( num_elements(lyt) == 0 ); - BOOST_REQUIRE( size(lyt) == 0 ); - BOOST_REQUIRE( size(extension(lyt))==0 ); - BOOST_REQUIRE( stride(lyt)!=0 ); - BOOST_REQUIRE( is_empty(lyt) ); -}{ multi::layout_t<3> lyt{}; BOOST_REQUIRE( num_elements(lyt) == 0 ); -}{ multi::layout_t<3> lyt({{0, 10}, {0, 10}, {0, 10}}); BOOST_REQUIRE( num_elements(lyt) == 1000 ); -}{ multi::layout_t<3> lyt({{10}, {10}, {10}}); BOOST_REQUIRE( num_elements(lyt) == 1000 ); -}{ multi::layout_t<3> lyt({10, 10, 10}); BOOST_REQUIRE( num_elements(lyt) == 1000 ); -}{ multi::layout_t<3> lyt({multi::index_extension{0, 10}, {0, 10}, {0, 10}}); BOOST_REQUIRE( num_elements(lyt) == 1000 ); -}{ multi::layout_t<3> lyt(multi::layout_t<3>::extensions_type{{0, 10}, {0, 10}, {0, 10}}); BOOST_REQUIRE( num_elements(lyt) == 1000 ); -} + { + multi::layout_t<2> const lyt({10, 1}); + static_assert(decltype(lyt)::rank_v == 2); + BOOST_REQUIRE( num_elements(lyt) == 10 ); + BOOST_REQUIRE( size(lyt) == 10 ); + using std::get; + BOOST_REQUIRE( get<0>(strides(lyt)) == 1 ); + BOOST_REQUIRE( get<1>(strides(lyt)) == 1 ); + } + { + multi::layout_t<2> const lyt{}; + BOOST_REQUIRE( dimensionality(lyt)==2 ); + BOOST_REQUIRE( num_elements(lyt) == 0 ); + BOOST_REQUIRE( size(lyt) == 0 ); + BOOST_REQUIRE( size(extension(lyt))==0 ); + BOOST_REQUIRE( stride(lyt)!=0 ); + BOOST_REQUIRE( is_empty(lyt) 
); + } + { + multi::layout_t<3> const lyt{}; + BOOST_REQUIRE( num_elements(lyt) == 0 ); + } + { + multi::layout_t<3> const lyt({ + {0, 10}, + {0, 10}, + {0, 10}, + }); + BOOST_REQUIRE( num_elements(lyt) == 1000 ); + } + { + multi::layout_t<3> const lyt({{10}, {10}, {10}}); + BOOST_REQUIRE( num_elements(lyt) == 1000 ); + } + { + multi::layout_t<3> const lyt({10, 10, 10}); + BOOST_REQUIRE( num_elements(lyt) == 1000 ); + } + { + multi::layout_t<3> const lyt({ + multi::index_extension{0, 10}, + {0, 10}, + {0, 10}, + }); + BOOST_REQUIRE( num_elements(lyt) == 1000 ); + } + { + multi::layout_t<3> const lyt(multi::layout_t<3>::extensions_type{ + {0, 10}, + {0, 10}, + {0, 10}, + }); + BOOST_REQUIRE( num_elements(lyt) == 1000 ); + } } BOOST_AUTO_TEST_CASE(layout_to_offset) { - multi::layout_t<3> lyt({10, 20, 30}); - multi::array arr({10, 20, 30}); + multi::layout_t<3> const lyt({10, 20, 30}); + multi::array const arr({10, 20, 30}); BOOST_REQUIRE( lyt[0][0][0] == &arr[0][0][0] - arr.data_elements() ); BOOST_REQUIRE( lyt[0][0][1] == &arr[0][0][1] - arr.data_elements() ); BOOST_REQUIRE( lyt[0][0][2] == &arr[0][0][2] - arr.data_elements() ); - BOOST_TEST_REQUIRE( lyt[0][1][2] == &arr[0][1][2] - arr.data_elements() ); - BOOST_TEST_REQUIRE( lyt[3][1][2] == &arr[3][1][2] - arr.data_elements() ); + BOOST_TEST_REQUIRE(lyt[0][1][2] == &arr[0][1][2] - arr.data_elements()); + BOOST_TEST_REQUIRE(lyt[3][1][2] == &arr[3][1][2] - arr.data_elements()); } BOOST_AUTO_TEST_CASE(layout_to_offset_sub) { multi::array arr({10, 20, 30}); + auto&& sub = arr({2, 6}, {4, 8}, {10, 20}); + auto const lyt = sub.layout(); + BOOST_REQUIRE( lyt[0][0][0] == &sub[0][0][0] - base(sub) ); BOOST_REQUIRE( lyt[0][0][1] == &sub[0][0][1] - base(sub) ); BOOST_REQUIRE( lyt[0][0][2] == &sub[0][0][2] - base(sub) ); @@ -400,54 +526,89 @@ BOOST_AUTO_TEST_CASE(layout_to_offset_sub) { } BOOST_AUTO_TEST_CASE(continued_part1) { -{ - multi::layout_t<3> lyt(multi::layout_t<3>::extensions_type{{0, 10}, {0, 10}, {0, 10}}); - 
BOOST_REQUIRE( num_elements(lyt) == 1000); -} -{ multi::layout_t<3> lyt({multi::iextension{0, 10}, multi::iextension{0, 10}, multi::iextension{0, 10}}); BOOST_REQUIRE(lyt.num_elements() == 1000); -}{ multi::layout_t<3> lyt({multi::iextension{10}, multi::iextension{10}, multi::iextension{10}}); BOOST_REQUIRE( num_elements(lyt) == 1000); -}{ multi::layout_t<3> lyt({10, 10, multi::iextension{10}}); BOOST_REQUIRE( num_elements(lyt) == 1000 ); -}{ - multi::layout_t<1> lyt; - BOOST_REQUIRE( size(lyt) == 0 ); -}{ - multi::layout_t<1> lyt({{0, 10}}); - BOOST_REQUIRE( size(lyt) == 10 ); - BOOST_REQUIRE( extension(lyt).start () == 0 ); - BOOST_REQUIRE( extension(lyt).finish() == 10 ); - - lyt.reindex(1); - BOOST_REQUIRE( size(lyt) == 10 ); - BOOST_REQUIRE( extension(lyt).start () == 1 ); - BOOST_REQUIRE( extension(lyt).finish() == 11 ); -}{ - multi::layout_t<2> lyt; - BOOST_REQUIRE( size(lyt) == 0 ); -} -{ - multi::layout_t<2> lyt({{0, 10}, {0, 20}}); - BOOST_REQUIRE( size(lyt) == 10 ); - BOOST_REQUIRE( extension(lyt).start () == 0 ); - BOOST_REQUIRE( extension(lyt).finish() == 10 ); - - lyt.reindex(1); - BOOST_REQUIRE( extension(lyt).start () == 1 ); - BOOST_REQUIRE( extension(lyt).finish() == 11 ); - - lyt.rotate().reindex(3).unrotate(); - BOOST_TEST_REQUIRE( extension(lyt).start () == 1 ); - BOOST_TEST_REQUIRE( extension(lyt).finish() == 11 ); - - BOOST_TEST_REQUIRE( std::get<0>(extensions(lyt)).start () == 1 ); - BOOST_TEST_REQUIRE( std::get<1>(extensions(lyt)).start () == 3 ); - BOOST_TEST_REQUIRE( std::get<1>(extensions(lyt)).finish() == 23 ); -} + { + multi::layout_t<3> const lyt(multi::layout_t<3>::extensions_type{ + {0, 10}, + {0, 10}, + {0, 10}, + }); + BOOST_REQUIRE( num_elements(lyt) == 1000); + } + { + multi::layout_t<3> const lyt({ + multi::iextension{0, 10}, + multi::iextension{0, 10}, + multi::iextension{0, 10}, + }); + BOOST_REQUIRE(lyt.num_elements() == 1000); + } + { + multi::layout_t<3> const lyt({multi::iextension{10}, multi::iextension{10}, 
multi::iextension{10}}); + BOOST_REQUIRE( num_elements(lyt) == 1000); + } + { + multi::layout_t<3> const lyt({10, 10, multi::iextension{10}}); + BOOST_REQUIRE( num_elements(lyt) == 1000 ); + } + { + multi::layout_t<1> const lyt; + BOOST_REQUIRE( size(lyt) == 0 ); + } + { + multi::layout_t<1> lyt({ + {0, 10}, + }); + BOOST_REQUIRE( size(lyt) == 10 ); + BOOST_REQUIRE( extension(lyt).first() == 0 ); + BOOST_REQUIRE( extension(lyt).last () == 10 ); + + lyt.reindex(1); + BOOST_REQUIRE( size(lyt) == 10 ); + BOOST_REQUIRE( extension(lyt).first() == 1 ); + BOOST_REQUIRE( extension(lyt).last () == 11 ); + } + { + multi::layout_t<2> const lyt; + BOOST_REQUIRE( size(lyt) == 0 ); + } + { + multi::layout_t<2> lyt(multi::extensions_t<2>({ + {0, 10}, + {0, 20}, + })); + BOOST_REQUIRE( size(lyt) == 10 ); + BOOST_REQUIRE( extension(lyt).first() == 0 ); + BOOST_REQUIRE( extension(lyt).last () == 10 ); + + lyt.reindex(1); + BOOST_REQUIRE( extension(lyt).first() == 1 ); + BOOST_REQUIRE( extension(lyt).last () == 11 ); + + lyt.rotate().reindex(3).unrotate(); + BOOST_TEST_REQUIRE( extension(lyt).first() == 1 ); + BOOST_TEST_REQUIRE( extension(lyt).last () == 11 ); + + BOOST_TEST_REQUIRE( std::get<0>(extensions(lyt)).first() == 1 ); + BOOST_TEST_REQUIRE( std::get<1>(extensions(lyt)).first() == 3 ); + BOOST_TEST_REQUIRE( std::get<1>(extensions(lyt)).last () == 23 ); + } + // { + // multi::layout_t<2> lyt({ + // {0, 10}, + // {0, 20}, + // }); + // BOOST_REQUIRE( size(lyt) == 10 ); + // } } BOOST_AUTO_TEST_CASE(continued_part2) { - multi::layout_t<3> lyt({{0, 10}, {0, 20}, {0, 30}}); + multi::layout_t<3> const lyt({ + {0, 10}, + {0, 20}, + {0, 30}, + }); - BOOST_REQUIRE( not lyt.empty() ); + BOOST_REQUIRE( ! 
lyt.empty() ); BOOST_REQUIRE( stride(lyt) == lyt.stride() ); BOOST_REQUIRE( offset(lyt) == lyt.offset() ); @@ -472,7 +633,11 @@ BOOST_AUTO_TEST_CASE(continued_part2) { } BOOST_AUTO_TEST_CASE(continued_part3) { - multi::layout_t<3> lyt({{0, 10}, {0, 20}, {0, 30}}); + multi::layout_t<3> const lyt({ + {0, 10}, + {0, 20}, + {0, 30}, + }); BOOST_REQUIRE( lyt.num_elements() == num_elements(lyt) ); BOOST_REQUIRE( lyt.size() == size(lyt) ); @@ -485,26 +650,26 @@ BOOST_AUTO_TEST_CASE(continued_part3) { BOOST_REQUIRE( std::get<0>(lyt.extensions()) == lyt.extension() ); - boost::multi::extensions_t<2> exts2; + boost::multi::extensions_t<2> const exts2; using boost::multi::detail::get; using std::get; BOOST_REQUIRE( get<0>(exts2).is_empty() ); -// BOOST_REQUIRE( std::get<0>(L.sizes()) == L.size(0) ); -// BOOST_REQUIRE( std::get<0>(L.extensions()) == L.extension(0) ); + // BOOST_REQUIRE( std::get<0>(L.sizes()) == L.size(0) ); + // BOOST_REQUIRE( std::get<0>(L.extensions()) == L.extension(0) ); BOOST_REQUIRE(( get<0>(lyt.extensions()) == multi::index_extension{0, 10} )); BOOST_REQUIRE( get<0>(lyt.extensions()).first() == 0 ); BOOST_REQUIRE( get<0>(lyt.extensions()).last() == 10 ); -// BOOST_REQUIRE( L.size(1) == 20 ); + // BOOST_REQUIRE( L.size(1) == 20 ); BOOST_REQUIRE( get<1>(lyt.extensions()).first() == 0 ); BOOST_REQUIRE( get<1>(lyt.extensions()).last() == 20 ); -// BOOST_REQUIRE( L.size(2) == 30 ); + // BOOST_REQUIRE( L.size(2) == 30 ); BOOST_REQUIRE( get<2>(lyt.extensions()).first() == 0 ); BOOST_REQUIRE( get<2>(lyt.extensions()).last() == 30 ); @@ -516,79 +681,126 @@ BOOST_AUTO_TEST_CASE(continued_part3) { } BOOST_AUTO_TEST_CASE(continued) { -{ - multi::layout_t<3> lyt; - BOOST_REQUIRE( size(lyt) == 0 ); -} -{ - multi::layout_t<3> lyt( {{0, 10}, {0, 20}, {0, 30}} ); - BOOST_REQUIRE( stride(lyt) == 20*30L ); -} -{ - multi::layout_t<1> lyt({{0, 10}}); - BOOST_REQUIRE( extension(lyt).first() == 0 ); - BOOST_REQUIRE( extension(lyt).last() == 10 ); -} -{ - multi::layout_t<1> 
lyt({{8, 18}}); - BOOST_REQUIRE( extension(lyt).first() == 8 ); - BOOST_REQUIRE( extension(lyt).last() == 18 ); -} -{ - multi::layout_t<2> lyt({{0, 10}, {0, 20}}); - BOOST_REQUIRE( extension(lyt).first() == 0 ); - BOOST_REQUIRE( extension(lyt).last() == 10 ); -} -{ - multi::layout_t<2> lyt( {{0, 10}, {11, 31}} ); - BOOST_REQUIRE( size(lyt) == 10 ); - BOOST_REQUIRE( stride(lyt) == 20 ); - BOOST_REQUIRE( offset(lyt) == 0 ); -} -{ - multi::layout_t<2> lyt( {{8, 18}, {0, 20}} ); - BOOST_REQUIRE( size(lyt) == 10 ); - BOOST_REQUIRE( stride(lyt) == 20 ); -} -{ - multi::layout_t<3> lyt({{0, 3}, {0, 5}, {10, 17}}); - BOOST_REQUIRE( stride(lyt) == 5*7L ); - BOOST_REQUIRE( stride(lyt.sub().sub()) == 1 ); -} -{ - multi::layout_t<3> lyt({{0, 10}, {0, 20}, {0, 30}}); - BOOST_REQUIRE( stride(lyt) == 20*30L ); - BOOST_REQUIRE( offset(lyt) == 0 ); - BOOST_REQUIRE( nelems(lyt) == 10*20L*30L ); -} -{ - multi::layout_t<3> lyt({{10, 20}, {10, 30}, {10, 40}}); - BOOST_REQUIRE( stride(lyt) == 20*30L ); -} -{ - auto const ttt = boost::multi::tuple{1, 2, 3}; - auto const arr = std::apply([](auto... 
elems) {return std::array{{elems...}};}, ttt); - BOOST_REQUIRE(arr[1] == 2); -} + { + multi::layout_t<3> const lyt; + BOOST_REQUIRE( size(lyt) == 0 ); + } + { + multi::layout_t<3> const lyt({ + {0, 10}, + {0, 20}, + {0, 30}, + }); + BOOST_REQUIRE( stride(lyt) == 20*30L ); + } + { + multi::layout_t<1> const lyt({ + {0, 10}, + }); + BOOST_REQUIRE( extension(lyt).first() == 0 ); + BOOST_REQUIRE( extension(lyt).last() == 10 ); + } + { + multi::layout_t<1> const lyt({ + {8, 18}, + }); + BOOST_REQUIRE( extension(lyt).first() == 8 ); + BOOST_REQUIRE( extension(lyt).last() == 18 ); + } + { + multi::layout_t<2> const lyt(multi::extensions_t<2>({ + {0, 10}, + {0, 20}, + })); + BOOST_REQUIRE( extension(lyt).first() == 0 ); + BOOST_REQUIRE( extension(lyt).last() == 10 ); + } + // { // this is ambiguous in nvcc + // multi::layout_t<2> const lyt({ + // {0, 10}, + // {0, 20}, + // }); + // BOOST_REQUIRE( extension(lyt).first() == 0 ); + // BOOST_REQUIRE( extension(lyt).last() == 10 ); + // } + { + multi::layout_t<2> const lyt(multi::extensions_t<2>({ + { 0, 10}, + {11, 31}, + })); + BOOST_REQUIRE( size(lyt) == 10 ); + BOOST_REQUIRE( stride(lyt) == 20 ); + BOOST_REQUIRE( offset(lyt) == 0 ); + } + { // this is ambiguous in nvcc + multi::layout_t<2> const lyt(multi::extensions_t<2>({ + { 0, 10}, + {11, 31}, + })); + BOOST_REQUIRE( size(lyt) == 10 ); + BOOST_REQUIRE( stride(lyt) == 20 ); + BOOST_REQUIRE( offset(lyt) == 0 ); + } + { + multi::layout_t<2> const lyt(multi::extensions_t<2>({ + {8, 18}, + {0, 20}, + })); + BOOST_REQUIRE( size(lyt) == 10 ); + BOOST_REQUIRE( stride(lyt) == 20 ); + } + // { + // multi::layout_t<3> const lyt(multi::extensions_t<3>({ + // { 0, 3}, + // { 0, 5}, + // {10, 17}, + // })); + // BOOST_REQUIRE( stride(lyt) == 5*7L ); + // BOOST_REQUIRE( stride(lyt.sub().sub()) == 1 ); + // } + { + multi::layout_t<3> const lyt({ + {0, 10}, + {0, 20}, + {0, 30}, + }); + BOOST_REQUIRE( size(lyt) == 10 ); + BOOST_REQUIRE( stride(lyt) == 20*30L ); + BOOST_REQUIRE( 
offset(lyt) == 0 ); + BOOST_REQUIRE( nelems(lyt) == 10*20L*30L ); + } + { + multi::layout_t<3> const lyt({ + {10, 20}, + {10, 30}, + {10, 40}, + }); + BOOST_REQUIRE( stride(lyt) == 20*30L ); + } + { + auto const ttt = boost::multi::tuple{1, 2, 3}; + auto const arr = std::apply([](auto... elems) { return std::array{{elems...}}; }, ttt); + BOOST_REQUIRE(arr[1] == 2); + } } -//BOOST_AUTO_TEST_CASE(tuple_zip_test) { // TODO(correaa) make it work -// auto t1 = std::make_tuple( 1, 2, 3); -// auto t2 = std::make_tuple(10, 20, 30); -// auto t3 = std::make_tuple(std::string{"10"}, std::string{"20"}, std::string{"30"}); -// auto t123 = boost::multi::detail::tuple_zip(t1, t2, t3); -// BOOST_REQUIRE( std::get<2>(std::get<0>(t123)) == std::string{"10"} ); -//} +// BOOST_AUTO_TEST_CASE(tuple_zip_test) { // TODO(correaa) make it work +// auto t1 = std::make_tuple( 1, 2, 3); +// auto t2 = std::make_tuple(10, 20, 30); +// auto t3 = std::make_tuple(std::string{"10"}, std::string{"20"}, std::string{"30"}); +// auto t123 = boost::multi::detail::tuple_zip(t1, t2, t3); +// BOOST_REQUIRE( std::get<2>(std::get<0>(t123)) == std::string{"10"} ); +// } BOOST_AUTO_TEST_CASE(extensions_from_linear_1d) { - multi::extensions_t<1> exts{11}; + multi::extensions_t<1> const exts{11}; auto ijk = exts.from_linear(9); using multi::detail::get; BOOST_TEST_REQUIRE( get<0>(ijk) == 9 ); - multi::layout_t<1> lyt{exts}; + multi::layout_t<1> const lyt{exts}; BOOST_TEST_REQUIRE( lyt[get<0>(ijk)] == 9 ); BOOST_TEST_REQUIRE( lyt(get<0>(ijk)) == 9 ); @@ -598,33 +810,34 @@ BOOST_AUTO_TEST_CASE(extensions_from_linear_1d) { } BOOST_AUTO_TEST_CASE(extensions_from_linear_2d_structured_binding) { - multi::extensions_t<2> exts{3, 5}; + multi::extensions_t<2> const exts{3, 5}; auto [eye, jay] = exts.from_linear(7); BOOST_TEST_REQUIRE( eye == 1 ); BOOST_TEST_REQUIRE( jay == 2 ); -// BOOST_TEST_REQUIRE( std::apply(l, l.extensions().from_linear(9)) == 9 ); + // BOOST_TEST_REQUIRE( std::apply(l, 
l.extensions().from_linear(9)) == 9 ); } BOOST_AUTO_TEST_CASE(extensions_from_linear_2d_std_get) { - multi::extensions_t<2> exts{3, 5}; - auto eye = std::get<0>(exts.from_linear(7)); - auto jay = std::get<1>(exts.from_linear(7)); + multi::extensions_t<2> const exts{3, 5}; + auto eye = std::get<0>(exts.from_linear(7)); + auto jay = std::get<1>(exts.from_linear(7)); BOOST_TEST_REQUIRE( eye == 1 ); BOOST_TEST_REQUIRE( jay == 2 ); } BOOST_AUTO_TEST_CASE(extensions_from_linear_2d_std_get_using) { - multi::extensions_t<2> exts{3, 5}; + multi::extensions_t<2> const exts{3, 5}; using std::get; - auto const eye = get<0>(exts.from_linear(7)); - auto const jay = get<1>(exts.from_linear(7)); + auto fl = exts.from_linear(7L); + auto const eye = get<0>(fl); + auto const jay = get<1>(fl); BOOST_TEST_REQUIRE( eye == 1 ); BOOST_TEST_REQUIRE( jay == 2 ); } BOOST_AUTO_TEST_CASE(extensions_from_linear_2d_get_using) { - multi::extensions_t<2> exts{3, 5}; + multi::extensions_t<2> const exts{3, 5}; using multi::detail::get; @@ -635,7 +848,7 @@ BOOST_AUTO_TEST_CASE(extensions_from_linear_2d_get_using) { } BOOST_AUTO_TEST_CASE(extensions_from_linear_2d) { - multi::extensions_t<2> exts{3, 5}; + multi::extensions_t<2> const exts{3, 5}; auto ij = exts.from_linear(7); @@ -644,12 +857,12 @@ BOOST_AUTO_TEST_CASE(extensions_from_linear_2d) { BOOST_TEST_REQUIRE( get<0>(ij) == 1 ); BOOST_TEST_REQUIRE( get<1>(ij) == 2 ); - multi::layout_t<2> lyt{exts}; + multi::layout_t<2> const lyt{exts}; BOOST_TEST_REQUIRE( lyt[get<0>(ij)][get<1>(ij)] == 7 ); } BOOST_AUTO_TEST_CASE(extensions_from_linear_3d_std_get) { - multi::extensions_t<3> exts{11, 13, 17}; + multi::extensions_t<3> const exts{11, 13, 17}; BOOST_TEST_REQUIRE( std::get<0>(exts.from_linear( 0)) == 0 ); BOOST_TEST_REQUIRE( std::get<1>(exts.from_linear( 0)) == 0 ); @@ -671,14 +884,15 @@ BOOST_AUTO_TEST_CASE(extensions_from_linear_3d_std_get) { BOOST_TEST_REQUIRE( std::get<1>(exts.from_linear(18)) == 1 ); BOOST_TEST_REQUIRE( 
std::get<2>(exts.from_linear(18)) == 1 ); - multi::layout_t<3> lyt{exts}; + multi::layout_t<3> const lyt{exts}; + using std::get; BOOST_TEST_REQUIRE( lyt[get<0>(exts.from_linear(19))][get<1>(exts.from_linear(19))][get<2>(exts.from_linear(19))] == 19 ); BOOST_TEST_REQUIRE( lyt(get<0>(exts.from_linear(19)), get<1>(exts.from_linear(19)), get<2>(exts.from_linear(19))) == 19 ); } BOOST_AUTO_TEST_CASE(extensions_from_linear_3d_std_get_using) { - multi::extensions_t<3> exts{11, 13, 17}; + multi::extensions_t<3> const exts{11, 13, 17}; using std::get; @@ -706,73 +920,137 @@ BOOST_AUTO_TEST_CASE(extensions_from_linear_3d_std_get_using) { BOOST_TEST_REQUIRE( get<1>(exts.from_linear(19)) == 1 ); BOOST_TEST_REQUIRE( get<2>(exts.from_linear(19)) == 2 ); - multi::layout_t<3> lyt{exts}; + multi::layout_t<3> const lyt{exts}; BOOST_TEST_REQUIRE( lyt[get<0>(exts.from_linear(19))][get<1>(exts.from_linear(19))][get<2>(exts.from_linear(19))] == 19 ); BOOST_TEST_REQUIRE( lyt(get<0>(exts.from_linear(19)), get<1>(exts.from_linear(19)), get<2>(exts.from_linear(19))) == 19 ); } BOOST_AUTO_TEST_CASE(extensions_from_linear_3d_struct_bind) { - multi::extensions_t<3> exts{11, 13, 17}; + multi::extensions_t<3> const exts{11, 13, 17}; using std::get; { - auto [eye, jay, kay] = exts.from_linear( 0); - BOOST_TEST_REQUIRE( eye == 0 ); - BOOST_TEST_REQUIRE( jay == 0 ); - BOOST_TEST_REQUIRE( kay == 0 ); + auto [eye, jay, kay] = exts.from_linear(0); + BOOST_TEST_REQUIRE(eye == 0); + BOOST_TEST_REQUIRE(jay == 0); + BOOST_TEST_REQUIRE(kay == 0); } { - auto [eye, jay, kay] = exts.from_linear( 1); - BOOST_TEST_REQUIRE( eye == 0 ); - BOOST_TEST_REQUIRE( jay == 0 ); - BOOST_TEST_REQUIRE( kay == 1 ); + auto [eye, jay, kay] = exts.from_linear(1); + BOOST_TEST_REQUIRE(eye == 0); + BOOST_TEST_REQUIRE(jay == 0); + BOOST_TEST_REQUIRE(kay == 1); } { - auto [eye, jay, kay] = exts.from_linear(16); - BOOST_TEST_REQUIRE( eye == 0 ); - BOOST_TEST_REQUIRE( jay == 0 ); - BOOST_TEST_REQUIRE( kay == 16 ); + auto [eye, jay, 
kay] = exts.from_linear(16); + BOOST_TEST_REQUIRE(eye == 0); + BOOST_TEST_REQUIRE(jay == 0); + BOOST_TEST_REQUIRE(kay == 16); } { - auto [eye, jay, kay] = exts.from_linear(17); - BOOST_TEST_REQUIRE( eye == 0 ); - BOOST_TEST_REQUIRE( jay == 1 ); - BOOST_TEST_REQUIRE( kay == 0 ); + auto [eye, jay, kay] = exts.from_linear(17); + BOOST_TEST_REQUIRE(eye == 0); + BOOST_TEST_REQUIRE(jay == 1); + BOOST_TEST_REQUIRE(kay == 0); } { - auto [eye, jay, kay] = exts.from_linear(18); - BOOST_TEST_REQUIRE( eye == 0 ); - BOOST_TEST_REQUIRE( jay == 1 ); - BOOST_TEST_REQUIRE( kay == 1 ); - - multi::layout_t<3> lyt{exts}; - BOOST_TEST_REQUIRE( lyt[eye][jay][kay] == 18 ); - BOOST_TEST_REQUIRE( lyt(eye, jay, kay) == 18 ); + auto [eye, jay, kay] = exts.from_linear(18); + BOOST_TEST_REQUIRE(eye == 0); + BOOST_TEST_REQUIRE(jay == 1); + BOOST_TEST_REQUIRE(kay == 1); + + multi::layout_t<3> const lyt{exts}; + BOOST_TEST_REQUIRE(lyt[eye][jay][kay] == 18); + BOOST_TEST_REQUIRE(lyt(eye, jay, kay) == 18); } } BOOST_AUTO_TEST_CASE(extensions_from_linear_3d) { - multi::extensions_t<3> exts{11, 13, 17}; + multi::extensions_t<3> const exts{11, 13, 17}; auto ijk = exts.from_linear(19); { - using std::get; - BOOST_TEST_REQUIRE( get<0>(exts.from_linear(19)) == 0 ); - BOOST_TEST_REQUIRE( get<1>(exts.from_linear(19)) == 1 ); - BOOST_TEST_REQUIRE( get<2>(exts.from_linear(19)) == 2 ); + using std::get; + BOOST_TEST_REQUIRE(get<0>(exts.from_linear(19)) == 0); + BOOST_TEST_REQUIRE(get<1>(exts.from_linear(19)) == 1); + BOOST_TEST_REQUIRE(get<2>(exts.from_linear(19)) == 2); } { - using std::get; -// using multi::detail::get; - BOOST_TEST_REQUIRE( get<0>(ijk) == 0 ); - BOOST_TEST_REQUIRE( get<1>(ijk) == 1 ); - BOOST_TEST_REQUIRE( get<2>(ijk) == 2 ); + using std::get; + // using multi::detail::get; + BOOST_TEST_REQUIRE(get<0>(ijk) == 0); + BOOST_TEST_REQUIRE(get<1>(ijk) == 1); + BOOST_TEST_REQUIRE(get<2>(ijk) == 2); - multi::layout_t<3> lyt{exts}; + multi::layout_t<3> const lyt{exts}; + + 
BOOST_TEST_REQUIRE(lyt[get<0>(ijk)][get<1>(ijk)][get<2>(ijk)] == 19); + BOOST_TEST_REQUIRE(lyt(get<0>(ijk), get<1>(ijk), get<2>(ijk)) == 19); + } +} + +BOOST_AUTO_TEST_CASE(extension_1D_iteration) { + multi::extension_t const ext(10); + BOOST_TEST_REQUIRE(ext[0] == 0); + BOOST_TEST_REQUIRE(ext[1] == 1); +} - BOOST_TEST_REQUIRE( lyt[get<0>(ijk)][get<1>(ijk)][get<2>(ijk)] == 19 ); - BOOST_TEST_REQUIRE( lyt(get<0>(ijk), get<1>(ijk), get<2>(ijk)) == 19 ); +BOOST_AUTO_TEST_CASE(extensionS_1D_iteration) { + { + multi::extensions_t<1> const exts(10); + BOOST_TEST_REQUIRE(std::get<0>(exts[0]) == 0); + BOOST_TEST_REQUIRE(std::get<0>(exts[1]) == 1); + } + { + multi::extensions_t<1> const exts(multi::iextension{0, 10}); + BOOST_TEST_REQUIRE(std::get<0>(exts[0]) == 0); + BOOST_TEST_REQUIRE(std::get<0>(exts[1]) == 1); } } +// BOOST_AUTO_TEST_CASE(extensionS_2D_iteration) { +// { +// multi::extensions_t<2> exts({3, 5}); +// BOOST_TEST_REQUIRE(std::get<0>(exts[0]) == 0); +// BOOST_TEST_REQUIRE(std::get<0>(exts[1]) == 1); +// } +// { +// multi::extensions_t<2> exts({multi::iextension{0, 3}, multi::iextension{0, 5}}); +// BOOST_TEST_REQUIRE(std::get<0>(exts[0]) == 0); +// BOOST_TEST_REQUIRE(std::get<0>(exts[1]) == 1); +// } +// } + +BOOST_AUTO_TEST_CASE(layout_1D_iteration) { + multi::layout_t<1> const lyt{multi::extensions_t<1>(10)}; + BOOST_REQUIRE( lyt[0] == 0 ); + BOOST_REQUIRE( lyt[1] == 1 ); + BOOST_REQUIRE( lyt[2] == 2 ); + + // BOOST_TEST_REQUIRE(std::get<0>(exts[0]) == 0); + // BOOST_TEST_REQUIRE(std::get<0>(exts[1]) == 1); +} + +BOOST_AUTO_TEST_CASE(layout_2D_iteration) { + multi::layout_t<2> const lyt{multi::extensions_t<2>({5, 3})}; + BOOST_REQUIRE( lyt[0][0] == 0 ); + BOOST_REQUIRE( lyt[0][1] == 1 ); + BOOST_REQUIRE( lyt[0][2] == 2 ); + + BOOST_REQUIRE( lyt[1][0] == 3 ); + BOOST_REQUIRE( lyt[1][1] == 4 ); + BOOST_REQUIRE( lyt[1][2] == 5 ); + + // BOOST_TEST_REQUIRE(std::get<0>(exts[0]) == 0); + // BOOST_TEST_REQUIRE(std::get<0>(exts[1]) == 1); +} + +#else + +int main() 
+{ + return 0; +} + +#endif diff --git a/external_codes/boost_multi/multi/test/main.cpp b/external_codes/boost_multi/multi/test/main.cpp index 191adf4b21..294306fcde 100644 --- a/external_codes/boost_multi/multi/test/main.cpp +++ b/external_codes/boost_multi/multi/test/main.cpp @@ -1,17 +1,21 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2022 Alfredo A. Correa +// Copyright 2023-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -// C++ Unit Tests for Multi empty main -#include "multi/array.hpp" - -#include +#include namespace multi = boost::multi; auto main() -> int { + multi::array arr = { + { 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + {10, 11, 12, 13, 14}, + {15, 16, 17, 18, 19}, + }; - multi::array arr({10, 15}, 99.); - - if( arr[1][2] != 99. ) {return 1;} - + if(arr[2][3] != 13) { + return 1; + } + return 0; } diff --git a/external_codes/boost_multi/multi/test/member_array_cast.cpp b/external_codes/boost_multi/multi/test/member_array_cast.cpp index d03818ef96..a9a4afd944 100644 --- a/external_codes/boost_multi/multi/test/member_array_cast.cpp +++ b/external_codes/boost_multi/multi/test/member_array_cast.cpp @@ -1,121 +1,208 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi member cast" -#include - -#include "multi/array.hpp" +// Copyright 2018-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4324) // Explicit padding required +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; -using v3d = std::array; - BOOST_AUTO_TEST_CASE(member_array_cast_soa_aos) { -// some members might need explicit padding to work well with member_cast -struct particle{ - double mass; - v3d position alignas(2*sizeof(double)); // __attribute__((aligned(2*sizeof(double)))) -}; + using v3d = std::array; -class particles_soa { - multi::array masses_; - multi::array positions_; - - public: - // NOLINTNEXTLINE(runtime/explicit) - particles_soa(multi::array const& AoS) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : particle_soa can represent a particles' AoS - : masses_ {AoS.member_cast(&particle::mass )} - , positions_{AoS.member_cast(&particle::position)} {} - - struct reference { // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) - double& mass; // NOLINT(misc-non-private-member-variables-in-classes): exposed by design - v3d& position; // NOLINT(misc-non-private-member-variables-in-classes): exposed by design - operator particle() const {return {mass, position};} // NOLINT(google-explicit-constructor, hicpp-explicit-conversions): allow equal assignment - auto operator+() const {return 
operator particle();} - - reference(double& mss, v3d& pos) : mass{mss}, position{pos} {} // NOLINT(google-runtime-references) - - private: // NOLINT(whitespace/indent) : bug in cpplint 1.5.5 - friend class particles_soa; - reference(reference const&) = default; - // reference(reference&&) = default; - - public: // NOLINT(whitespace/indent) : bug in cpplint 1.5.5 - // ~reference() noexcept = default; // lints cppcoreguidelines-special-member-functions,hicpp-special-member-functions - // #endif - - // NOLINTNEXTLINE(cert-oop54-cpp, fuchsia-trailing-return): simulate reference - auto operator=(reference const& other) -> reference& { - std::tie(mass, position) = std::tie(other.mass, other.position); - return *this; - } - // NOLINTNEXTLINE(fuchsia-trailing-return): simulate reference - auto operator=(reference&& other) noexcept -> reference& {operator=(other); return *this;} - - auto operator==(reference const& other) const {return std::tie(mass, position) == std::tie(other.mass, other.position);} - auto operator!=(reference const& other) const {return std::tie(mass, position) != std::tie(other.mass, other.position);} + // some members might need explicit padding to work well with member_cast + struct particle { + int mass; + v3d position alignas(2 * sizeof(double)); // __attribute__((aligned(2*sizeof(double)))) }; - auto operator()(int eye, int jay){return reference{masses_[eye][jay], positions_[eye][jay]};} -}; + class particles_soa { + multi::array masses_; + multi::array positions_; + + public: // NOLINT(whitespace/indent) nested class + // NOLINTNEXTLINE(runtime/explicit) + explicit particles_soa(multi::array const& AoS) // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) : particle_soa can represent a particles' AoS + : masses_{AoS.member_cast(&particle::mass)}, positions_{AoS.member_cast(&particle::position)} {} + + struct reference { // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions) // NOSONAR + int& mass; // 
NOLINT(misc-non-private-member-variables-in-classes,cppcoreguidelines-avoid-const-or-ref-data-members) exposed by design + v3d& position; // NOLINT(misc-non-private-member-variables-in-classes,cppcoreguidelines-avoid-const-or-ref-data-members) exposed by design + + operator particle() const { return {mass, position}; } // NOLINT(google-explicit-constructor, hicpp-explicit-conversions) // NOSONAR(cpp:S1709) allow direct assignment + auto operator+() const { return operator particle(); } + + reference(int& mss, v3d& pos) : mass{mss}, position{pos} {} // NOLINT(google-runtime-references) + // unused: explicit reference(particle& other) : reference{other.mass, other.position} {} + + private: // NOLINT(whitespace/indent) nested class + friend class particles_soa; + + public: // NOLINT(whitespace/indent) nested class + auto operator=(reference const& other) && -> reference& { + if(this == std::addressof(other)) { + return *this; + } + std::tie(mass, position) = std::tie(other.mass, other.position); + return *this; + } + + auto operator==(reference const& other) const { return std::tie(mass, position) == std::tie(other.mass, other.position); } + auto operator!=(reference const& other) const { return std::tie(mass, position) != std::tie(other.mass, other.position); } + }; + + auto operator()(int eye, int jay) { return reference{masses_[eye][jay], positions_[eye][jay]}; } + }; multi::array AoS({2, 2}, particle{}); - AoS[1][1] = particle{99., v3d{{1., 2.}} }; + AoS[1][1] = particle{99, v3d{{1.0, 2.0}}}; - auto&& masses = AoS.member_cast(&particle::mass); - BOOST_REQUIRE( size(masses) == 2 ); - BOOST_REQUIRE( masses[1][1] == 99. 
); + auto&& masses = AoS.member_cast(&particle::mass); + BOOST_REQUIRE(size(masses) == 2); + BOOST_REQUIRE(masses[1][1] == 99 ); - multi::array masses_copy = masses; - BOOST_REQUIRE( &masses_copy[1][1] != &masses[1][1] ); + multi::array masses_copy = masses; + BOOST_REQUIRE(&masses_copy[1][1] != &masses[1][1]); particles_soa SoA{AoS}; - BOOST_REQUIRE(SoA(1, 1).mass == 99. ); + BOOST_REQUIRE( SoA(1, 1).mass == 99 ); - particle p11 = SoA(1, 1); - BOOST_REQUIRE(p11.mass == 99. ); + particle const p11 = SoA(1, 1); + BOOST_REQUIRE(p11.mass == 99 ); auto autop11 = +SoA(1, 1); - BOOST_REQUIRE(autop11.mass == 99. ); + BOOST_REQUIRE(autop11.mass == 99 ); SoA(1, 1).mass = 88; - BOOST_REQUIRE(SoA(1, 1).mass == 88. ); + BOOST_REQUIRE( SoA(1, 1).mass == 88 ); SoA(1, 1) = SoA(0, 0); - BOOST_REQUIRE(SoA(1, 1).mass == SoA(0, 0).mass ); - BOOST_REQUIRE(SoA(1, 1) == SoA(0, 0) ); - BOOST_REQUIRE(not (SoA(1, 1) != SoA(0, 0)) ); + BOOST_REQUIRE( SoA(1, 1).mass == SoA(0, 0).mass); + BOOST_REQUIRE( SoA(1, 1) == SoA(0, 0)); + BOOST_REQUIRE( ! 
(SoA(1, 1) != SoA(0, 0))); } -struct alignas(32) employee { +struct employee_dummy { + std::string name; + // NOLINTNEXTLINE(runtime/int) + short salary; // NOLINT(google-runtime-int) + std::size_t age; +}; + +struct employee { std::string name; - int16_t salary; + // NOLINTNEXTLINE(runtime/int) + short salary; // NOLINT(google-runtime-int) std::size_t age; -// private: // char padding_[9];// std::array padding_; // use alignment or padding to allow member_cast + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) + char padding_[(((offsetof(employee_dummy, age) + sizeof(age)) / sizeof(std::string) + 1) * sizeof(std::string) - (offsetof(employee_dummy, age) + sizeof(age)))] = {}; }; +// TODO(correaa) this doesn't work with NVCC (triggered by adl fill) +#if !(defined(__NVCC__) || defined(__HIPCC__)) BOOST_AUTO_TEST_CASE(member_array_cast_soa_aos_employee) { - multi::array d1D = { {"Al" , 1430, 35}, {"Bob" , 3212, 34} }; + using namespace std::string_literals; // NOLINT(build/namespaces) for ""s + + multi::array d1D = { + { "Al"s, 1430, 35}, + {"Bob"s, 3212, 34}, + }; + auto&& d1D_names = d1D.member_cast(&employee::name); - BOOST_REQUIRE( size(d1D_names) == size(d1D) ); - BOOST_REQUIRE( d1D_names[1] == d1D[1].name ); - BOOST_REQUIRE( &d1D_names[1] == &d1D[1].name ); + BOOST_REQUIRE(size(d1D_names) == size(d1D)); + BOOST_REQUIRE(d1D_names[1] == d1D[1].name); + BOOST_REQUIRE(&d1D_names[1] == &d1D[1].name); multi::array d2D = { - { {"Al" , 1430, 35}, {"Bob" , 3212, 34} }, - { {"Carl", 1589, 32}, {"David", 2300, 38} } + { {"Al"s, 1430, 35}, {"Bob"s, 3212, 34}}, + {{"Carl"s, 1589, 32}, {"David"s, 2300, 38}}, }; - BOOST_REQUIRE( d2D[0][0].name == "Al" ); - BOOST_REQUIRE( d2D[0][0].salary == 1430 ); - BOOST_REQUIRE( d2D[0][0].age == 35 ); + BOOST_REQUIRE(d2D[0][0].name == "Al"); + BOOST_REQUIRE(d2D[0][0].salary == 1430); + BOOST_REQUIRE(d2D[0][0].age == 35); auto&& d2D_names = d2D.member_cast(&employee::name); - BOOST_REQUIRE( 
size(d2D_names) == size(d2D) ); - BOOST_REQUIRE( d2D_names[1][1] == "David" ); - + BOOST_REQUIRE(size(d2D_names) == size(d2D)); + BOOST_REQUIRE(d2D_names[1][1] == "David"); + +#if !(defined(__clang__) && defined(__CUDACC__)) +#if !defined(__circle_build__) || (__circle_build__ > 200 ) + multi::array d2D_names_copy_members = d2D.element_transformed(&employee::name); + BOOST_REQUIRE(d2D_names_copy_members[1][1] == "David"); + BOOST_REQUIRE(d2D_names_copy_members == d2D_names); +#endif +#endif + +#if !(defined(__clang__) && defined(__CUDACC__)) multi::array d2D_names_copy{d2D_names}; - BOOST_REQUIRE( d2D_names == d2D_names_copy ); - BOOST_REQUIRE( base(d2D_names) != base(d2D_names_copy) ); + BOOST_REQUIRE(d2D_names == d2D_names_copy); + BOOST_REQUIRE(base(d2D_names) != base(d2D_names_copy)); +#endif } +#endif + +#if !defined(__circle_build__) || (__circle_build__ > 200 ) +BOOST_AUTO_TEST_CASE(element_transformed_from_member) { + struct record { + int id; + double data; + }; + + multi::array const recs = { + {{1, 1.1}, {2, 2.2}}, + {{3, 3.3}, {4, 4.4}}, + }; + + // multi::array ids = recs.element_transformed(std::mem_fn(& A::id)); + multi::array ids = recs.element_transformed(&record::id); + BOOST_REQUIRE( ids[1][1] == 4 ); + BOOST_REQUIRE( ids == recs.member_cast(&record::id) ); + + // recs.element_transformed(std::mem_fn(& A::id) )[1][1] = 5; // not assignable, ok + // BOOST_REQUIRE( recs[1][1].id == 5 ); +} +#endif + +// TODO(correaa) this doesn't work with NVCC (triggered by adl fill) +#if !(defined(__NVCC__) || defined(__HIPCC__)) +BOOST_AUTO_TEST_CASE(element_transformed_from_member_no_amp) { + using namespace std::string_literals; // NOLINT(build/namespaces) for ""s + + multi::array d2D = { + { {"Al"s, 1430, 35}, {"Bob"s, 3212, 34}}, + {{"Carl"s, 1589, 32}, {"David"s, 2300, 38}}, + }; + + // multi::array d2D_ages_copy = + d2D.element_transformed(std::mem_fn(&employee::age)); + BOOST_REQUIRE( d2D.element_transformed(std::mem_fn(&employee::age)) == 
d2D.element_transformed(&employee::age) ); +} +#endif diff --git a/external_codes/boost_multi/multi/test/minimalistic_ptr.cpp b/external_codes/boost_multi/multi/test/minimalistic_ptr.cpp index 167f1f9c4b..8d2d152558 100644 --- a/external_codes/boost_multi/multi/test/minimalistic_ptr.cpp +++ b/external_codes/boost_multi/multi/test/minimalistic_ptr.cpp @@ -1,18 +1,42 @@ // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi minimalistic pointer" -#include - -#include - -#include "multi/array_ref.hpp" +// Copyright 2018-2023 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; namespace minimalistic { -template class ptr : public std::iterator_traits { // minimalistic pointer +template +class ptr : public std::iterator_traits { // minimalistic pointer using underlying_type = T*; underlying_type impl_; template friend class ptr; @@ -22,7 +46,7 @@ template class ptr : public std::iterator_traits { // minimalistic constexpr explicit ptr(T* impl) : impl_{impl} {} template{}> > // 
cppcheck-suppress [noExplicitConstructor,unmatchedSuppression] - ptr(ptr const& other) : impl_{other.impl_} {} // NOLINT(google-explicit-constructor, hicpp-explicit-conversions): ptr -> ptr + ptr(ptr const& other) : impl_{other.impl_} {} // NOLINT(google-explicit-constructor, hicpp-explicit-conversions) // NOSONAR(cpp:S1709) ptr -> ptr using typename std::iterator_traits::reference; using typename std::iterator_traits::difference_type; // NOLINTNEXTLINE(fuchsia-overloaded-operator, fuchsia-trailing-return): operator* used because this class simulates a pointer, trailing return helps @@ -33,14 +57,15 @@ template class ptr : public std::iterator_traits { // minimalistic // NOLINTNEXTLINE(fuchsia-overloaded-operator, cppcoreguidelines-pro-bounds-pointer-arithmetic): operator+ is overloaded to simulate a pointer constexpr auto operator-(difference_type n) const {return ptr{impl_ - n};} -// T& operator[](difference_type n) const{return impl_[n];} // optional +// T& operator[](difference_type n) const{return impl_[n];} // optional using default_allocator_type = std::allocator; template auto operator==(ptr const& other) const& {return impl_ == other.impl_;} template friend class ptr2; }; -template class ptr2 : public std::iterator_traits { // minimalistic pointer +template +class ptr2 : public std::iterator_traits { // minimalistic pointer T* impl_; public: @@ -48,7 +73,7 @@ template class ptr2 : public std::iterator_traits { // minimalistic constexpr explicit ptr2(ptr const& other) : impl_{other.impl_} {} template>> // cppcheck-suppress [noExplicitConstructor, unmatchedSuppression] - ptr2(ptr2 const& other) : impl_{other.impl_} {} // NOLINT(google-explicit-constructor, hicpp-explicit-conversions): ptr -> ptr + ptr2(ptr2 const& other) : impl_{other.impl_} {} // NOLINT(google-explicit-constructor, hicpp-explicit-conversions) // NOSONAR(cpp:S1709) ptr -> ptr using typename std::iterator_traits::reference; using typename std::iterator_traits::difference_type; @@ -61,7 +86,7 
@@ template class ptr2 : public std::iterator_traits { // minimalistic // NOLINTNEXTLINE(fuchsia-overloaded-operator, cppcoreguidelines-pro-bounds-pointer-arithmetic): operator+ is overloaded to simulate a pointer constexpr auto operator-(difference_type n) const {return ptr2{impl_ - n};} -// T& operator[](std::ptrdiff_t n) const{return impl_[n];} // optional +// T& operator[](std::ptrdiff_t n) const{return impl_[n];} // optional using default_allocator_type = std::allocator; }; @@ -72,9 +97,9 @@ BOOST_AUTO_TEST_CASE(test_minimalistic_ptr) { BOOST_REQUIRE( buffer.size() == 400 ); using pointer_type = minimalistic::ptr; - multi::array_ptr CCP(pointer_type{buffer.data()}, {20, 20}); - (*CCP)[2]; // requires operator+ - (*CCP)[1][1]; // requires operator* + multi::array_ptr const CCP(pointer_type{buffer.data()}, {20, 20}); + (*CCP)[2]; // requires operator+ + (*CCP)[1][1]; (*CCP)[1][1] = 9; BOOST_REQUIRE( &(*CCP)[1][1] == &buffer[21] ); @@ -83,8 +108,8 @@ BOOST_AUTO_TEST_CASE(test_minimalistic_ptr) { static_assert( std::is_convertible{}, "!"); - minimalistic::ptr pd{nullptr}; - minimalistic::ptr pcd = pd; + minimalistic::ptr const pd{nullptr}; + minimalistic::ptr const pcd = pd; BOOST_REQUIRE( pcd == pd ); { diff --git a/external_codes/boost_multi/multi/test/move.cpp b/external_codes/boost_multi/multi/test/move.cpp index b31cc697c3..f37bb0f68c 100644 --- a/external_codes/boost_multi/multi/test/move.cpp +++ b/external_codes/boost_multi/multi/test/move.cpp @@ -1,17 +1,39 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2020-2022 Alfredo A. Correa +// Copyright 2020-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi move" -#include - -#include - -#include "multi/array.hpp" +#include #include // for std::move #include #include +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +#include // this is from Boost.MultiArray, not this library + namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(move_unique_ptr_1D) { @@ -39,7 +61,7 @@ BOOST_AUTO_TEST_CASE(move_unique_ptr_1D) { multi::array, 1> arr(multi::extensions_t<1>{10}); arr[1] = std::make_unique(42); - multi::array, 1> arr2;//(multi::extensions_t<1>{10}); + multi::array, 1> arr2; // (multi::extensions_t<1>{10}); arr2 = std::move(arr); BOOST_REQUIRE( arr.is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved,clang-analyzer-cplusplus.Move) BOOST_REQUIRE( arr2[1] ); @@ -51,7 +73,7 @@ BOOST_AUTO_TEST_CASE(move_unique_ptr_1D) { multi::array, 1> arr2(multi::extensions_t<1>{10}); // arr2() = arr(); // fails to compile, elements are not copy assignable - arr2() = arr().moved(); + arr2() = arr().element_moved(); BOOST_REQUIRE( !arr[1] ); BOOST_REQUIRE( arr2[1] ); BOOST_REQUIRE( *arr2[1] == 42 ); @@ -59,81 +81,99 @@ BOOST_AUTO_TEST_CASE(move_unique_ptr_1D) { } BOOST_AUTO_TEST_CASE(multi_swap) { - multi::array arr({3, 5}, 99.); - multi::array arr2({7, 
11}, 88.); +#ifndef _MSC_VER // problems with 14.3 c++17 + multi::array arr({3, 5}, 99.0); + multi::array arr2({7, 11}, 88.0); +#else + multi::array arr(multi::extensions_t<2>{3, 5}, 99.0); + multi::array arr2(multi::extensions_t<2>{7, 11}, 88.0); +#endif + swap(arr, arr2); + BOOST_REQUIRE( size(arr) == 7 ); - BOOST_REQUIRE( arr[1][2] == 88. ); - BOOST_REQUIRE( arr2[1][2] == 99. ); + BOOST_REQUIRE( arr[1][2] == 88.0 ); + BOOST_REQUIRE( arr2[1][2] == 99.0 ); } BOOST_AUTO_TEST_CASE(multi_std_swap) { - multi::array arr({3, 5}, 99.); - multi::array arr2({7, 11}, 88.); +#ifndef _MSC_VER // problems with 14.3 c++17 + multi::array arr({3, 5}, 99.0); + multi::array arr2({7, 11}, 88.0); +#else + multi::array arr(multi::extensions_t<2>{3, 5}, 99.0); + multi::array arr2(multi::extensions_t<2>{7, 11}, 88.0); +#endif + using std::swap; swap(arr, arr2); + BOOST_REQUIRE( size(arr) == 7 ); - BOOST_REQUIRE( arr[1][2] == 88. ); - BOOST_REQUIRE( arr2[1][2] == 99. ); + BOOST_REQUIRE( arr[1][2] == 88.0 ); + BOOST_REQUIRE( arr2[1][2] == 99.0 ); } BOOST_AUTO_TEST_CASE(multi_array_clear) { - multi::array arr({10, 10}, 99.); + multi::array arr({10, 10}, 99.0); + arr.clear(); + BOOST_REQUIRE(arr.is_empty()); - arr.reextent({20, 20}, 99.); - BOOST_REQUIRE(not arr.is_empty()); - clear(arr).reextent({30, 30}, 88.); - BOOST_REQUIRE(arr[15][15] == 88.); + + arr.reextent({20, 20}, 99.0); + // BOOST_REQUIRE(! arr.is_empty()); + + // clear(arr).reextent({30, 30}, 88.0); + // BOOST_REQUIRE(arr[15][15] == 88.0); } BOOST_AUTO_TEST_CASE(multi_array_move) { - std::vector > Av(10, multi::array({4, 5}, 99.)); + std::vector > Av(10, multi::array({4, 5}, 99.0)); // std::vector NOLINT(fuchsia-default-arguments-calls) multi::array arr2(std::move(Av[0]), std::allocator{}); BOOST_REQUIRE( is_empty(Av[0]) ); BOOST_REQUIRE( size(arr2) == 4 ); - BOOST_REQUIRE( arr2[1][2] == 99. 
); + BOOST_REQUIRE( arr2[1][2] == 99.0 ); } BOOST_AUTO_TEST_CASE(multi_array_move_into_vector) { - std::vector > Av(10, multi::array({4, 5}, 99.)); - std::vector > Bv; Bv.reserve(Av.size()); + std::vector > Av(10, multi::array({4, 5}, 99.0)); // NOLINT(fuchsia-default-arguments-calls) + std::vector > Bv; Bv.reserve(Av.size()); // NOLINT(fuchsia-default-arguments-calls) std::move( begin(Av), end(Av), std::back_inserter(Bv) ); BOOST_REQUIRE( size(Bv) == size(Av) ); BOOST_REQUIRE( is_empty(Av[4]) ); BOOST_REQUIRE( size(Bv[5]) == 4 ); - BOOST_REQUIRE( Bv[5][1][2] == 99. ); + BOOST_REQUIRE( Bv[5][1][2] == 99.0 ); } BOOST_AUTO_TEST_CASE(multi_array_move_into_vector_reserve) { - std::vector > Av(10, multi::array({4, 5}, 99.)); + std::vector > Av(10, multi::array({4, 5}, 99.0)); // NOLINT(fuchsia-default-arguments-calls) std::vector > Bv; Bv.reserve(Av.size()); -// for(auto& v: Av) Bv.emplace_back(std::move(v), std::allocator{}); // segfaults nvcc 11.0 but not nvcc 11.1 +// for(auto& v: Av) Bv.emplace_back(std::move(v), std::allocator{}); // segfaults nvcc 11.0 but not nvcc 11.1 std::move(begin(Av), end(Av), std::back_inserter(Bv)); BOOST_REQUIRE( size(Bv) == size(Av) ); BOOST_REQUIRE( is_empty(Av[4]) ); BOOST_REQUIRE( size(Bv[5]) == 4 ); - BOOST_REQUIRE( Bv[5][1][2] == 99. ); + BOOST_REQUIRE( Bv[5][1][2] == 99.0 ); } BOOST_AUTO_TEST_CASE(multi_array_move_into_vector_move) { - std::vector > Av(10, multi::array({4, 5}, 99.)); + std::vector > Av(10, multi::array({4, 5}, 99.0)); // std::vector NOLINT(fuchsia-default-arguments-calls) std::vector > Bv = std::move(Av); Av.clear(); BOOST_REQUIRE( size(Av) == 0 ); BOOST_REQUIRE( size(Bv) == 10 ); BOOST_REQUIRE( size(Bv[5]) == 4 ); - BOOST_REQUIRE( Bv[5][1][2] == 99. 
); + BOOST_REQUIRE( Bv[5][1][2] == 99.0 ); } BOOST_AUTO_TEST_CASE(multi_array_move_array) { - multi::array, 2> arr({10, 10}, std::vector(5) ); + multi::array, 2> arr({10, 10}, std::vector(5) ); // std::vector NOLINT(fuchsia-default-arguments-calls) auto arr2 = std::move(arr); BOOST_REQUIRE( arr . empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved,clang-analyzer-cplusplus.Move) test deterministic moved from state BOOST_REQUIRE( arr .is_empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved,clang-analyzer-cplusplus.Move) test deterministic moved from state @@ -141,115 +181,113 @@ BOOST_AUTO_TEST_CASE(multi_array_move_array) { } BOOST_AUTO_TEST_CASE(multi_array_move_elements) { - multi::array, 1> arr({10}, std::vector(5) ); + multi::array, 1> arr({10}, std::vector(5) ); // std::vector NOLINT(fuchsia-default-arguments-calls) - std::vector> sink(5); + std::vector> sink(5); // std::vector NOLINT(fuchsia-default-arguments-calls) auto* ptr1 = arr[1].data(); - std::copy( arr({0, 5}).moved().begin(), arr({0, 5}).moved().end(), sink.begin() ); + std::copy( arr({0, 5}).element_moved().begin(), arr({0, 5}).element_moved().end(), sink.begin() ); BOOST_REQUIRE( arr[1].empty() ); - BOOST_REQUIRE( not arr[5].empty() ); + BOOST_REQUIRE( ! arr[5].empty() ); BOOST_REQUIRE( sink[1].data() == ptr1 ); } BOOST_AUTO_TEST_CASE(multi_array_move_elements_range) { - multi::array, 1> arr({10}, std::vector(5) ); + multi::array, 1> arr({10}, std::vector(5) ); // std::vector NOLINT(fuchsia-default-arguments-calls) - std::vector> sink(5); + std::vector> sink(5); // NOLINT(fuchsia-default-arguments-calls) auto* ptr1 = arr[1].data(); - std::copy( arr({0, 5}).moved().elements().begin(), arr({0, 5}).moved().elements().end(), sink.begin() ); + std::copy( arr({0, 5}).element_moved().elements().begin(), arr({0, 5}).element_moved().elements().end(), sink.begin() ); BOOST_REQUIRE( arr[1].empty() ); - BOOST_REQUIRE( not arr[5].empty() ); + BOOST_REQUIRE( ! 
arr[5].empty() ); BOOST_REQUIRE( sink[1].data() == ptr1 ); } BOOST_AUTO_TEST_CASE(multi_array_move_elements_to_array) { - multi::array, 1> arr({10}, std::vector(5, 99.) ); + multi::array, 1> arr({10}, std::vector(5, 99.0) ); // std::vector NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( arr.size() == 10 ); - multi::array, 1> arr2({ 5}, {}, {}); + multi::array, 1> arr2({ 5}, {}, {}); // std::vector NOLINT(fuchsia-default-arguments-calls) auto* ptr1 = arr[1].data(); - arr2().elements() = arr({0, 5}).moved().elements(); + arr2().elements() = arr({0, 5}).element_moved().elements(); BOOST_REQUIRE( arr2[1].size() == 5 ); - BOOST_REQUIRE( arr2[1][4] == 99. ); + BOOST_REQUIRE( arr2[1][4] == 99.0 ); BOOST_REQUIRE( arr[1].empty() ); - BOOST_REQUIRE( not arr[5].empty() ); + BOOST_REQUIRE( ! arr[5].empty() ); BOOST_REQUIRE( arr2[1].data() == ptr1 ); } BOOST_AUTO_TEST_CASE(move_range_vector_1D) { - std::vector> arr(10, std::vector{1., 2., 3.}); - std::vector> arr2(10); + std::vector> arr(10, std::vector{1.0, 2.0, 3.0}); // NOLINT(fuchsia-default-arguments-calls) + std::vector> arr2(10); // NOLINT(fuchsia-default-arguments-calls) std::move(arr.begin(), arr.end(), arr2.begin()); - BOOST_REQUIRE( arr2[0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( arr[0].empty() ); BOOST_REQUIRE( arr[1].empty() ); } BOOST_AUTO_TEST_CASE(copy_range_1D) { - multi::array, 1> arr({3}, std::vector{1., 2., 3.}); + multi::array, 1> arr({3}, std::vector{1.0, 2.0, 3.0}); // std::vector NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( arr.size() == 3 ); multi::array, 1> arr2({3}, std::vector{}); std::copy(arr.begin(), arr.end(), arr2.begin()); - BOOST_REQUIRE( arr2[0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[1] == 
std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) - BOOST_REQUIRE( arr[0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr[1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr [0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr [1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) } BOOST_AUTO_TEST_CASE(move_range_1D) { - multi::array, 1> arr({3}, std::vector{1., 2., 3.}); + multi::array, 1> arr({3}, std::vector{1.0, 2.0, 3.0}); // std::vector NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( arr.size() == 3 ); - multi::array, 1> arr2({3}, std::vector{}); + multi::array, 1> arr2({3}, std::vector{}); // std::vector NOLINT(fuchsia-default-arguments-calls) std::move(arr.begin(), arr.end(), arr2.begin()); - BOOST_REQUIRE( arr2[0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( arr[0].empty() ); BOOST_REQUIRE( arr[1].empty() ); } BOOST_AUTO_TEST_CASE(move_range_1D_moved_begin) { - multi::array, 1> arr({3}, std::vector{1., 2., 3.}); + multi::array, 1> arr({3}, std::vector{1.0, 2.0, 3.0}); // std::vector NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( arr.size() == 3 ); - multi::array, 1> arr2({3}, std::vector{}); + multi::array, 1> arr2({3}, std::vector{}); // std::vector NOLINT(fuchsia-default-arguments-calls) std::copy(arr.mbegin(), arr.mend(), arr2.begin()); - BOOST_REQUIRE( arr2[0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[0] == std::vector({1.0, 2.0, 3.0}) ); // 
NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( arr[0].empty() ); BOOST_REQUIRE( arr[1].empty() ); } -template void what(Ts&&...) = delete; - BOOST_AUTO_TEST_CASE(copy_move_range) { - multi::array, 2> arr({10, 20}, std::vector{1., 2., 3.}); - multi::array, 2> arr2({10, 20}, std::vector{} ); + multi::array, 2> arr ({10, 20}, std::vector{1.0, 2.0, 3.0}); // std::vector NOLINT(fuchsia-default-arguments-calls) + multi::array, 2> arr2({10, 20}, std::vector{} ); // std::vector NOLINT(fuchsia-default-arguments-calls) std::copy(arr.mbegin(), arr.mend(), arr2.begin()); - BOOST_REQUIRE( arr2[0][0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[0][1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[0][0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[0][1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) - BOOST_REQUIRE( arr2[1][0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[1][1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[1][0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[1][1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( arr[0][0].empty() ); BOOST_REQUIRE( arr[0][1].empty() ); @@ -259,16 +297,16 @@ BOOST_AUTO_TEST_CASE(copy_move_range) { } BOOST_AUTO_TEST_CASE(copy_move_range_moved_begin) { - multi::array, 2> arr({10, 20}, std::vector{1., 2., 3.}); - multi::array, 2> arr2({10, 20}, std::vector{} ); + multi::array, 2> arr ({10, 20}, std::vector{1.0, 2.0, 3.0}); // NOLINT(fuchsia-default-arguments-calls) + multi::array, 2> arr2({10, 20}, std::vector{} ); // NOLINT(fuchsia-default-arguments-calls) - std::copy(arr.moved().begin(), arr.moved().end(), arr2.begin()); + std::copy(arr.element_moved().begin(), arr.element_moved().end(), arr2.begin()); - 
BOOST_REQUIRE( arr2[0][0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[0][1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[0][0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[0][1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) - BOOST_REQUIRE( arr2[1][0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[1][1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[1][0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[1][1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( arr[0][0].empty() ); BOOST_REQUIRE( arr[0][1].empty() ); @@ -278,16 +316,16 @@ BOOST_AUTO_TEST_CASE(copy_move_range_moved_begin) { } BOOST_AUTO_TEST_CASE(copy_move_range_moved_begin_block) { - multi::array, 2> arr({10, 20}, std::vector{1., 2., 3.}); - multi::array, 2> arr2({ 3, 5}, std::vector{} ); + multi::array, 2> arr ({10, 20}, std::vector{1.0, 2.0, 3.0}); // NOLINT(fuchsia-default-arguments-calls) + multi::array, 2> arr2({ 3, 5}, std::vector{} ); - std::copy(arr({5, 8}, {10, 15}).moved().begin(), arr({5, 8}, {10, 15}).moved().end(), arr2.begin()); + std::copy(arr({5, 8}, {10, 15}).element_moved().begin(), arr({5, 8}, {10, 15}).element_moved().end(), arr2.begin()); - BOOST_REQUIRE( arr2[0][0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[0][1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[0][0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[0][1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) - BOOST_REQUIRE( arr2[1][0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[1][1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[1][0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[1][1] == std::vector({1.0, 2.0, 3.0}) ); // 
NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( arr[5][10].empty() ); BOOST_REQUIRE( arr[5][11].empty() ); @@ -298,17 +336,17 @@ BOOST_AUTO_TEST_CASE(copy_move_range_moved_begin_block) { BOOST_AUTO_TEST_CASE(move_reference_range) { - multi::array, 2> arr({10, 20}, std::vector{1., 2., 3.}); - multi::array, 2> arr2({10, 20}, std::vector{} ); + multi::array, 2> arr ({10, 20}, std::vector{1.0, 2.0, 3.0}); // std::vector NOLINT(fuchsia-default-arguments-calls) + multi::array, 2> arr2({10, 20}, std::vector{} ); // std::vector NOLINT(fuchsia-default-arguments-calls) -// arr2() = arr().moved(); - std::copy(arr().moved().begin(), arr().moved().end(), arr2().begin()); +// arr2() = arr().element_moved(); + std::copy(arr().element_moved().begin(), arr().element_moved().end(), arr2().begin()); - BOOST_REQUIRE( arr2[0][0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[0][1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[0][0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[0][1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) - BOOST_REQUIRE( arr2[1][0] == std::vector({1., 2., 3.}) ); - BOOST_REQUIRE( arr2[1][1] == std::vector({1., 2., 3.}) ); + BOOST_REQUIRE( arr2[1][0] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) + BOOST_REQUIRE( arr2[1][1] == std::vector({1.0, 2.0, 3.0}) ); // NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( arr[0][0].empty() ); BOOST_REQUIRE( arr[0][1].empty() ); @@ -319,20 +357,20 @@ BOOST_AUTO_TEST_CASE(move_reference_range) { BOOST_AUTO_TEST_CASE(move_array_elements) { // NOLINT(readability-function-cognitive-complexity) { - auto arr = multi::array, 1>({ 5}, std::vector(7)); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) auto arr2 = std::move(arr); BOOST_REQUIRE( arr2.size() == 5 ); BOOST_REQUIRE( arr2[0].size() == 7 ); BOOST_REQUIRE( arr.is_empty() ); // 
NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved,clang-analyzer-cplusplus.Move) } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) - std::vector v0 = std::move(arr[0]); + std::vector const v0 = std::move(arr[0]); BOOST_REQUIRE( v0.size() == 7 ); BOOST_REQUIRE( arr[0].empty() ); - std::vector v1 = std::move(arr)[1]; + std::vector const v1 = std::move(arr)[1]; BOOST_REQUIRE( v1.size() == 7 ); BOOST_REQUIRE( arr[1].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved,clang-analyzer-cplusplus.Move) for test @@ -340,124 +378,160 @@ BOOST_AUTO_TEST_CASE(move_array_elements) { // NOLINT(readability-function-cogn arr2({0, 1}) = arr({2, 3}); BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[2].size() == 7 ); + BOOST_REQUIRE( arr [2].size() == 7 ); } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); arr2() = arr(); BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[2].size() == 7 ); + BOOST_REQUIRE( arr [2].size() == 7 ); arr2() = std::move(arr)(); BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[2].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing + BOOST_REQUIRE( arr [2].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); arr2() = arr(); BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[0].size() == 7 ); + BOOST_REQUIRE( arr [0].size() == 7 ); } { - 
auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); arr2() = std::move(arr)(); BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing + BOOST_REQUIRE( arr [0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); auto&& mAp = std::move(arr)(); arr2() = mAp; BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[0].size() == 7 ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing + BOOST_REQUIRE( arr [0].size() == 7 ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); arr2({0, 5}) = std::move(arr)(); BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing + BOOST_REQUIRE( arr [0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); - arr2() = arr.take(5); + arr2() 
= arr.taked(5); BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[0].size() == 7); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing + BOOST_REQUIRE( arr [0].size() == 7); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); - arr2() = std::move(arr).take(5); + arr2() = std::move(arr).taked(5); BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing + BOOST_REQUIRE( arr [0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); - auto&& mAt5 = std::move(arr).take(5); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); + auto&& mAt5 = std::move(arr).taked(5); arr2() = mAt5; BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[0].size() == 7 ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing + BOOST_REQUIRE( arr [0].size() == 7 ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); - auto&& mAt5 = std::move(arr).take(5); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); + auto&& mAt5 = std::move(arr).taked(5); arr2() = mAt5; BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[0].size() == 7 ); // 
NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing + BOOST_REQUIRE( arr [0].size() == 7 ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); - auto&& mAt5 = std::move(arr).take(5); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); + auto&& mAt5 = std::move(arr).taked(5); arr2() = std::move(mAt5); BOOST_REQUIRE( arr2[0].size() == 7 ); BOOST_REQUIRE( arr[0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); - auto&& mAt5 = std::move(arr).take(5); - arr2() = std::move(mAt5).take(5); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); + auto&& mAt5 = std::move(arr).taked(5); + arr2() = std::move(mAt5).taked(5); BOOST_REQUIRE( arr2[0].size() == 7 ); BOOST_REQUIRE( arr[0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); - auto&& mAt5 = std::move(arr).take(5); - auto&& mAt5t5 = std::move(mAt5).take(5); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); + auto&& mAt5 = std::move(arr).taked(5); + auto&& mAt5t5 = std::move(mAt5).taked(5); arr2() = mAt5t5; BOOST_REQUIRE( arr2[0].size() == 7 ); BOOST_REQUIRE( arr[0].size() == 7 ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 5}, std::vector{}); - 
auto&& mAt5 = std::move(arr).take(5); - arr2() = std::move(mAt5).drop(0); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 5}, std::vector{} ); + auto&& mAt5 = std::move(arr).taked(5); + arr2() = std::move(mAt5).dropped(0); BOOST_REQUIRE( arr2[0].size() == 7 ); BOOST_REQUIRE( arr[0].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } { - auto arr = multi::array, 1>({ 5}, std::vector(7)); - auto arr2 = multi::array, 1>({ 4}, std::vector{}); - arr2() = std::move(arr).drop(1); + auto arr = multi::array, 1>({ 5}, std::vector(7)); // std::vector NOLINT(fuchsia-default-arguments-calls) + auto arr2 = multi::array, 1>({ 4}, std::vector{} ); // std::vector NOLINT(fuchsia-default-arguments-calls) + arr2() = std::move(arr).dropped(1); BOOST_REQUIRE( arr2[0].size() == 7 ); - BOOST_REQUIRE( arr[0].size() == 7 ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing - BOOST_REQUIRE( arr[1].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing + BOOST_REQUIRE( arr [0].size() == 7 ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing + BOOST_REQUIRE( arr [1].empty() ); // NOLINT(bugprone-use-after-move,hicpp-invalid-access-moved) for testing } } + +BOOST_AUTO_TEST_CASE(multi_array_view_swap) { + multi::array arrA({4, 5}, 99); + multi::array arrB({4, 5}, 88); + + arrA().swap(arrB()); + + BOOST_REQUIRE( arrA[0][0] == 88 ); + BOOST_REQUIRE( arrB[0][0] == 99 ); +} + +BOOST_AUTO_TEST_CASE(multi_array_view_swap_dimension_1) { + multi::array arrA({4, 5}, 99); + multi::array arrB({4, 5}, 88); + + arrA[0].swap(arrB[0]); + + BOOST_REQUIRE( arrA[0][0] == 88 ); + BOOST_REQUIRE( arrB[0][0] == 99 ); + + BOOST_REQUIRE( arrA[1][0] == 99 ); + BOOST_REQUIRE( arrB[1][0] == 88 ); +} + +BOOST_AUTO_TEST_CASE(multi_array_view_swap_dimension_1_free) { + multi::array arrA({4, 5}, 99); + multi::array arrB({4, 
5}, 88); + + swap(arrA[0], arrB[0]); + + BOOST_REQUIRE( arrA[0][0] == 88 ); + BOOST_REQUIRE( arrB[0][0] == 99 ); + + BOOST_REQUIRE( arrA[1][0] == 99 ); + BOOST_REQUIRE( arrB[1][0] == 88 ); +} diff --git a/external_codes/boost_multi/multi/test/nico_const_correctness.cpp b/external_codes/boost_multi/multi/test/nico_const_correctness.cpp index 05df38d6db..baa19b6cd4 100644 --- a/external_codes/boost_multi/multi/test/nico_const_correctness.cpp +++ b/external_codes/boost_multi/multi/test/nico_const_correctness.cpp @@ -1,109 +1,135 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi views constness" -#include - -#include "multi/array.hpp" +// Copyright 2022-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; template void print(Array1D const& coll) { -// *coll.begin() = 99; // doesn't compile "assignment of read-only location" + // *coll.begin() = 99; // doesn't compile "assignment of read-only location" - for(auto const& elem : coll) {std::cout<< elem <<", ";} - 
std::cout<(std::cout, ", ")); + std::cout << '\n'; } BOOST_AUTO_TEST_CASE(const_views) { multi::array coll1 = {0, 8, 15, 47, 11, 42}; - print( coll1 ); // prints "0, 8, 15, 47, 11, 42" + print(coll1); // prints "0, 8, 15, 47, 11, 42" - print( coll1({0, 3}) ); // similar to coll1 | take(3) // prints "0, 8, 15" + print(coll1({0, 3})); // similar to coll1 | take(3) // prints "0, 8, 15" auto&& coll1_take3 = coll1({0, 3}); - print( coll1_take3 ); // prints "0, 8, 15" + print(coll1_take3); // prints "0, 8, 15" } template -void fill_99(Array1D&& coll) { - for(auto& elem : coll) {elem = 99;} +auto fill_99(Array1D&& col) -> Array1D&& { + std::fill(std::begin(col), std::end(col), 99); + return std::forward(col); } BOOST_AUTO_TEST_CASE(mutating_views) { multi::array coll1 = {0, 8, 15, 47, 11, 42}; - fill_99( coll1 ); - fill_99( coll1({0, 3}) ); + fill_99(coll1); + fill_99(coll1({0, 3})); auto&& coll1_take3 = coll1({0, 3}); - fill_99( coll1_take3 ); + fill_99(coll1_take3); auto const& coll2 = coll1; -// fill_99( coll2 ); // doesn't compile because coll2 is const ("assignment of read-only" inside fill_99) -// fill_99( coll2({0, 3}) ); // similar to coll2 | take(3) doesn't compile ("assignment of read-only") + // fill_99( coll2 ); // doesn't compile because coll2 is const ("assignment of read-only" inside fill_99) + // fill_99( coll2({0, 3}) ); // similar to coll2 | take(3) doesn't compile ("assignment of read-only") auto const& coll1_take3_const = coll1({0, 3}); -// fill_99( coll1_take3_const ); // doesn't compile because coll1_take3_const is const ("assignment of read-only") + // fill_99( coll1_take3_const ); // doesn't compile because coll1_take3_const is const ("assignment of read-only") (void)coll2, (void)coll1_take3_const, (void)coll1_take3; } -template -void print_2d(Array1D const& coll) { -// *(coll.begin()->begin()) = 99; // doesn't compile "assignment of read-only location" - - for(auto const& row : coll) { - for(auto const& elem : row) { - std::cout<< elem <<", "; - } 
- std::cout< +void print_2d(Array2D const& coll) { + // *(coll.begin()->begin()) = 99; // doesn't compile "assignment of read-only location" + + std::for_each(std::begin(coll), std::end(coll), [](auto const& row) { + std::copy(std::begin(row), std::end(row), std::ostream_iterator(std::cout, ", ")); + std::cout << '\n'; + }); } BOOST_AUTO_TEST_CASE(const_views_2d) { multi::array coll1 = { {0, 8, 15, 47, 11, 42}, - {0, 8, 15, 47, 11, 42} + {0, 8, 15, 47, 11, 42}, }; - print_2d( coll1 ); // prints "0, 8, 15, 47, 11, 42" + print_2d(coll1); // prints "0, 8, 15, 47, 11, 42" - print_2d( coll1({0, 2}, {0, 3}) ); // similar to coll1 | take(3) // prints "0, 8, 15" + print_2d(coll1({0, 2}, {0, 3})); // similar to coll1 | take(3) // prints "0, 8, 15" auto&& coll1_take3 = coll1({0, 2}, {0, 3}); - print_2d( coll1_take3 ); // prints "0, 8, 15" + print_2d(coll1_take3); // prints "0, 8, 15" } template -void fill_2d_99(Array1D&& coll) { -// for(auto const& row : coll) { // does not work because it would make it const - for(auto&& row : coll) { - for(auto&& elem : row) { - elem = 99; - } - } +auto fill_2d_99(Array1D&& coll) -> Array1D&& { + // for(auto const& row : coll) { // does not work because it would make it const + std::for_each(std::begin(coll), std::end(coll), [](typename std::decay_t::reference row) { + std::fill(std::begin(row), std::end(row), 99); + }); + // std::transform(coll.begin(), coll.end(), coll.begin(), [](auto&& row) { + // std::fill(row.begin(), row.end(), 99); + // return std::forward(row); + // }); + return std::forward(coll); } BOOST_AUTO_TEST_CASE(mutating_views_2d) { multi::array coll1 = { {0, 8, 15, 47, 11, 42}, - {0, 8, 15, 47, 11, 42} + {0, 8, 15, 47, 11, 42}, }; - fill_2d_99( coll1 ); - fill_2d_99( coll1({0, 2}, {0, 3}) ); + fill_2d_99(coll1); + fill_2d_99(coll1({0, 2}, {0, 3})); auto&& coll1_take3 = coll1({0, 2}, {0, 3}); - fill_2d_99( coll1_take3 ); + fill_2d_99(coll1_take3); auto const& coll2 = coll1; -// fill_99( coll2 ); // doesn't compile because 
coll2 is const ("assignment of read-only" inside fill_99) -// fill_99( coll2({0, 3}) ); // similar to coll2 | take(3) doesn't compile ("assignment of read-only") + // fill_99( coll2 ); // doesn't compile because coll2 is const ("assignment of read-only" inside fill_99) + // fill_99( coll2({0, 3}) ); // similar to coll2 | take(3) doesn't compile ("assignment of read-only") auto const& coll1_take3_const = coll1({0, 2}, {0, 3}); -// fill_99( coll1_take3_const ); // doesn't compile because coll1_take3_const is const ("assignment of read-only") + // fill_99( coll1_take3_const ); // doesn't compile because coll1_take3_const is const ("assignment of read-only") (void)coll2, (void)coll1_take3_const, (void)coll1_take3; } diff --git a/external_codes/boost_multi/multi/test/one_based.cpp b/external_codes/boost_multi/multi/test/one_based.cpp index c04fec96c7..8abe3c5829 100644 --- a/external_codes/boost_multi/multi/test/one_based.cpp +++ b/external_codes/boost_multi/multi/test/one_based.cpp @@ -1,110 +1,134 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi one-based" -#include - -#include "multi/array.hpp" +// Copyright 2019-2023 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(one_based_1D) { - multi::array Ac({{0, 10}}, 0.); + multi::array const Ac({{0, 10}}, 0.0); BOOST_REQUIRE( Ac.size() == 10 ); -// multi::array Af({{1, 1 + 10}}, 0.); -// Af[1] = 1.; -// Af[2] = 2.; -// Af[3] = 3.; +// multi::array Af({{1, 1 + 10}}, 0.); +// Af[1] = 1.; +// Af[2] = 2.; +// Af[3] = 3.; -// BOOST_REQUIRE( Af[1] = 1. ); -// BOOST_REQUIRE( *Af.data_elements() == 1. ); -// BOOST_REQUIRE( size(Af) == 10 ); -// BOOST_REQUIRE( extension(Af).start() == 1 ); -// BOOST_REQUIRE( extension(Af).finish() == 11 ); +// BOOST_REQUIRE( Af[1] = 1. ); +// BOOST_REQUIRE( *Af.data_elements() == 1. ); +// BOOST_REQUIRE( size(Af) == 10 ); +// BOOST_REQUIRE( extension(Af).start() == 1 ); +// BOOST_REQUIRE( extension(Af).finish() == 11 ); -// auto Af1 = multi::array(multi::extensions_t<1>{multi::iextension{10}}, 0.).reindex(1); +// auto Af1 = multi::array(multi::extensions_t<1>{multi::iextension{10}}, 0.).reindex(1); -// BOOST_REQUIRE( size(Af1) == 10 ); -// BOOST_REQUIRE( Af1[10] == 0. ); +// BOOST_REQUIRE( size(Af1) == 10 ); +// BOOST_REQUIRE( Af1[10] == 0. 
); -// multi::array B({{0, 10}}, 0.); -// B[0] = 1.; -// B[1] = 2.; -// B[2] = 3.; +// multi::array B({{0, 10}}, 0.); +// B[0] = 1.; +// B[1] = 2.; +// B[2] = 3.; -// BOOST_REQUIRE( size(B) == 10 ); -// BOOST_REQUIRE( B != Af ); -// BOOST_REQUIRE( std::equal(begin(Af), end(Af), begin(B), end(B) ) ); +// BOOST_REQUIRE( size(B) == 10 ); +// BOOST_REQUIRE( B != Af ); +// BOOST_REQUIRE( std::equal(begin(Af), end(Af), begin(B), end(B) ) ); -// BOOST_REQUIRE( Af.reindexed(0) == B ); +// BOOST_REQUIRE( Af.reindexed(0) == B ); } BOOST_AUTO_TEST_CASE(one_based_2D) { - multi::array Ac({{0, 10}, {0, 20}}, 0.); + multi::array const Ac({{0, 10}, {0, 20}}, 0.0); BOOST_REQUIRE( Ac.size() == 10 ); -// multi::array Af({{1, 1 + 10}, {1, 1 + 20}}, 0.); -// Af[1][1] = 1.; -// Af[2][2] = 2.; -// Af[3][3] = 3.; -// Af[10][20] = 99.; - -// BOOST_REQUIRE( Af[1][1] = 1. ); -// BOOST_REQUIRE( Af[10][20] == 99. ); -// BOOST_REQUIRE( *Af.data_elements() == 1. ); -// BOOST_REQUIRE( Af.data_elements()[Af.num_elements()-1] == 99. ); -// BOOST_REQUIRE( size(Af) == 10 ); -// BOOST_REQUIRE( extension(Af).start() == 1 ); -// BOOST_REQUIRE( extension(Af).finish() == 11 ); - -// auto Af1 = multi::array({10, 10}, 0.).reindex(1, 1); - -// BOOST_REQUIRE( size(Af1) == 10 ); -// BOOST_REQUIRE( Af1[10][10] == 0. ); - -// multi::array B({{0, 10}, {0, 20}}, 0.); -// B[0][0] = 1.; -// B[1][1] = 2.; -// B[2][2] = 3.; -// B[9][19] = 99.; - -// BOOST_REQUIRE( size(B) == 10 ); -// BOOST_REQUIRE( B != Af ); -// BOOST_REQUIRE( std::equal(begin(Af.reindexed(0, 0)), end(Af.reindexed(0, 0)), begin(B), end(B)) ); -// BOOST_REQUIRE( std::equal(begin(Af), end(Af), begin(B.reindexed(1, 1)), end(B.reindexed(1, 1)) ) ); -// BOOST_REQUIRE( std::equal(begin(Af), end(Af), begin(B.reindexed(0, 1)), end(B.reindexed(0, 1)) ) ); - -// BOOST_REQUIRE( Af.reindexed(0, 0) == B ); - -// B = Af; // TODO(correaa) implement assignment for 1-based arrays -// BOOST_REQUIRE( B[1][1] = 1. ); -// BOOST_REQUIRE( B[10][20] == 99. 
); -// BOOST_REQUIRE( B == Af ); +// multi::array Af({{1, 1 + 10}, {1, 1 + 20}}, 0.); +// Af[1][1] = 1.; +// Af[2][2] = 2.; +// Af[3][3] = 3.; +// Af[10][20] = 99.; + +// BOOST_REQUIRE( Af[1][1] = 1. ); +// BOOST_REQUIRE( Af[10][20] == 99. ); +// BOOST_REQUIRE( *Af.data_elements() == 1. ); +// BOOST_REQUIRE( Af.data_elements()[Af.num_elements()-1] == 99. ); +// BOOST_REQUIRE( size(Af) == 10 ); +// BOOST_REQUIRE( extension(Af).start() == 1 ); +// BOOST_REQUIRE( extension(Af).finish() == 11 ); + +// auto Af1 = multi::array({10, 10}, 0.).reindex(1, 1); + +// BOOST_REQUIRE( size(Af1) == 10 ); +// BOOST_REQUIRE( Af1[10][10] == 0. ); + +// multi::array B({{0, 10}, {0, 20}}, 0.); +// B[0][0] = 1.; +// B[1][1] = 2.; +// B[2][2] = 3.; +// B[9][19] = 99.; + +// BOOST_REQUIRE( size(B) == 10 ); +// BOOST_REQUIRE( B != Af ); +// BOOST_REQUIRE( std::equal(begin(Af.reindexed(0, 0)), end(Af.reindexed(0, 0)), begin(B), end(B)) ); +// BOOST_REQUIRE( std::equal(begin(Af), end(Af), begin(B.reindexed(1, 1)), end(B.reindexed(1, 1)) ) ); +// BOOST_REQUIRE( std::equal(begin(Af), end(Af), begin(B.reindexed(0, 1)), end(B.reindexed(0, 1)) ) ); + +// BOOST_REQUIRE( Af.reindexed(0, 0) == B ); + +// B = Af; // TODO(correaa) implement assignment for 1-based arrays +// BOOST_REQUIRE( B[1][1] = 1. ); +// BOOST_REQUIRE( B[10][20] == 99. ); +// BOOST_REQUIRE( B == Af ); } BOOST_AUTO_TEST_CASE(one_base_2D_ref) { std::array, 3> arr = {{ - {{ 1., 2., 3., 4., 5.}}, - {{ 6., 7., 8., 9., 10.}}, - {{11., 12., 13., 14., 15.}} + {{ 1.0, 2.0, 3.0, 4.0, 5.0}}, + {{ 6.0, 7.0, 8.0, 9.0, 10.0}}, + {{11.0, 12.0, 13.0, 14.0, 15.0}}, }}; - BOOST_REQUIRE( arr[0][0] == 1. 
); + BOOST_REQUIRE( arr[0][0] == 1.0 ); -// multi::array_ref const& Ar = *multi::array_ptr(&arr[0][0], {3, 5}); -// BOOST_REQUIRE( &Ar[1][3] == &arr[1][3] ); +// multi::array_ref const& Ar = *multi::array_ptr(&arr[0][0], {3, 5}); +// BOOST_REQUIRE( &Ar[1][3] == &arr[1][3] ); -// multi::array_ref const& Ar2 = *multi::array_ptr(&arr[0][0], {{1, 1+3}, {1, 1+5}}); -// BOOST_REQUIRE( sizes(Ar) == sizes(Ar2) ); -// BOOST_REQUIRE( &Ar2[1][1] == &arr[0][0] ); -// BOOST_REQUIRE( &Ar2[2][4] == &arr[1][3] ); +// multi::array_ref const& Ar2 = *multi::array_ptr(&arr[0][0], {{1, 1+3}, {1, 1+5}}); +// BOOST_REQUIRE( sizes(Ar) == sizes(Ar2) ); +// BOOST_REQUIRE( &Ar2[1][1] == &arr[0][0] ); +// BOOST_REQUIRE( &Ar2[2][4] == &arr[1][3] ); -// BOOST_REQUIRE( Ar2.extensions() != Ar.extensions() ); -// BOOST_REQUIRE( not(Ar2 == Ar) ); -// BOOST_REQUIRE( Ar2 != Ar ); -// BOOST_REQUIRE( extensions(Ar2.reindexed(0, 0)) == extensions(Ar) ); -// BOOST_REQUIRE( Ar2.reindexed(0, 0) == Ar ); +// BOOST_REQUIRE( Ar2.extensions() != Ar.extensions() ); +// BOOST_REQUIRE( not(Ar2 == Ar) ); +// BOOST_REQUIRE( Ar2 != Ar ); +// BOOST_REQUIRE( extensions(Ar2.reindexed(0, 0)) == extensions(Ar) ); +// BOOST_REQUIRE( Ar2.reindexed(0, 0) == Ar ); -// static_assert( not std::is_assignable{}, "!" ); +// static_assert( not std::is_assignable{}, "!" ); } diff --git a/external_codes/boost_multi/multi/test/overload.cpp b/external_codes/boost_multi/multi/test/overload.cpp index ec4d94929f..1f27b32286 100644 --- a/external_codes/boost_multi/multi/test/overload.cpp +++ b/external_codes/boost_multi/multi/test/overload.cpp @@ -1,26 +1,48 @@ // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi overload resolution" -#include - -#include "multi/array.hpp" - -#include +// Copyright 2018-2023 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; -inline auto what_is(multi::array< double , 2> const& /*arr*/){return std::string{"real"} ;} -inline auto what_is(multi::array, 2> const& /*arr*/){return std::string{"complex"};} +inline auto what_is(multi::array const& /*arr*/) { return std::string{"real"}; } // std::string NOLINT(fuchsia-default-arguments-calls) +inline auto what_is(multi::array, 2> const& /*arr*/) { return std::string{"complex"}; } // std::string NOLINT(fuchsia-default-arguments-calls) -BOOST_AUTO_TEST_CASE(multi_array_range_section) { - multi::array< double , 2> real_A({10, 20}); - multi::array, 2> cplx_A({10, 20}); +BOOST_AUTO_TEST_CASE(multi_array_overload) { + multi::array const real_A({10, 20}); + multi::array, 2> const cplx_A({10, 20}); - std::string real_str = what_is(real_A); - std::string complex_str = what_is(cplx_A); + std::string const real_str = what_is(real_A); + std::string const complex_str = what_is(cplx_A); BOOST_REQUIRE( real_str == "real" ); BOOST_REQUIRE( complex_str == "complex" ); } - diff --git a/external_codes/boost_multi/multi/test/partitioned.cpp b/external_codes/boost_multi/multi/test/partitioned.cpp index ab03f9b0dc..402b0f7f46 100644 --- 
a/external_codes/boost_multi/multi/test/partitioned.cpp +++ b/external_codes/boost_multi/multi/test/partitioned.cpp @@ -1,20 +1,44 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi partitioned operation" -#define BOOST_TEST_DYN_LINK -#include - -#include "multi/array.hpp" +// Copyright 2018-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(array_partitioned_1d) { - multi::array A1 = {0, 1, 2, 3, 4, 5}; + multi::array A1 = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0}; + auto&& A2_ref = A1.partitioned(2); - static_assert( std::decay::type::rank {} == decltype(A1)::rank {} + 1 , "!"); - static_assert( std::decay_t::rank_v == decltype(A1)::rank_v +1 , "!"); + static_assert(std::decay::type::rank{} == decltype(A1)::rank{} + 1); + static_assert(std::decay_t::rank_v == decltype(A1)::rank_v + 1); BOOST_REQUIRE( size(A2_ref ) == 2 ); BOOST_REQUIRE( size(A2_ref[0]) == 3 ); @@ -25,16 +49,17 @@ BOOST_AUTO_TEST_CASE(array_partitioned_1d) { } 
BOOST_AUTO_TEST_CASE(array_partitioned_2d) { - multi::array A2 = { - { 0, 1, 2, 3, 4, 5}, - { 6, 7, 8, 9, 10, 11}, + multi::array A2 = { + { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0}, + { 6.0, 7.0, 8.0, 9.0, 10.0, 11.0}, - { 12, 13, 14, 15, 16, 17}, - { 18, 19, 20, 21, 22, 23}, + {12.0, 13.0, 14.0, 15.0, 16.0, 17.0}, + {18.0, 19.0, 20.0, 21.0, 22.0, 23.0}, }; auto&& A3_ref = A2.partitioned(2); - static_assert( std::decay_t::rank_v == decltype(A2)::rank_v + 1 , "!"); + static_assert(std::decay_t::rank{} == decltype(A2)::rank{} + 1); + static_assert(std::decay_t::rank_v == decltype(A2)::rank_v + 1); BOOST_REQUIRE( num_elements(A3_ref) == num_elements(A2) ); BOOST_REQUIRE( size(A3_ref)==2 ); @@ -44,13 +69,15 @@ BOOST_AUTO_TEST_CASE(array_partitioned_2d) { } BOOST_AUTO_TEST_CASE(array_partitioned) { + using namespace std::string_literals; // NOLINT(build/namespaces) for ""s + multi::array A2 = { - { "s0P0", "s1P0"}, - { "s0P1", "s1P1"}, - { "s0P2", "s1P2"}, - { "s0P3", "s1P3"}, - { "s0P4", "s1P4"}, - { "s0P5", "s1P5"}, + {"s0P0"s, "s1P0"s}, + {"s0P1"s, "s1P1"s}, + {"s0P2"s, "s1P2"s}, + {"s0P3"s, "s1P3"s}, + {"s0P4"s, "s1P4"s}, + {"s0P5"s, "s1P5"s}, }; BOOST_REQUIRE( size(A2) == 6 ); @@ -64,7 +91,10 @@ BOOST_AUTO_TEST_CASE(array_partitioned) { BOOST_REQUIRE( std::get<1>(sizes(A2)) == 2 ); BOOST_REQUIRE( size(A2.partitioned(3)) == 3 ); - static_assert( decltype(A2.partitioned(3))::rank_v == 3 , "!"); + + static_assert(decltype(A2.partitioned(3))::rank{} == 3); + static_assert(decltype(A2.partitioned(3))::rank::value == 3); + static_assert(decltype(A2.partitioned(3))::rank_v == 3); BOOST_REQUIRE(( sizes(A2.partitioned(3)) == decltype(sizes(A2.partitioned(3))){3, 2, 2} )); @@ -73,129 +103,148 @@ BOOST_AUTO_TEST_CASE(array_partitioned) { BOOST_REQUIRE( std::get<2>(sizes(A2.partitioned(3))) == 2 ); BOOST_REQUIRE( size(A2.partitioned(1)) == 1 ); - static_assert( decltype(A2.partitioned(1))::rank_v == 3 , "!"); + + static_assert(decltype(A2.partitioned(1))::rank{} == 3); + 
static_assert(decltype(A2.partitioned(1))::rank::value == 3); + static_assert(decltype(A2.partitioned(1))::rank_v == 3); + BOOST_REQUIRE( &A2.partitioned(1).rotated()[3][1][0] == &A2[3][1] ); } template class propagate_const; -template class propagate_const{ - T& r_; +template class propagate_const { + T& r_; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) public: explicit propagate_const(T& other) : r_{other} {} propagate_const(propagate_const const&) = delete; - propagate_const(propagate_const&&) = delete; - // NOLINTNEXTLINE(fuchsia-trailing-return,-warnings-as-errors): reference adaptor - auto operator=(propagate_const const&) -> propagate_const& = default; - // NOLINTNEXTLINE(fuchsia-trailing-return,-warnings-as-errors): reference adaptor + propagate_const(propagate_const&&) = delete; + + auto operator=(propagate_const const&) -> propagate_const& = default; auto operator=(propagate_const&&) noexcept -> propagate_const& = default; - // NOLINTNEXTLINE(fuchsia-trailing-return,-warnings-as-errors): reference adaptor - auto operator=(T const& other) -> propagate_const& {r_ = other; return *this;} + + auto operator=(T const& other) -> propagate_const& { + r_ = other; + return *this; + } + ~propagate_const() noexcept = default; - explicit operator T const&() const noexcept {return r_;} - explicit operator T &() noexcept {return r_;} + + explicit operator T const&() const noexcept { return r_; } + explicit operator T&() noexcept { return r_; } }; -template class propagate_const{ - T const& r_; +template class propagate_const { + T const& r_; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) public: explicit propagate_const(T const& other) : r_{other} {} - // NOLINTNEXTLINE(fuchsia-trailing-return,-warnings-as-errors): reference adaptor auto operator=(T const& other) -> propagate_const& = delete; - explicit operator T const&() const noexcept {return r_;} + explicit operator T const&() const noexcept { return r_; } }; 
BOOST_AUTO_TEST_CASE(array_encoded_subarray) { - multi::array arr = { // arr[walker][encoded_property] // 7 walkers - {99., 99., 0.00, 0.01, 0.10, 0.11, 0.20, 0.21, 99.}, - {99., 99., 1.00, 1.01, 1.10, 1.11, 1.20, 1.21, 99.}, - {99., 99., 2.00, 2.01, 2.10, 2.11, 2.20, 2.21, 99.}, - {99., 99., 3.00, 3.01, 3.10, 3.11, 3.20, 3.21, 99.}, - {99., 99., 4.00, 4.01, 4.10, 4.11, 4.20, 4.21, 99.}, - {99., 99., 5.00, 5.01, 5.10, 5.11, 5.20, 5.21, 99.}, - {99., 99., 6.00, 6.01, 6.10, 6.11, 6.20, 6.21, 99.}, + // arr[walker][encoded_property] // 7 walkers + multi::array arr = { + {990, 990, 1000, 001, 10, 11, 20, 21, 990}, + {990, 990, 100, 101, 110, 111, 120, 121, 990}, + {990, 990, 200, 201, 210, 211, 220, 221, 990}, + {990, 990, 300, 301, 310, 311, 320, 321, 990}, + {990, 990, 400, 401, 410, 411, 420, 421, 990}, + {990, 990, 500, 501, 510, 511, 520, 521, 990}, + {990, 990, 600, 601, 610, 611, 620, 621, 990}, }; multi::iextension const encoded_3x2_range = {2, 8}; + auto&& arrRPU = arr.rotated()(encoded_3x2_range).partitioned(3).unrotated(); - static_assert( decltype(+arrRPU)::rank_v == 3 , "!"); + static_assert(decltype(+arrRPU)::rank::value == 3); + static_assert(decltype(+arrRPU)::rank{} == 3); + static_assert(decltype(+arrRPU)::rank_v == 3); + BOOST_REQUIRE(( sizes(arrRPU) == decltype(sizes(arrRPU)){7, 3, 2} )); BOOST_REQUIRE( arrRPU[4].num_elements() == 3*2L ); BOOST_REQUIRE( &arrRPU[4][1][0] == &arr[4][4] ); - BOOST_REQUIRE( arrRPU[4][1][0] == 4.10 ); + BOOST_REQUIRE( arrRPU[4][1][0] == 410 ); BOOST_REQUIRE(( arrRPU[4] == multi::array{ - {4.00, 4.01}, - {4.10, 4.11}, - {4.20, 4.21}, + {400, 401}, + {410, 411}, + {420, 421}, } )); - arrRPU[4][1][0] = 1111.; - BOOST_REQUIRE( arr[4][4] == 1111. 
); - - class walker_ref{ - using raw_source_reference = decltype(std::declval&>()[0]); - using internal_array_type = decltype(std::declval()({2, 8}).partitioned(3)); - public: - propagate_const prop1; // NOLINT(misc-non-private-member-variables-in-classes) - propagate_const prop2; // NOLINT(misc-non-private-member-variables-in-classes) - internal_array_type slater_array; // NOLINT(misc-non-private-member-variables-in-classes) - propagate_const prop3; // NOLINT(misc-non-private-member-variables-in-classes) - explicit walker_ref(raw_source_reference&& row) : prop1{row[0]}, prop2{row[1]}, slater_array{row({2, 8}).partitioned(3)}, prop3{row[8]}{} + arrRPU[4][1][0] = 11110; + BOOST_REQUIRE( arr[4][4] == 11110 ); + + class walker_ref { + using raw_source_reference = decltype(std::declval&>()[0]); + using internal_array_type = decltype(std::declval()({2, 8}).partitioned(3)); + + public: // NOLINT(whitespace/indent) bug in cpplint + propagate_const prop1; // NOLINT(misc-non-private-member-variables-in-classes) + propagate_const prop2; // NOLINT(misc-non-private-member-variables-in-classes) + internal_array_type slater_array; // NOLINT(misc-non-private-member-variables-in-classes) + propagate_const prop3; // NOLINT(misc-non-private-member-variables-in-classes) + + explicit walker_ref(raw_source_reference&& row) : prop1{row[0]}, prop2{row[1]}, slater_array{row({2, 8}).partitioned(3)}, prop3{std::move(row)[8]} {} }; auto&& wr = walker_ref(arr[5]); + wr.prop1 = 88; - BOOST_REQUIRE( wr.slater_array[2][1] == 5.21 ); - wr.slater_array[2][1] = 9999.; + BOOST_REQUIRE( wr.slater_array[2][1] == 521 ); + + wr.slater_array[2][1] = 99990; } BOOST_AUTO_TEST_CASE(array_partitioned_add_to_last) { - multi::array arr = { + multi::array arr = { { - { 0., 1., 2., 3., 4., 5.}, - { 6., 7., 8., 9., 10., 11.}, - { 12., 13., 14., 15., 16., 17.}, - { 18., 19., 20., 21., 22., 23.}, + { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0}, + { 6.0, 7.0, 8.0, 9.0, 10.0, 11.0}, + { 12.0, 13.0, 14.0, 15.0, 16.0, 17.0}, + { 
18.0, 19.0, 20.0, 21.0, 22.0, 23.0}, }, { - { 0., 1., 2., 3., 4., 5.}, - { 6., 7., 8., 9., 10., 11.}, - { 12., 13., 14., 15., 16., 17.}, - { 18., 19., 20., 21., 22., 23.}, + { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0}, + { 6.0, 7.0, 8.0, 9.0, 10.0, 11.0}, + { 12.0, 13.0, 14.0, 15.0, 16.0, 17.0}, + { 18.0, 19.0, 20.0, 21.0, 22.0, 23.0}, } }; - auto strides = std::apply([](auto... strds) {return std::array{{strds...}};}, arr.strides()); + auto strides = std::apply([](auto... strds) { return std::array{{strds...}}; }, arr.layout().strides()); + // auto strides = std::apply([](auto... strds) { return std::array{{strds...}}; }, arr.strides()); - BOOST_REQUIRE( std::is_sorted(strides.rbegin(), strides.rend()) and arr.num_elements() == arr.nelems() ); // contiguous c-ordering + BOOST_REQUIRE( std::is_sorted(strides.rbegin(), strides.rend()) && arr.num_elements() == arr.nelems() ); // contiguous c-ordering +#ifndef _MSC_VER // problem with MSVC 14.3 c++17 auto&& A4 = arr.reinterpret_array_cast(1); BOOST_REQUIRE(( arr.extensions() == decltype(arr.extensions()){2, 4, 6} )); BOOST_REQUIRE(( A4.extensions() == decltype(A4.extensions()){2, 4, 6, 1} )); - BOOST_REQUIRE( A4.is_flattable() ); - BOOST_REQUIRE( A4.flatted().is_flattable() ); +// BOOST_REQUIRE( A4.is_flattable() ); +// BOOST_REQUIRE( A4.flatted().is_flattable() ); BOOST_REQUIRE( &A4[1][2][3][0] == &arr[1][2][3] ); +#endif } BOOST_AUTO_TEST_CASE(array_partitioned_vs_chunked_1D) { - multi::array arr = {0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.}; + multi::array arr = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0}; BOOST_REQUIRE( size(arr.partitioned(3)) == 3 ); - BOOST_REQUIRE( arr.partitioned(3)[1] == decltype(+arr.partitioned(3)[1])({4., 5., 6., 7.}) ); + BOOST_REQUIRE( arr.partitioned(3)[1] == decltype(+arr.partitioned(3)[1])({4.0, 5.0, 6.0, 7.0}) ); BOOST_REQUIRE( &arr.partitioned(3)[1][2] == &arr[6] ); BOOST_REQUIRE( size(arr.chunked(3)) == 4 ); - BOOST_REQUIRE( arr.chunked(3)[1] == 
decltype(+arr.chunked(3)[1])({3., 4., 5.}) ); + BOOST_REQUIRE( arr.chunked(3)[1] == decltype(+arr.chunked(3)[1])({3.0, 4.0, 5.0}) ); BOOST_REQUIRE( &arr.chunked(3)[1][2] == &arr[5] ); } @@ -207,4 +256,3 @@ BOOST_AUTO_TEST_CASE(array_partitioned_vs_chunked_2D) { BOOST_REQUIRE( size(arr.chunked(5)) == 20 ); BOOST_REQUIRE( &arr.chunked(5)[1][2] == &arr[7] ); } - diff --git a/external_codes/boost_multi/multi/test/pmr.cpp b/external_codes/boost_multi/multi/test/pmr.cpp index bd130870f2..db410edfcc 100644 --- a/external_codes/boost_multi/multi/test/pmr.cpp +++ b/external_codes/boost_multi/multi/test/pmr.cpp @@ -1,56 +1,84 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi pmr allocators" -#include - -#include "multi/array.hpp" - -#include // for polymorphic memory resource, monotonic buffer +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4244) // narrowing conversion +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; -BOOST_AUTO_TEST_CASE(pmr_partially_formed) { +BOOST_AUTO_TEST_CASE(pmr_dummy) { +} +#ifdef BOOST_MULTI_HAS_MEMORY_RESOURCE +BOOST_AUTO_TEST_CASE(pmr_partially_formed) { { char buffer[] = "0123456789012345678901234567890123456789012345678901234567890123456789"; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) use raw memory std::pmr::monotonic_buffer_resource mbr{std::data(buffer), std::size(buffer)}; static_assert( std::size(buffer) > 6*sizeof(double) ); - multi::array> A({2, 3}, &mbr); // NOLINT(readability-identifier-length) + multi::array> const arr({2, 3}, &mbr); BOOST_TEST( buffer[ 0] == '0' ); // buffer is intact when initializing without value BOOST_TEST( buffer[13] == '3' ); - BOOST_TEST( A.num_elements() == 2*3 ); - // BOOST_TEST( A[0][0] != 0. ); - // BOOST_TEST( A[1][2] != 0. 
); + BOOST_TEST( arr.num_elements() == 2*3 ); + // BOOST_TEST( arr[0][0] != 0.0 ); + // BOOST_TEST( arr[1][2] != 0.0 ); } { char buffer[] = "0123456789012345678901234567890123456789012345678901234567890123456789"; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) use raw memory - std::pmr::monotonic_buffer_resource mbr{std::data(buffer), std::size(buffer)}; + std::pmr::monotonic_buffer_resource mbr(std::data(buffer), std::size(buffer)); static_assert( std::size(buffer) > 6*sizeof(double) ); - multi::array> A({2, 3}, 0., &mbr); // NOLINT(readability-identifier-length) - // BOOST_TEST( buffer[ 0] != '0' ); // buffer not is intact when initializing with value - // BOOST_TEST( buffer[13] != '3' ); + multi::array> A({2, 3}, 0.0, &mbr); // NOLINT(readability-identifier-length) + // BOOST_TEST( buffer[ 0] != '0' ); // buffer not is intact when initializing with value + // BOOST_TEST( buffer[13] != '3' ); - BOOST_TEST( A[0][0] == 0. ); - BOOST_TEST( A[1][2] == 0. 
); + BOOST_TEST( A[0][0] == 0.0 ); + BOOST_TEST( A[1][2] == 0.0 ); } { char buffer[] = "0123456789012345678901234567890123456789012345678901234567890123456789"; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) use raw memory - std::pmr::monotonic_buffer_resource mbr{std::data(buffer), std::size(buffer)}; + std::pmr::monotonic_buffer_resource mbr(std::data(buffer), std::size(buffer)); static_assert( std::size(buffer) > 6*sizeof(double) ); - multi::array> A({2, 3}, {}, &mbr); // NOLINT(readability-identifier-length) - // BOOST_TEST( buffer[ 0] != '0' ); // buffer not is intact when initializing with value - // BOOST_TEST( buffer[13] != '3' ); + multi::array> arr({2, 3}, {}, &mbr); + // BOOST_TEST( buffer[ 0] != '0' ); // buffer not is intact when initializing with value + // BOOST_TEST( buffer[13] != '3' ); - BOOST_TEST( A[0][0] == double{} ); - BOOST_TEST( A[1][2] == double{} ); + BOOST_TEST( arr[0][0] == double{} ); + BOOST_TEST( arr[1][2] == double{} ); } { char buffer[] = "0123456789012345678901234567890123456789012345678901234567890123456789"; // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) use raw memory @@ -58,11 +86,41 @@ BOOST_AUTO_TEST_CASE(pmr_partially_formed) { std::pmr::monotonic_buffer_resource mbr{std::data(buffer), std::size(buffer)}; static_assert( std::size(buffer) > 6*sizeof(double) ); - multi::array> A({2, 3}, 666., &mbr); // NOLINT(readability-identifier-length) - // BOOST_TEST( buffer[ 0] != '0' ); // buffer not is intact when initializing with value - // BOOST_TEST( buffer[13] != '3' ); + multi::array> arr({2, 3}, 666.0, &mbr); + // BOOST_TEST( buffer[ 0] != '0' ); // buffer not is intact when initializing with value + // BOOST_TEST( buffer[13] != '3' ); - BOOST_TEST( A[0][0] == 666. ); - BOOST_TEST( A[1][2] == 666. 
); + BOOST_TEST( arr[0][0] == 666.0 ); + BOOST_TEST( arr[1][2] == 666.0 ); } } + +#ifndef _MSC_VER // problems with MSVC 14.3 c++17 +BOOST_AUTO_TEST_CASE(pmr_benchmark) { + // auto* resp = std::pmr::unsynchronized_pool_resource(std::pmr::get_default_resource()); + auto* resp = std::pmr::get_default_resource(); + + auto count = 50; + auto start_time = std::chrono::high_resolution_clock::now(); + + multi::extension_t const exts{0, count}; + auto acc = std::transform_reduce( + exts.begin(), exts.end(), int64_t{0}, + std::plus<>{}, + [&resp](auto idx) { + multi::array> arr( + multi::extensions_t<2>{1000 - idx%10, 1000 + idx%10}, // MSVC needs multi::extensions_t<2> + resp + ); + std::fill_n(arr.data_elements(), arr.num_elements(), 1); + auto* be = arr.data_elements(); + decltype(be) en = arr.data_elements() + arr.num_elements(); + return std::accumulate(be, en, int64_t{}, std::plus{}); + } + ); + + auto time = std::chrono::high_resolution_clock::now() - start_time; + std::cout<< time.count() / count <<" "<< acc << '\n'; +} +#endif +#endif diff --git a/external_codes/boost_multi/multi/test/ranges.cpp b/external_codes/boost_multi/multi/test/ranges.cpp new file mode 100644 index 0000000000..d06d7aad4d --- /dev/null +++ b/external_codes/boost_multi/multi/test/ranges.cpp @@ -0,0 +1,114 @@ +// Copyright 2023-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include // for std::ranges::fold_left + +// Suppress warnings from boost.test +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wundef" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wsign-conversion" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#endif + +#ifndef BOOST_TEST_MODULE +#define BOOST_TEST_MAIN +#endif + +#include + +#if defined(__clang__) +#pragma clang diagnostic pop +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + +BOOST_AUTO_TEST_CASE(range_accumulate) { +#if defined(__cpp_lib_ranges_fold) && (__cpp_lib_ranges_fold >= 202207L) + namespace multi = boost::multi; + + static constexpr auto accumulate = [](auto const& R) { return std::ranges::fold_left(R, 0, std::plus<>{}); }; + + auto const values = multi::array{ + {2, 0, 2, 2}, + {2, 2, 0, 4}, + {2, 2, 0, 4}, + {2, 2, 0, 0}, + {2, 7, 0, 2}, + {2, 2, 4, 4}, + }; + + boost::multi::array> aaa = {1, 2, 3}; + + constexpr auto rowOddSum = [](auto const& arr) { + return std::ranges::find_if(arr, [](auto const& row) { return (accumulate(row) & 1) == 1; }); + }; + + auto const result = rowOddSum(values); + + BOOST_REQUIRE( result - values.begin() == 4 ); +#endif +} + +BOOST_AUTO_TEST_CASE(range_find) { +#if defined(__cpp_lib_ranges_fold) && (__cpp_lib_ranges_fold >= 202207L) + namespace multi = boost::multi; + + using Array2D = multi::array; + + Array2D const a = { + {1, 2}, + {3, 4}, + }; + { + auto const needle = std::ranges::find_if(a, [](auto const& row) { return row[0] == 9; }); + BOOST_REQUIRE(needle == a.end()); + } + { + std::ranges::equal_to eto; + + auto a2 = a(); + + [[maybe_unused]] auto const& _84 = 
static_cast> const&>(a); + [[maybe_unused]] auto const& _85 = static_cast> const&>(std::as_const(a)); + + auto a1 = a[1]; + auto a1_val = +a[1]; + + // [[maybe_unused]] auto const& _90 = static_cast>&>(a1_val); + // [[maybe_unused]] auto const& _91 = static_cast>&>(std::as_const(a1_val)); + + // static_assert( std::convertible_to>&, const boost::multi::subarray>&> ); + // static_assert( std::equality_comparable_with>&,boost::multi::subarray>&> ); + + bool const res = eto(a1_val, a1); + BOOST_REQUIRE( res ); + // std::ranges::equal_to&,boost::multi::array>&,boost::multi::subarray>& + } + + { + auto&& a1 = a[1]; + auto const needle = std::ranges::find(a, a1); + BOOST_REQUIRE(needle != a.end()); + BOOST_REQUIRE( *needle == a1 ); + BOOST_REQUIRE( *needle == a[1] ); + } + + { + auto const needle = std::ranges::find(a, a[1]); + BOOST_REQUIRE(needle != a.end()); + BOOST_REQUIRE( *needle == a[1] ); + } +#endif +} diff --git a/external_codes/boost_multi/multi/test/reextent.cpp b/external_codes/boost_multi/multi/test/reextent.cpp index 28eed8832d..6788ba14db 100644 --- a/external_codes/boost_multi/multi/test/reextent.cpp +++ b/external_codes/boost_multi/multi/test/reextent.cpp @@ -1,10 +1,35 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa +// Copyright 2018-2023 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4244) +#endif -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi reextent" -#include +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif -#include "multi/array.hpp" +#include namespace multi = boost::multi; @@ -12,17 +37,17 @@ BOOST_AUTO_TEST_CASE(array_reextent) { multi::array arr({2, 3}); BOOST_REQUIRE( num_elements(arr) == 6 ); - arr[1][2] = 6.; - BOOST_REQUIRE( arr[1][2] == 6. ); + arr[1][2] = 6.0; + BOOST_REQUIRE( arr[1][2] == 6.0 ); multi::array arr3({2, 3}); BOOST_REQUIRE(size(arr3) == 2); BOOST_REQUIRE(size(arr3[0]) == 3); - arr.reextent({5, 4}, 99.); + arr.reextent({5, 4}, 99.0); BOOST_REQUIRE( num_elements(arr)== 5L*4L ); - BOOST_REQUIRE( arr[1][2] == 6. ); // reextent preserves values when it can... - BOOST_REQUIRE( arr[4][3] == 99. ); // ...and gives selected value to the rest + BOOST_REQUIRE( arr[1][2] == 6.0 ); // reextent preserves values when it can... + BOOST_REQUIRE( arr[4][3] == 99.0 ); // ...and gives selected value to the rest } BOOST_AUTO_TEST_CASE(array_reextent_noop) { @@ -39,7 +64,7 @@ BOOST_AUTO_TEST_CASE(array_reextent_noop) { auto* const A_base = arr.base(); arr.reextent({2, 3}); BOOST_REQUIRE( num_elements(arr)== 2L*3L ); - BOOST_REQUIRE( arr[1][2] == 6. 
); // reextent preserves values when it can... + BOOST_REQUIRE( arr[1][2] == 6.0 ); // reextent preserves values when it can... BOOST_REQUIRE( A_base == arr.base() ); } @@ -48,17 +73,17 @@ BOOST_AUTO_TEST_CASE(array_reextent_noop_with_init) { multi::array arr({2, 3}); BOOST_REQUIRE( num_elements(arr) == 6 ); - arr[1][2] = 6.; - BOOST_REQUIRE( arr[1][2] == 6. ); + arr[1][2] = 6.0; + BOOST_REQUIRE( arr[1][2] == 6.0 ); multi::array arr3({2, 3}); BOOST_REQUIRE(size(arr3) == 2); BOOST_REQUIRE(size(arr3[0]) == 3); auto* const A_base = arr.base(); - arr.reextent({2, 3}, 99.); + arr.reextent({2, 3}, 99.0); BOOST_REQUIRE( num_elements(arr)== 2L*3L ); - BOOST_REQUIRE( arr[1][2] == 6. ); // reextent preserves values when it can... + BOOST_REQUIRE( arr[1][2] == 6.0 ); // reextent preserves values when it can... BOOST_REQUIRE( A_base == arr.base() ); } @@ -68,12 +93,16 @@ BOOST_AUTO_TEST_CASE(array_reextent_moved) { BOOST_REQUIRE( num_elements(arr) == 6 ); arr[1][2] = 6.; - BOOST_REQUIRE( arr[1][2] == 6. ); + BOOST_REQUIRE( arr[1][2] == 6.0 ); auto* const A_base = arr.base(); - arr = std::move(arr).reextent({2, 3}, 99.); // "arr = ..." suppresses linter bugprone-use-after-move,hicpp-invalid-access-moved + + arr = std::move(arr).reextent({2, 3}); // "arr = ..." suppresses linter bugprone-use-after-move,hicpp-invalid-access-moved + + BOOST_TEST_REQUIRE( arr.size() == 2 ); + BOOST_REQUIRE( arr.num_elements() == 2L*3L ); BOOST_REQUIRE( num_elements(arr)== 2L*3L ); - BOOST_TEST( arr[1][2] == 6. ); // after move the original elments might not be the same + BOOST_TEST(arr[1][2] == 6.0); // after move the original elments might not be the same BOOST_REQUIRE( A_base == arr.base() ); } @@ -82,13 +111,13 @@ BOOST_AUTO_TEST_CASE(array_reextent_moved_trivial) { multi::array arr({2, 3}); BOOST_REQUIRE( num_elements(arr) == 6 ); - arr[1][2] = 6.; - BOOST_REQUIRE( arr[1][2] == 6. 
); + arr[1][2] = 6.0; + BOOST_REQUIRE( arr[1][2] == 6.0 ); auto* const A_base = arr.base(); - arr = std::move(arr).reextent({2, 3}); // "arr = ..." suppresses linter bugprone-use-after-move,hicpp-invalid-access-moved + arr = std::move(arr).reextent({2, 3}); // "arr = ..." suppresses linter bugprone-use-after-move,hicpp-invalid-access-moved BOOST_REQUIRE( num_elements(arr)== 2L*3L ); - BOOST_REQUIRE( arr[1][2] == 6. ); // after move the original elments might not be the same + BOOST_REQUIRE( arr[1][2] == 6.0 ); // after move the original elments might not be the same BOOST_REQUIRE( A_base == arr.base() ); } @@ -97,178 +126,170 @@ BOOST_AUTO_TEST_CASE(array_reextent_moved_trivial_change_extents) { multi::array arr({2, 3}); BOOST_REQUIRE( num_elements(arr) == 6 ); - arr[1][2] = 6.; - BOOST_REQUIRE( arr[1][2] == 6. ); + arr[1][2] = 6.0; + BOOST_REQUIRE( arr[1][2] == 6.0 ); auto* const A_base = arr.base(); - arr = std::move(arr).reextent({4, 5}); + arr = std::move(arr).reextent({4, 5}); BOOST_REQUIRE( num_elements(arr)== 4L*5L ); - BOOST_REQUIRE( arr[1][2] != 6. ); // after move the original elments might not be the same + // BOOST_REQUIRE( arr[1][2] != 6.0 ); // after move the original elements might not be the same, but it is not 100% possible to check BOOST_REQUIRE( A_base != arr.base() ); } BOOST_AUTO_TEST_CASE(array_move_clear) { + multi::array const iarr; multi::array arr({2, 3}); - arr = multi::array(extensions(arr), 123.); - BOOST_REQUIRE( arr[1][2] == 123. ); + + arr = multi::array(extensions(arr), 123.0); + BOOST_REQUIRE( arr[1][2] == 123.0 ); arr.clear(); // clear(arr); BOOST_REQUIRE( num_elements(arr) == 0 ); BOOST_REQUIRE( size(arr) == 0 ); - arr.reextent({5, 4}, 66.); - BOOST_REQUIRE( arr[4][3] == 66. 
); + arr.reextent({5, 4}, 66.0); + BOOST_REQUIRE( arr[4][3] == 66.0 ); + + BOOST_REQUIRE(iarr.is_empty()); } BOOST_AUTO_TEST_CASE(array_reextent_1d) { - multi::array arr(multi::extensions_t<1>{multi::iextension{10}}, 4.); + multi::array arr(multi::extensions_t<1>{multi::iextension{10}}, 4.0); BOOST_REQUIRE( size(arr) == 10 ); - BOOST_REQUIRE( arr[9] == 4. ); + BOOST_REQUIRE( arr[9] == 4.0 ); arr.reextent(multi::extensions_t<1>{multi::iextension{20}}); BOOST_REQUIRE( size(arr) == 20 ); - BOOST_REQUIRE( arr[9] == 4. ); -// BOOST_REQUIRE( arr[19] == 0. ); // impossible to know since it is sometimes 0. + BOOST_REQUIRE( arr[9] == 4.0 ); + // BOOST_REQUIRE( arr[19] == 0.0 ); // impossible to know since it is only sometimes 0.0 - arr.reextent( boost::multi::tuple(22) ); + arr.reextent(boost::multi::tuple(22)); BOOST_REQUIRE( size(arr) == 22 ); - BOOST_REQUIRE( arr[9] == 4. ); + BOOST_REQUIRE( arr[9] == 4.0 ); - arr.reextent( {23} ); + arr.reextent({23}); BOOST_REQUIRE( size(arr) == 23 ); - -#pragma warning(push) // NOLINT(clang-diagnostic-unknown-pragmas) -#pragma warning (disable:1478 1786) // NOLINT(clang-diagnostic-unknown-pragmas) -#pragma nv_diagnostic push // NOLINT(clang-diagnostic-unknown-pragmas) -#pragma nv_diag_suppress 1215,1216,1444,1445 // NOLINT(clang-diagnostic-unknown-pragmas) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -// arr.reextent( std::make_tuple(24) ); -// BOOST_REQUIRE( size(arr) == 24 ); -#pragma GCC diagnostic pop -#pragma nv_diagnostic pop // NOLINT(clang-diagnostic-unknown-pragmas) -#pragma warning(pop) // NOLINT(clang-diagnostic-unknown-pragmas) } -//inline void fff(boost::multi::detail::tuple /*t*/) {} // NOLINT(google-runtime-int) for testing - -//#pragma warning(push) // NOLINT(clang-diagnostic-unknown-pragmas) -//#pragma warning (disable:1478 1786) // NOLINT(clang-diagnostic-unknown-pragmas) -//#pragma diagnostic push // NOLINT(clang-diagnostic-unknown-pragmas) -//#pragma diag_suppress 
1215,1216,1444,1445 // NOLINT(clang-diagnostic-unknown-pragmas) -//#pragma GCC diagnostic push -//#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -//BOOST_AUTO_TEST_CASE(tuple_implicit_test) { -// fff(1L); -// fff(1); -//} - -//BOOST_AUTO_TEST_CASE(tuple_conversion_deprecated) { -// boost::multi::tuple t{1, 2}; -// BOOST_REQUIRE( std::get<0>(t) == 1 ); -// BOOST_REQUIRE( std::get<1>(t) == 2 ); -//} -//#pragma GCC diagnostic pop -//#pragma diagnostic pop // NOLINT(clang-diagnostic-unknown-pragmas) -//#pragma warning(pop) // NOLINT(clang-diagnostic-unknown-pragmas) - BOOST_AUTO_TEST_CASE(tuple_decomposition) { - boost::multi::tuple tup{1, 2}; + boost::multi::tuple const tup{1, 2}; auto [t0, t1] = tup; BOOST_REQUIRE( t0 == 1 ); BOOST_REQUIRE( t1 == 2 ); } BOOST_AUTO_TEST_CASE(array_reextent_0D) { - multi::array arr({}, 4.); -// arr.reextent(arr.extensions()); // TODO(correaa) : fix unused for D = 0 - BOOST_REQUIRE( *arr.data_elements() == 4. ); + multi::array const arr({}, 4.0); + // arr.reextent(arr.extensions()); // TODO(correaa) : fix unused for D = 0 + BOOST_REQUIRE( *arr.data_elements() == 4.0 ); } BOOST_AUTO_TEST_CASE(array_reextent_1d_with_initialization) { - multi::array arr(multi::extensions_t<1>{multi::iextension{10}}, 4.); + multi::array arr(multi::extensions_t<1>{multi::iextension{10}}, 4.0); BOOST_REQUIRE( size(arr) == 10 ); - BOOST_REQUIRE( arr[9] == 4. ); + BOOST_REQUIRE( arr[9] == 4.0 ); - arr.reextent(multi::extensions_t<1>{multi::iextension{20}}, 8.); + arr.reextent(multi::extensions_t<1>{multi::iextension{20}}, 8.0); BOOST_REQUIRE( size(arr) == 20 ); - BOOST_REQUIRE( arr[9] == 4. ); - BOOST_REQUIRE( arr[19] == 8. ); + BOOST_REQUIRE( arr[9] == 4.0 ); + BOOST_REQUIRE( arr[19] == 8.0 ); } BOOST_AUTO_TEST_CASE(array_reextent_2d) { - multi::array arr({10, 20}, 4.); - BOOST_REQUIRE( arr[1][2] == 4. 
); + multi::array arr({10, 20}, 4.0); + BOOST_REQUIRE( arr[1][2] == 4.0 ); arr.clear(); BOOST_REQUIRE( num_elements(arr) == 0 ); BOOST_REQUIRE( size(arr) == 0 ); - arr.reextent({20, 30}, 9.); + arr.reextent({20, 30}, 9.0); BOOST_REQUIRE( arr[1][2] = 9. ); - BOOST_REQUIRE( arr[11][22] = 9. ); + BOOST_REQUIRE( arr[11][22] = 9.0 ); +} + +BOOST_AUTO_TEST_CASE(array_reextent_2d_with_move) { + multi::array arr = { + {1, 2, 3}, + {4, 5, 6}, + }; + BOOST_REQUIRE( arr.size() == 2 ); + + arr = std::move(arr).reextent({3, 2}); + + BOOST_REQUIRE( arr.size() == 3 ); + BOOST_REQUIRE( arr[1][2] = 10 ); } BOOST_AUTO_TEST_CASE(array_reextent_2d_array) { - multi::array arr({10, 20}, 4.); - BOOST_REQUIRE( arr[1][2] == 4. ); + multi::array arr({10, 20}, 4.0); + BOOST_REQUIRE( arr[1][2] == 4.0 ); arr.clear(); BOOST_REQUIRE( num_elements(arr) == 0 ); BOOST_REQUIRE( size(arr) == 0 ); } -#pragma GCC diagnostic push -#pragma GCC diagnostic warning "-Wunknown-pragmas" -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function - #endif -#elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function -#endif -template< class T, class U > +template constexpr auto comp_equal(T left, U right) noexcept -> bool { - using UT = std::make_unsigned_t; - using UU = std::make_unsigned_t; - if constexpr (std::is_signed_v == std::is_signed_v) { + using UT = std::make_unsigned_t; + using UU = std::make_unsigned_t; + if constexpr(std::is_signed_v == std::is_signed_v) { return left == right; - } else if constexpr (std::is_signed_v) { + } else if constexpr(std::is_signed_v) { return left < 0 ? false : static_cast(left) == right; } else { return right < 0 ? 
false : left == UU(right); } - #if not defined(__INTEL_COMPILER) and not defined(__NVCOMPILER) +#if !defined(__INTEL_COMPILER) && !defined(__NVCOMPILER) && !defined(_MSC_VER) __builtin_unreachable(); - #endif -} -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #pragma diagnostic pop - #endif -#elif defined __NVCOMPILER - #pragma diagnostic pop #endif -#pragma GCC diagnostic pop +} BOOST_AUTO_TEST_CASE(array_vector_size) { - std::vector vec(100); + std::vector const vec(100); // std::vector NOLINT(fuchsia-default-arguments-calls) { - // multi::array a( vec.size() ); // warning: sign-conversion - multi::array arr(static_cast(vec.size())); + // multi::array a( vec.size() ); // warning: sign-conversion + multi::array const arr(static_cast(vec.size())); BOOST_REQUIRE( comp_equal(arr.size(), vec.size()) ); } { - multi::array arr(multi::iextensions<1>(static_cast(vec.size()))); // warning: sign-conversion - // multi::array a(static_cast(v.size())); + multi::array const arr(multi::iextensions<1>(static_cast(vec.size()))); // warning: sign-conversion + // multi::array a(static_cast(v.size())); BOOST_REQUIRE( comp_equal(arr.size(), vec.size()) ); } } + +BOOST_AUTO_TEST_CASE(array_iota) { + multi::array const Aarr(10); + multi::array Barr(Aarr.extension().begin(), Aarr.extension().end()); + BOOST_REQUIRE( Barr[0] == 0 ); + BOOST_REQUIRE( Barr[1] == 1 ); + BOOST_REQUIRE( Barr[9] == 9 ); + + multi::array Carr(Aarr.extension()); + BOOST_REQUIRE( Carr[0] == 0 ); + BOOST_REQUIRE( Carr[1] == 1 ); + BOOST_REQUIRE( Carr[9] == 9 ); + + multi::array const Darr(Aarr.extensions()); + BOOST_REQUIRE( Darr.extensions() == Aarr.extensions() ); +} + +#ifndef __INTEL_COMPILER +BOOST_AUTO_TEST_CASE(extension_index_op) { + multi::array const Aarr({11, 13}); + auto Aext = Aarr.extensions(); + BOOST_REQUIRE( std::get<0>(Aext[3][5]) == 3 ); + BOOST_REQUIRE( std::get<1>(Aext[3][5]) == 5 ); + + for(int i = 0; i != 3; ++i) { + for(int j = 0; j != 5; 
++j) { + auto [ip, jp] = Aext[i][j]; + BOOST_REQUIRE(ip == i); + BOOST_REQUIRE(jp == j); + } + } +} +#endif diff --git a/external_codes/boost_multi/multi/test/reinterpret_array_cast.cpp b/external_codes/boost_multi/multi/test/reinterpret_array_cast.cpp index 5a575dade5..7a076dd72b 100644 --- a/external_codes/boost_multi/multi/test/reinterpret_array_cast.cpp +++ b/external_codes/boost_multi/multi/test/reinterpret_array_cast.cpp @@ -1,28 +1,57 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2022 Alfredo A. Correa +// Copyright 2018-2023 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi reinterpret array" -#include +#include -#include "multi/array.hpp" +#include +#include +#include -#include -#include +// Suppress warnings from boost.test +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wundef" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +#define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(multi_reinterpret_array_cast_struct_to_dimension) { struct vec3 { - double x, y, z; + double x; + double y; + double z; }; multi::array arr(multi::extensions_t<1>{multi::iextension{100}}); - arr[8] = {1., 2., 3.}; - BOOST_REQUIRE( arr[8].y == 2. 
); + arr[8] = {1.0, 2.0, 3.0}; + BOOST_REQUIRE( arr[8].y == 2.0 ); +#ifndef _MSC_VER // problems with MSVC 14.3 c++17 BOOST_REQUIRE( arr.reinterpret_array_cast(3)[8][1] == arr[8].y ); multi::array A2D = arr.reinterpret_array_cast(3); + + BOOST_REQUIRE( decltype(A2D)::dimensionality == decltype(arr)::dimensionality + 1 ); BOOST_REQUIRE( dimensionality(A2D) == dimensionality(arr) + 1 ); + BOOST_REQUIRE( size(A2D) == size(arr) ); BOOST_REQUIRE( A2D[8][1] == arr[8].y ); BOOST_REQUIRE( &A2D[8][1] != &arr[8].y ); @@ -30,36 +59,134 @@ BOOST_AUTO_TEST_CASE(multi_reinterpret_array_cast_struct_to_dimension) { BOOST_REQUIRE( & arr[8].x == & arr.reinterpret_array_cast(3)[8][0] ); BOOST_REQUIRE( & arr[8].y == & arr.reinterpret_array_cast(3)[8][1] ); BOOST_REQUIRE( & arr[8].z == & arr.reinterpret_array_cast(3)[8][2] ); +#endif +} + +BOOST_AUTO_TEST_CASE(multi_lower_dimension) { + struct vec3 { + double x; + double y; + double z; + + // [[maybe_unused]] auto operator==(vec3 const& other) const -> bool { return x == other.x && y == other.y && z == other.z; } + }; + + multi::array arr = { + {0.0, 0.1, 0.2}, + {1.0, 1.1, 1.2}, + {2.0, 2.1, 2.2}, + {3.0, 3.1, 3.2}, + }; + { + BOOST_TEST( arr.size() == 4 ); + BOOST_TEST( arr.flatted().size() == 12 ); + BOOST_TEST( arr.flatted().strided(3).size() == 4 ); + BOOST_TEST( arr.flatted().strided(3).reinterpret_array_cast().size() == 4 ); + + auto&& arrvec3 = arr.flatted().strided(3).reinterpret_array_cast(); + + BOOST_TEST( arr.flatted().size() == arrvec3.size()*3 ); + BOOST_TEST( &arrvec3[2].x == &arr[2][0] ); + } +} + +BOOST_AUTO_TEST_CASE(multi_lower_dimension_2d) { + struct vec3 { + double x; + double y; + double z; + }; + + multi::array d2 = { + {0.0, 0.1, 0.2, 0.0, 0.1, 0.2, 0.0, 0.1, 0.2}, + {1.0, 1.1, 1.2, 1.0, 1.1, 1.2, 1.0, 1.1, 1.2}, + {2.0, 2.1, 2.2, 2.0, 2.1, 2.2, 2.0, 2.1, 2.2}, + {3.0, 3.1, 3.2, 3.0, 3.1, 3.2, 3.0, 3.1, 3.2}, + }; + + { + auto&& d2strided3 = d2.unrotated().strided(3).rotated(); + BOOST_TEST( d2strided3.size() 
== 4 ); + BOOST_TEST( d2strided3[0].size() == 3 ); + BOOST_TEST( &d2strided3[1][2] == &d2[1][6] ); + } + { + auto&& v2view = d2.unrotated().strided(3).rotated().reinterpret_array_cast(); + BOOST_TEST( v2view.size() == 4 ); + BOOST_TEST( v2view[0].size() == 3 ); + BOOST_TEST( &v2view[1][2].x == &d2[1][6] ); + } +} + +BOOST_AUTO_TEST_CASE(multi_lower_dimension_3d) { + struct vec3 { + double x; + double y; + double z; + }; + + multi::array d3({4, 15, 9}, 0.0); + + { + auto&& d3strided3 = d3.unrotated().strided(3).rotated(); + BOOST_TEST( d3strided3.size() == 4 ); + BOOST_TEST( d3strided3[0][0].size() == 3 ); + BOOST_TEST( &d3strided3[3][1][2] == &d3[3][1][6] ); + } + { + auto&& v3view = d3.unrotated().strided(3).rotated().reinterpret_array_cast(); + BOOST_TEST( v3view.size() == 4 ); + BOOST_TEST( v3view[0][0].size() == 3 ); + BOOST_TEST( &v3view[3][1][2].x == &d3[3][1][6] ); + BOOST_TEST( &v3view[3][1][2].y == &d3[3][1][7] ); + BOOST_TEST( &v3view[3][1][2].z == &d3[3][1][8] ); + } } BOOST_AUTO_TEST_CASE(multi_reinterpret_array_cast_complex_to_real_extra_dimension) { using complex = std::complex; - multi::array arr(multi::extensions_t<1>{multi::iextension{100}}, complex{1., 2.}); + multi::array arr(multi::extensions_t<1>{multi::iextension{100}}, complex{1.0, 2.0}); BOOST_REQUIRE( size(arr) == 100 ); - BOOST_REQUIRE( real(arr[0]) == 1. ); - BOOST_REQUIRE( imag(arr[0]) == 2. 
); + { + complex const arr0 = arr[0]; + BOOST_TEST_REQUIRE( arr0.real() == 1.0 ); + BOOST_TEST_REQUIRE( arr0.imag() == 2.0 ); + } + + BOOST_TEST_REQUIRE( arr[0].real() == 1.0 ); + BOOST_TEST_REQUIRE( arr[0].imag() == 2.0 ); + + BOOST_TEST_REQUIRE( std::real(arr[0]) == 1.0 ); + BOOST_TEST_REQUIRE( std::imag(arr[0]) == 2.0 ); - BOOST_REQUIRE(( arr[0] == complex{1., 2.} )); + BOOST_TEST_REQUIRE( real(arr[0]) == 1.0 ); + BOOST_TEST_REQUIRE( imag(arr[0]) == 2.0 ); + BOOST_REQUIRE(( arr[0] == complex{1.0, 2.0} )); + +#ifndef _MSC_VER // problem with MVSC 14.3 c++17 multi::array arr2 = arr.reinterpret_array_cast(); BOOST_REQUIRE( dimensionality(arr2) == dimensionality(arr) ); - BOOST_REQUIRE( arr2[0] == 1 and arr2[1] == 1 ); + BOOST_REQUIRE( arr2[0] == 1 && arr2[1] == 1 ); multi::array arr3 = arr.reinterpret_array_cast(2); BOOST_REQUIRE(( sizes(arr3)==decltype(sizes(arr3)){100, 2} )); BOOST_REQUIRE( arr3[5][0] == real(arr[5]) ); BOOST_REQUIRE( arr3[5][1] == imag(arr[5]) ); +#endif } BOOST_AUTO_TEST_CASE(multi_reinterpret_array_cast_tuple_as_extra_dimension) { using vector3 = std::array; -// using vector3 = std::tuple; // for tuples reinterpret_array_cast is implementation dependent!! 
vector3 v3d; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast, cppcoreguidelines-avoid-c-arrays, hicpp-avoid-c-arrays, modernize-avoid-c-arrays): test BOOST_REQUIRE( &reinterpret_cast(v3d)[1] == &std::get<1>(v3d) ); + +#ifndef _MSC_VER // problem with MVSC 14.3 c++17 { multi::array arr(multi::extensions_t<1>{multi::iextension{10}}); BOOST_REQUIRE( &arr.reinterpret_array_cast(3)[2][1] == &std::get<1>(arr[2]) ); @@ -69,13 +196,19 @@ BOOST_AUTO_TEST_CASE(multi_reinterpret_array_cast_tuple_as_extra_dimension) { BOOST_REQUIRE( &arr.reinterpret_array_cast(3)[5][7][2] == &std::get<2>(arr[5][7]) ); } { - multi::array const arr({4, 5}, vector3{{1., 2., 3.}} ); + multi::array const arr({ + 4, 5 + }, + vector3{{1.0, 2.0, 3.0}}); + BOOST_REQUIRE( arr.reinterpret_array_cast(3).dimensionality == 3 ); + BOOST_REQUIRE( decltype(arr.reinterpret_array_cast(3))::dimensionality == 3 ); BOOST_REQUIRE( dimensionality(arr.reinterpret_array_cast(3)) == 3 ); - BOOST_REQUIRE( arr.reinterpret_array_cast(3).num_elements() == arr.num_elements()*3 ); - BOOST_REQUIRE( arr.reinterpret_array_cast(3).size() == 4 ); - BOOST_REQUIRE( arr.reinterpret_array_cast(3)[0].size() == 5 ); - BOOST_REQUIRE( arr.reinterpret_array_cast(3)[0][0].size() == 3 ); + + BOOST_REQUIRE( arr.reinterpret_array_cast(3).num_elements() == arr.num_elements()*3 ); + BOOST_REQUIRE( arr.reinterpret_array_cast(3).size() == 4 ); + BOOST_REQUIRE( arr.reinterpret_array_cast(3)[0].size() == 5 ); + BOOST_REQUIRE( arr.reinterpret_array_cast(3)[0][0].size() == 3 ); BOOST_REQUIRE( &arr.reinterpret_array_cast(3)[2][3][0] == &std::get<0>(arr[2][3]) ); BOOST_REQUIRE( &arr.reinterpret_array_cast(3)[2][3][1] == &std::get<1>(arr[2][3]) ); BOOST_REQUIRE( &arr.reinterpret_array_cast(3)[2][3][2] == &std::get<2>(arr[2][3]) ); @@ -88,72 +221,96 @@ BOOST_AUTO_TEST_CASE(multi_reinterpret_array_cast_tuple_as_extra_dimension) { auto arr3 = +arr.reinterpret_array_cast(3); BOOST_REQUIRE( arr3 == arr2 ); } +#endif } -template struct 
complex_dummy{T real; T imag;}; +template struct complex_dummy { + T real; + T imag; +}; BOOST_AUTO_TEST_CASE(multi_reinterpret_array_cast) { -{ - std::complex cee{1, 2}; - auto *ptr = reinterpret_cast*>(&cee); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) - ptr->real = 11; - BOOST_REQUIRE(real(cee)==11); -} -{ - multi::array, 1> arr(multi::extensions_t<1>{multi::iextension{10}}); - std::iota( begin(arr), end(arr), 1.); - BOOST_REQUIRE( arr[8] == 9. ); - auto&& arr2 = arr.reinterpret_array_cast>(); - arr2[8].real = 1000.; - BOOST_REQUIRE( arr[8] == 1000. ); -} + { + std::complex cee{1.0, 2.0}; + + auto* ptr = reinterpret_cast*>(&cee); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + ptr->real = 11; + BOOST_REQUIRE(real(cee)==11); + } + { + multi::array, 1> arr(multi::extensions_t<1>{multi::iextension{10}}); + std::iota(begin(arr), end(arr), 1.0); + BOOST_REQUIRE( arr[8] == 9.0 ); + auto&& arr2 = arr.reinterpret_array_cast>(); + arr2[8].real = 1000.0; + BOOST_REQUIRE( arr[8] == 1000.0 ); + } } BOOST_AUTO_TEST_CASE(multi_reinterpret_array_cast_realcomplex) { using complex = std::complex; -{ - complex cee{1, 2}; - auto *conjd_cee = reinterpret_cast*>(&cee); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) - (*conjd_cee)[0] = 11; - BOOST_REQUIRE( conjd_cee ); - BOOST_REQUIRE(real(cee)==11); -} -{ - complex cee{1, 2}; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast, cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays): test purposes - auto *ceePC = reinterpret_cast(&cee); - (*ceePC)[0] = 11; - BOOST_REQUIRE( ceePC ); - BOOST_REQUIRE(real(cee)==11); -} -{ - multi::array arr(multi::extensions_t<1>{multi::iextension{10}}); - auto&& arr2 = arr.reinterpret_array_cast(2); - arr2[8][0] = 1000.; - arr2[8][1] = 2000.; - BOOST_REQUIRE( arr[8] == std::complex(1000., 2000.) 
); -} + { + complex cee{1.0, 2.0}; + auto* conjd_cee = reinterpret_cast*>(&cee); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + (*conjd_cee)[0] = 11; + BOOST_REQUIRE( conjd_cee ); + BOOST_REQUIRE( real(cee)==11 ); + } + { + complex cee{1, 2}; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast, cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays): test purposes + auto* ceePC = reinterpret_cast(&cee); + (*ceePC)[0] = 11; + BOOST_REQUIRE( ceePC ); + BOOST_REQUIRE( real(cee)==11 ); + } +#ifndef _MSC_VER // problem with MSVC 14.3 c++17 + { + multi::array arr(multi::extensions_t<1>{multi::iextension{10}}); + + auto&& arr2 = arr.reinterpret_array_cast(2); + + arr2[8][0] = 1000.0; + arr2[8][1] = 2000.0; + + BOOST_REQUIRE(( arr[8] == std::complex{1000.0, 2000.0} )); + } +#endif } BOOST_AUTO_TEST_CASE(multi_reinterpret_array_cast_pair_to_complex) { using complex = std::complex; using pair = std::pair; - multi::array arr({10, 10}, complex{3., 4.}); + multi::array arr({10, 10}, complex{3.0, 4.0}); - multi::array const& Aconst = arr; - auto&& A_block = Aconst({0, 5}, {0, 5}); + multi::array const& Aconst = arr; + auto&& A_block = Aconst({0, 5}, {0, 5}); - auto const& Apair_block = A_block.template reinterpret_array_cast(); // const is important // cppcheck 1.90 needs `template` to avoid internal bug + auto const& Apair_block = A_block.template reinterpret_array_cast(); // const is important // cppcheck 1.90 needs `template` to avoid internal bug BOOST_REQUIRE( &Apair_block[1][2] == static_cast(&arr[1][2]) ); +#ifndef _MSC_VER // problems with MSVC 14.3 c++17 auto&& Adoubles_block = A_block.reinterpret_array_cast(2); BOOST_REQUIRE( &Adoubles_block[1][2][0] == static_cast(&arr[1][2]) ); +#endif } BOOST_AUTO_TEST_CASE(multi_reinterpret_array_cast_pointer) { - multi::array arr({10, 10}, 5.); + multi::array arr({10, 10}, 5.0); auto&& Aconstcast = arr.reinterpret_array_cast(); BOOST_REQUIRE( &arr[0][0] == &Aconstcast[0][0] ); - 
static_assert( std::is_same{}, "!" ); + static_assert(std::is_same_v); +} + +BOOST_AUTO_TEST_CASE(const_array_cast) { + multi::array arr({10, 10}, 5.0); // NOLINT(misc-const-correctness) test const cast + + multi::array const& carr = arr; + + auto&& marr = carr.const_array_cast(); + + marr[1][1] = 6.0; + + BOOST_REQUIRE( carr[1][1] == 6.0 ); } diff --git a/external_codes/boost_multi/multi/test/reversed.cpp b/external_codes/boost_multi/multi/test/reversed.cpp index e604ab358e..5fd104aef2 100644 --- a/external_codes/boost_multi/multi/test/reversed.cpp +++ b/external_codes/boost_multi/multi/test/reversed.cpp @@ -1,10 +1,33 @@ // -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- // Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi reversed" -#include - -#include "multi/array.hpp" +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; diff --git a/external_codes/boost_multi/multi/test/rotated.cpp b/external_codes/boost_multi/multi/test/rotated.cpp index d7c07fe747..aa2a6521d7 100644 --- a/external_codes/boost_multi/multi/test/rotated.cpp +++ 
b/external_codes/boost_multi/multi/test/rotated.cpp @@ -1,12 +1,35 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4-*- -// Copyright 2021-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi rotate" -#include - -#include "multi/array.hpp" - -#include // for std::iota +// Copyright 2021-2023 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include // for std::iota + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; @@ -45,25 +68,28 @@ BOOST_AUTO_TEST_CASE(multi_rotate_4d) { BOOST_AUTO_TEST_CASE(multi_rotate_4d_op) { multi::array original({14, 14, 7, 4}); - auto&& unrotd = (original.unrotated() ); + auto&& unrotd = (original.unrotated()); BOOST_REQUIRE(( sizes(unrotd) == decltype(sizes(unrotd)){4, 14, 14, 7} )); BOOST_REQUIRE( &original[0][1][2][3] == &unrotd[3][0][1][2] ); - auto&& unrotd2 = (original.unrotated().unrotated() ); + auto&& unrotd2 = (original.unrotated().unrotated()); BOOST_REQUIRE(( sizes(unrotd2) == decltype(sizes(unrotd2)){7, 4, 14, 14} )); BOOST_REQUIRE( &original[0][1][2][3] == &unrotd2[2][3][0][1] ); } BOOST_AUTO_TEST_CASE(multi_rotate_part1) { + // clang-format off std::array, 4> 
stdarr = {{ - {{ 0., 1., 2., 3., 4.}}, - {{ 5., 6., 7., 8., 9.}}, - {{10., 11., 12., 13., 14.}}, - {{15., 16., 17., 18., 19.}} + {{ 0.0, 1.0, 2.0, 3.0, 4.0}}, + {{ 5.0, 6.0, 7.0, 8.0, 9.0}}, + {{10.0, 11.0, 12.0, 13.0, 14.0}}, + {{15.0, 16.0, 17.0, 18.0, 19.0}}, }}; + // clang-format on + std::array, 4> stdarr2 = {}; - multi::array_ref arr (&stdarr [0][0], {4, 5}); // NOLINT(readability-container-data-pointer) test access + multi::array_ref arr(&stdarr[0][0], {4, 5}); // NOLINT(readability-container-data-pointer) test access multi::array_ref arr2(&stdarr2[0][0], {4, 5}); // NOLINT(readability-container-data-pointer) test access rotated(arr2) = rotated(arr); @@ -75,68 +101,67 @@ BOOST_AUTO_TEST_CASE(multi_rotate_part1) { } BOOST_AUTO_TEST_CASE(multi_rotate) { -{ - multi::array arr = { - {00, 01}, - {10, 11} - }; - BOOST_REQUIRE( arr[1][0] == 10 ); - BOOST_REQUIRE( (arr.rotated())[0][1] == 10 ); - BOOST_REQUIRE( & arr[1][0] == &(arr.rotated() )[0][1] ); - - BOOST_REQUIRE( arr.transposed()[0][1] == 10 ); - BOOST_REQUIRE( transposed(arr)[0][1] == 10 ); - BOOST_REQUIRE( (~arr)[0][1] == 10 ); - BOOST_REQUIRE( &arr[1][0] == &arr.transposed()[0][1] ); - - (arr.rotated())[0][1] = 100; - BOOST_REQUIRE( arr[1][0] == 100 ); -} -{ - multi::array arr({11, 13, 17}); - BOOST_REQUIRE( & arr[3][5][7] == & arr.transposed()[5][3][7] ); - BOOST_REQUIRE( & arr[3][5][7] == & transposed(arr) [5][3][7] ); - BOOST_REQUIRE( & arr[3][5][7] == & (~arr) [5][3][7] ); - BOOST_REQUIRE( & arr[3][5][7] == & arr[3].transposed()[7][5] ); - BOOST_REQUIRE( & arr[3][5][7] == & (~arr[3]) [7][5] ); - - BOOST_REQUIRE( & arr[3][5] == & (~arr)[5][3] ); - - BOOST_REQUIRE( & ~~arr == & arr ); - BOOST_REQUIRE( & (arr.rotated().rotated().rotated() ) == & arr ); - BOOST_REQUIRE( & arr == & (arr.rotated().rotated().rotated() ) ); - BOOST_REQUIRE( & (arr.rotated() ) != & arr ); - BOOST_REQUIRE( & (arr.unrotated().rotated()) == & arr ); - - std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 
0.1); - BOOST_REQUIRE( ~~arr == arr ); - BOOST_REQUIRE( arr.unrotated().rotated() == arr ); -} -{ - multi::array const arr = { - {00, 01}, - {10, 11} - }; - BOOST_REQUIRE( arr.rotated() [0][1] == 10 ); - BOOST_REQUIRE( &(arr.rotated())[1][0] == &arr[0][1] ); - BOOST_REQUIRE( &(~arr)[1][0] == &arr[0][1] ); -} + { + multi::array arr = { + {00.0, 01.0}, + {10.0, 11.0}, + }; + BOOST_REQUIRE( arr[1][0] == 10 ); + BOOST_REQUIRE( (arr.rotated())[0][1] == 10 ); + BOOST_REQUIRE( & arr[1][0] == &(arr.rotated() )[0][1] ); + + BOOST_REQUIRE( arr.transposed()[0][1] == 10 ); + BOOST_REQUIRE( transposed(arr)[0][1] == 10 ); + BOOST_REQUIRE( (~arr)[0][1] == 10 ); + BOOST_REQUIRE( &arr[1][0] == &arr.transposed()[0][1] ); + + (arr.rotated())[0][1] = 100; + BOOST_REQUIRE( arr[1][0] == 100 ); + } + { + multi::array arr({11, 13, 17}); + BOOST_REQUIRE( & arr[3][5][7] == & arr.transposed()[5][3][7] ); + BOOST_REQUIRE( & arr[3][5][7] == & transposed(arr) [5][3][7] ); + BOOST_REQUIRE( & arr[3][5][7] == & (~arr) [5][3][7] ); + BOOST_REQUIRE( & arr[3][5][7] == & arr[3].transposed()[7][5] ); + BOOST_REQUIRE( & arr[3][5][7] == & (~arr[3]) [7][5] ); + + BOOST_REQUIRE( & arr[3][5] == & (~arr)[5][3] ); + + BOOST_REQUIRE( & ~~arr == & arr ); + BOOST_REQUIRE( & (arr.rotated().rotated().rotated() ) == & arr ); + BOOST_REQUIRE( & arr() == & (arr.rotated().rotated().rotated() ) ); + BOOST_REQUIRE( & (arr.rotated() ) != & arr ); + BOOST_REQUIRE( & (arr.unrotated().rotated()) == & arr ); + + std::iota(arr.data_elements(), arr.data_elements() + arr.num_elements(), 0.1); + BOOST_REQUIRE( ~~arr == arr ); + BOOST_REQUIRE( arr.unrotated().rotated() == arr ); + } + { + multi::array const arr = { + {00, 01}, + {10, 11}, + }; + BOOST_REQUIRE( arr.rotated() [0][1] == 10 ); + BOOST_REQUIRE( &(arr.rotated())[1][0] == &arr[0][1] ); + BOOST_REQUIRE( &(~arr)[1][0] == &arr[0][1] ); + } } BOOST_AUTO_TEST_CASE(multi_transposed) { multi::array const arr0 = { - { 9., 24., 30., 9.}, - { 4., 10., 12., 7.}, - {14., 16., 36., 
1.} + { 9.0, 24.0, 30.0, 9.0}, + { 4.0, 10.0, 12.0, 7.0}, + {14.0, 16.0, 36.0, 1.0}, }; - multi::array const arr1 = arr0.transposed(); + multi::array const arr1 = arr0.transposed(); multi::array const arr2 = ~arr0; BOOST_REQUIRE( arr1 == arr2 ); } BOOST_AUTO_TEST_CASE(miguel) { multi::array G2D({41, 35}); - auto const& G3D = G2D.rotated().partitioned(7).sliced(0, 3).unrotated(); + auto const& G3D = G2D.rotated().partitioned(7).sliced(0, 3).unrotated(); BOOST_REQUIRE( &G3D[0][0][0] == &G2D[0][0] ); } - diff --git a/external_codes/boost_multi/multi/test/scoped_allocator.cpp b/external_codes/boost_multi/multi/test/scoped_allocator.cpp new file mode 100644 index 0000000000..dd595ebc45 --- /dev/null +++ b/external_codes/boost_multi/multi/test/scoped_allocator.cpp @@ -0,0 +1,235 @@ +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +namespace multi = boost::multi; + +template +class allocator1 { + int* heap_ = nullptr; + + template friend class allocator1; + + public: + using value_type = T; + + allocator1() noexcept = delete; + // NOLINTNEXTLINE(runtime/explicit) + 
allocator1(int* heap) : heap_{heap} { assert(heap_); } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) mimic memory resource syntax (pass pointer) + template allocator1(allocator1 const& other) noexcept : heap_{other.heap_} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) allocator conversions are not explicit + + auto allocate(std::size_t n) { + if(n == 0) { + return static_cast(nullptr); + } + if(heap_ == nullptr) { + throw std::bad_alloc{}; + } // this cuts branches with UB (null deref) for the sanitizer + ++*heap_; + return static_cast(::operator new(n * sizeof(value_type))); + } + void deallocate(value_type* ptr, std::size_t n) noexcept { + if(n == 0) { + return; + } + --*heap_; + ::operator delete(ptr); + } + template + friend auto operator==(allocator1 const& self, allocator1 const& other) noexcept -> bool { return self.heap_ == other.heap_; } + template + friend auto operator!=(allocator1 const& self, allocator1 const& other) noexcept -> bool { return self.heap_ != other.heap_; } +}; + +template +auto operator!=(allocator1 const& self, allocator1 const& other) noexcept -> bool { return ! 
(self == other); } + +template +auto operator==(allocator1 const& self, allocator1 const& other) noexcept -> bool { return (self == other); } + +template +class allocator2 { + std::int64_t* heap_ = nullptr; + + template friend class allocator2; + + public: + using value_type = T; + + allocator2() noexcept = default; + // NOLINTNEXTLINE(runtime/explicit) + allocator2(std::int64_t* heap) : heap_{heap} { assert(heap_); } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) mimic memory resource syntax (pass pointer) + template allocator2(allocator2 const& other) noexcept : heap_{other.heap_} {} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) allocator conversions are not explicit + + auto allocate(std::size_t n) { + if(n == 0) { + return static_cast(nullptr); + } + if(heap_ == nullptr) { + throw std::bad_alloc{}; + } // this cuts branches with UB (null deref) for the sanitizer + ++*heap_; + return static_cast(::operator new(n * sizeof(value_type))); + } + + void deallocate(value_type* ptr, std::size_t n) noexcept { + if(n == 0) { + return; + } + --*heap_; + ::operator delete(ptr); + } + + template + friend auto operator==(allocator2 const& self, allocator2 const& other) noexcept -> bool { return self.heap_ == other.heap_; } + template + friend auto operator!=(allocator2 const& self, allocator2 const& other) noexcept -> bool { return self.heap_ != other.heap_; } +}; + +template +auto operator!=(allocator2 const& self, allocator2 const& other) noexcept -> bool { + return ! 
(self == other); +} + +template +auto operator==(allocator2 const& self, allocator2 const& other) noexcept -> bool { + return (self == other); +} + +BOOST_AUTO_TEST_CASE(scoped_allocator_vector) { + std::int32_t heap1 = 0; + std::int64_t heap2 = 0; + + { + using InnerCont = std::vector>; + using OuterCont = + std::vector< + InnerCont, + std::scoped_allocator_adaptor< + allocator1, + allocator2 + > + > + ; + + // OuterCont cont({&heap1, &heap2}); // gives ambiguous construction in libc++ + OuterCont cont({&heap1, allocator2{&heap2}}); + + cont.resize(2); + + cont.resize(10); + + cont.back().resize(10); + cont.back().resize(100); + cont.back().resize(300); + + // these values are dependent on the implementation of std::vector + #if !defined(_MSC_VER) + BOOST_TEST( heap1 == 1 ); + BOOST_TEST( heap2 == 1L ); + #endif + } + + BOOST_TEST( heap1 == 0 ); + BOOST_TEST( heap2 == 0 ); +} + +BOOST_AUTO_TEST_CASE(scoped_allocator_array_vector) { + std::int32_t heap1 = 0; + std::int64_t heap2 = 0; + + using InnerCont = std::vector>; + using OuterCont = multi::array, allocator2>>; + + { + OuterCont cont( + #ifdef _MSC_VER // problem with MSVC 14.3 c++17 + multi::extensions_t<2> + #endif + {3, 4}, + {&heap1, allocator2{&heap2}} // without allocator2<>{...} gives ambiguous construction in libc++ + ); + + cont[1][2].resize(10); + cont[1][2].resize(100); + cont[1][2].resize(200); + + // these values are dependent on the implementation of std::vector + #if !defined(_MSC_VER) + BOOST_TEST( heap1 == 1 ); + BOOST_TEST( heap2 == 1L ); + #endif + } +} + +// vvv these cases confuse gcc (and MSVC?) 
+// BOOST_AUTO_TEST_CASE(scoped_allocator_array_vector_auto) { +// std::int32_t heap1 = 0; +// std::int64_t heap2 = 0; + +// using InnerCont = std::vector>; +// using OuterCont = multi::array, allocator2<>>>; + +// { +// OuterCont cont({3, 4}, {&heap1, allocator2<>{&heap2}}); // without allocator2<>{...} gives ambiguous construction in libc++ + +// cont[1][2].resize( 10); +// cont[1][2].resize(100); +// cont[1][2].resize(200); + +// BOOST_TEST( heap1 == 1 ); +// // these values are dependent on the implementation of std::vector +// #if !defined(_MSC_VER) +// BOOST_TEST( heap2 == 1L ); +// #endif +// } +// } + +// BOOST_AUTO_TEST_CASE(scoped_allocator_array_array_auto) { +// std::int32_t heap1 = 0; +// std::int64_t heap2 = 0; + +// using InnerCont = multi::array>; +// using OuterCont = multi::array, allocator2<>>>; + +// { +// OuterCont cont({3, 4}, {&heap1, allocator2<>{&heap2}}); // without allocator2<>{...} gives ambiguous construction in libc++ + +// cont[1][2].reextent({ 10, 10}); +// cont[1][2].reextent({100, 100}); +// cont[1][2].reextent({200, 200}); + +// BOOST_TEST( heap1 == 1 ); +// BOOST_TEST( heap2 == 1L ); +// } +// } diff --git a/external_codes/boost_multi/multi/test/select_column.cpp b/external_codes/boost_multi/multi/test/select_column.cpp index 7a51d401c9..ee99829ae8 100644 --- a/external_codes/boost_multi/multi/test/select_column.cpp +++ b/external_codes/boost_multi/multi/test/select_column.cpp @@ -1,28 +1,54 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2018-2021 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi select range" -#include - -#include "multi/array.hpp" +// Copyright 2018-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(multi_array_range_section_1D) { - multi::array arr = {00., 01., 02.}; (void)arr; + multi::array arr = {0, 10, 20}; (void)arr; BOOST_REQUIRE( arr == arr(multi::ALL) ); BOOST_REQUIRE( size(arr( 1 <= multi::ALL )) == 2 ); - BOOST_REQUIRE( arr( 1 <= multi::ALL )[0] == 1. ); + BOOST_REQUIRE( arr( 1 <= multi::ALL )[0] == 10 ); BOOST_REQUIRE( size(arr( multi::ALL < 2 )) == 2 ); - BOOST_REQUIRE( arr( multi::ALL < 2 )[1] == 1. 
); + BOOST_REQUIRE( arr( multi::ALL < 2 )[1] == 10 ); } BOOST_AUTO_TEST_CASE(multi_array_range_section_part1) { multi::array arr = { - {00., 01., 02.}, - {10., 11., 12.}, - {20., 21., 22.}, - {30., 31., 32.}, + {00.0, 01.0, 02.0}, + {10.0, 11.0, 12.0}, + {20.0, 21.0, 22.0}, + {30.0, 31.0, 32.0}, }; using multi::_; @@ -36,30 +62,30 @@ BOOST_AUTO_TEST_CASE(multi_array_range_section_part1) { BOOST_REQUIRE( size( arr( multi::ALL , 2) ) == 4 ); BOOST_REQUIRE( size( arr( multi::ALL < 2 , 2) ) == 2 ); BOOST_REQUIRE( size( arr( 1 <= multi::ALL , 2) ) == 3 ); - BOOST_REQUIRE( size( arr( 1 <= multi::ALL < 3 , 2) ) == 2 ); + BOOST_REQUIRE( size( arr( 1 <= multi::ALL < 3 , 2) ) == 2 ); // NOLINT(bugprone-chained-comparison) BOOST_REQUIRE( size( arr( multi::_ , 2) ) == 4 ); BOOST_REQUIRE( size( arr( multi::_ < 2 , 2) ) == 2 ); BOOST_REQUIRE( size( arr( 1 <= multi::_ , 2) ) == 3 ); - BOOST_REQUIRE( size( arr( 1 <= multi::_ < 3 , 2) ) == 2 ); + BOOST_REQUIRE( size( arr( 1 <= multi::_ < 3 , 2) ) == 2 ); // NOLINT(bugprone-chained-comparison) BOOST_REQUIRE( size( arr( _ , 2) ) == 4 ); BOOST_REQUIRE( size( arr( _ < 2 , 2) ) == 2 ); BOOST_REQUIRE( size( arr( 1 <= _ , 2) ) == 3 ); - BOOST_REQUIRE( size( arr( 1 <= _ < 3 , 2) ) == 2 ); + BOOST_REQUIRE( size( arr( 1 <= _ < 3 , 2) ) == 2 ); // NOLINT(bugprone-chained-comparison) } BOOST_AUTO_TEST_CASE(multi_array_range_section_part2) { - multi::array arr = { - {00., 01., 02.}, - {10., 11., 12.}, - {20., 21., 22.}, - {30., 31., 32.}, + multi::array arr = { + { 0, 10, 20}, + {100, 110, 120}, + {200, 210, 220}, + {300, 310, 320}, }; BOOST_REQUIRE( size( arr(arr.extension(), 2) ) == size(arr) ); - auto&& col2( arr(arr.extension(), 2) ); // select column #2 + auto&& col2( arr(arr.extension(), 2) ); // select column #2 // same as arr(extesion(arr), 2) // same as arr(arr.extension(0), 2); // same as rotated(arr)[2]; @@ -69,9 +95,9 @@ BOOST_AUTO_TEST_CASE(multi_array_range_section_part2) { BOOST_REQUIRE( size(col2) == size(arr) ); BOOST_REQUIRE( 
col2.size() == size(arr) ); BOOST_REQUIRE( col2.stride() == 3 ); - BOOST_REQUIRE( col2[0] == 02. ); - BOOST_REQUIRE( col2[1] == 12. ); - BOOST_REQUIRE(( col2 == multi::array{02., 12., 22., 32.} )); + BOOST_REQUIRE( col2[0] == 20 ); + BOOST_REQUIRE( col2[1] == 120 ); + BOOST_REQUIRE(( col2 == multi::array{20, 120, 220, 320} )); BOOST_REQUIRE(( col2 == multi::array(rotated(arr)[2]) )); BOOST_REQUIRE(( col2 == rotated(arr)[2] )); BOOST_REQUIRE(( col2 == arr(arr.extension(), 2) )); @@ -79,10 +105,10 @@ BOOST_AUTO_TEST_CASE(multi_array_range_section_part2) { BOOST_AUTO_TEST_CASE(multi_array_range_section_syntax) { multi::array arr = { - {00., 01., 02.}, - {10., 11., 12.}, - {20., 21., 22.}, - {30., 31., 32.}, + {00.0, 01.0, 02.0}, + {10.0, 11.0, 12.0}, + {20.0, 21.0, 22.0}, + {30.0, 31.0, 32.0}, }; using multi::_; @@ -103,12 +129,6 @@ BOOST_AUTO_TEST_CASE(multi_array_range_section_syntax) { BOOST_REQUIRE( size( arr( V , 2) ) == size(arr) ); -// using multi::A; -// BOOST_REQUIRE( size( arr( arr , 2) ) == size(arr) ); -// BOOST_REQUIRE( size( arr( arr , 2) ) == size(arr) ); - -// BOOST_REQUIRE( size( arr( arr , 2) ) == size(arr) ); - BOOST_REQUIRE( size( arr( _ < 2 , 2) ) == 2 ); BOOST_REQUIRE( size( arr( *_ < 2 , 2) ) == 2 ); BOOST_REQUIRE( size( arr( U < 2 , 2) ) == 2 ); @@ -117,11 +137,10 @@ BOOST_AUTO_TEST_CASE(multi_array_range_section_syntax) { BOOST_REQUIRE( size( arr( 1 <= *_ , 2) ) == 3 ); BOOST_REQUIRE( size( arr( 1 <= U , 2) ) == 3 ); - BOOST_REQUIRE( size( arr( 1 <= _ < 3 , 2) ) == 2 ); - BOOST_REQUIRE( size( arr( 1 <= *_ < 3 , 2) ) == 2 ); - BOOST_REQUIRE( size( arr( 1 <= U < 3 , 2) ) == 2 ); + BOOST_REQUIRE( size( arr( 1 <= _ < 3 , 2) ) == 2 ); // NOLINT(bugprone-chained-comparison) + BOOST_REQUIRE( size( arr( 1 <= *_ < 3 , 2) ) == 2 ); // NOLINT(bugprone-chained-comparison) + BOOST_REQUIRE( size( arr( 1 <= U < 3 , 2) ) == 2 ); // NOLINT(bugprone-chained-comparison) BOOST_REQUIRE( size( arr( *_ < 2 , 2) ) == 2 ); BOOST_REQUIRE( size( arr( U < 2 , 2) ) == 2 
); } - diff --git a/external_codes/boost_multi/multi/test/sliced.cpp b/external_codes/boost_multi/multi/test/sliced.cpp index 348677c4c7..43da678541 100644 --- a/external_codes/boost_multi/multi/test/sliced.cpp +++ b/external_codes/boost_multi/multi/test/sliced.cpp @@ -1,26 +1,56 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -// Copyright 2021-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi slice" -#include - -#include "multi/array.hpp" - -#include // std::iota +// Copyright 2021-2023 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include // std::iota + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +// # pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +// # pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include + +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(multi_array_sliced_empty) { - multi::array arr({0, 0}, 99.); + multi::array const arr({0, 0}, 99.0); BOOST_REQUIRE( arr.sliced(0, 0).is_empty() ); - BOOST_REQUIRE( arr.sliced(1, 1).is_empty() ); + // BOOST_REQUIRE( arr.sliced(1, 1).is_empty() ); // this results in offsetting nullptr } BOOST_AUTO_TEST_CASE(multi_array_sliced) { - multi::array arr({10, 20, 
30, 40}, 99.); - std::iota(arr.elements().begin(), arr.elements().end(), 0.); + multi::array arr({10, 20, 30, 40}, 99); + std::iota(arr.elements().begin(), arr.elements().end(), 0); - static_assert( decltype( arr.sliced(0, 5) )::rank_v == decltype(arr)::rank_v , "!"); //NOLINT(misc-redundant-expression) + static_assert( decltype( arr.sliced(0, 5) )::rank::value == 4); + static_assert( decltype( arr.sliced(0, 5) )::rank{} == 4); + static_assert( decltype( arr.sliced(0, 5) )::rank_v == 4); BOOST_REQUIRE( arr.sliced( 0, 5)[1][2][3][4] == arr[1][2][3][4] ); BOOST_REQUIRE( &arr.sliced( 0, 5)[1][2][3][4] == &arr[1][2][3][4] ); @@ -47,46 +77,46 @@ BOOST_AUTO_TEST_CASE(multi_array_sliced) { } BOOST_AUTO_TEST_CASE(multi_array_stride) { - multi::array arr = { - { 1., 2., 3., 4.}, - { 5., 6., 7., 8.}, - { 9., 10., 11., 12.}, - {13., 14., 15., 16.}, + multi::array arr = { + { 10, 20, 30, 40}, + { 50, 60, 70, 80}, + { 90, 100, 110, 120}, + {130, 140, 150, 160}, }; BOOST_REQUIRE(( - arr.strided(2) == multi::array{ - { 1., 2., 3., 4.}, - { 9., 10., 11., 12.}, + arr.strided(2) == multi::array{ + { 10, 20, 30, 40}, + { 90, 100, 110, 120}, } )); } -BOOST_AUTO_TEST_CASE(take) { - multi::array arr = { - { 1., 2., 3., 4.}, - { 5., 6., 7., 8.}, - { 9., 10., 11., 12.}, - {13., 14., 15., 16.}, +BOOST_AUTO_TEST_CASE(multi_array_take) { + multi::array arr = { + { 10, 20, 30, 40}, + { 50, 60, 70, 80}, + { 90, 100, 110, 120}, + {130, 140, 150, 160}, }; BOOST_REQUIRE(( - arr.take(2) == multi::array{ - { 1., 2., 3., 4.}, - { 5., 6., 7., 8.}, + arr.taked(2) == multi::array{ + { 10, 20, 30, 40}, + { 50, 60, 70, 80}, } )); } BOOST_AUTO_TEST_CASE(drop) { multi::array arr = { - { 1., 2., 3., 4.}, - { 5., 6., 7., 8.}, - { 9., 10., 11., 12.}, - {13., 14., 15., 16.}, + { 10, 20, 30, 40}, + { 50, 60, 70, 80}, + { 90, 100, 110, 120}, + {130, 140, 150, 160}, }; BOOST_REQUIRE(( - arr.drop(2) == multi::array{ - { 9., 10., 11., 12.}, - {13., 14., 15., 16.}, + arr.dropped(2) == multi::array{ + { 90, 100, 110, 
120}, + {130, 140, 150, 160}, } )); } diff --git a/external_codes/boost_multi/multi/test/sort.cpp b/external_codes/boost_multi/multi/test/sort.cpp index 3f7785d964..f5df9537ed 100644 --- a/external_codes/boost_multi/multi/test/sort.cpp +++ b/external_codes/boost_multi/multi/test/sort.cpp @@ -1,73 +1,224 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;-*- -// Copyright 2019-2022 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -#define BOOST_TEST_MODULE "Unit Tests for Multi sort" -#include +#include -#include "multi/array.hpp" +#include // for std::stable_sort +#include +#include -#include // stable_sort -#include +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; +BOOST_AUTO_TEST_CASE(array_1D_partial_order_syntax) { + multi::array const tt = {1, 1, 1}; + multi::array const uu = {2, 2, 2}; + + BOOST_REQUIRE( tt < uu ); + BOOST_REQUIRE( ! (tt > uu) ); + BOOST_REQUIRE( tt <= uu ); + BOOST_REQUIRE( ! (tt >= uu) ); + BOOST_REQUIRE( ! (tt == uu) ); + BOOST_REQUIRE( (tt != uu) ); + BOOST_REQUIRE( ! (uu < tt) ); + BOOST_REQUIRE( (uu > tt) ); + BOOST_REQUIRE( ! 
(uu <= tt) ); + BOOST_REQUIRE( (uu >= tt) ); +} + +#if defined(__cpp_lib_ranges) +BOOST_AUTO_TEST_CASE(sort_2D) { + multi::array A = { + {3, 3, 3}, + {2, 2, 2}, + {1, 1, 1}, + }; + BOOST_REQUIRE(! std::ranges::is_sorted(A)); + + std::ranges::sort(A); + + BOOST_REQUIRE( std::ranges::is_sorted(A)); + + static_assert(std::permutable>); +} + +BOOST_AUTO_TEST_CASE(sort_strings) { + auto A = multi::array{ + {'S', 'e', 'a', 'n', ' ', ' '}, + {'A', 'l', 'e', 'x', ' ', ' '}, + {'B', 'j', 'a', 'r', 'n', 'e'}, + }; + BOOST_REQUIRE(! std::ranges::is_sorted(A)); + + std::ranges::sort(A); + + BOOST_REQUIRE( std::ranges::is_sorted(A)); + + BOOST_REQUIRE(( + A == multi::array{ + {'A', 'l', 'e', 'x', ' ', ' '}, + {'B', 'j', 'a', 'r', 'n', 'e' }, + {'S', 'e', 'a', 'n', ' ', ' '}, + } + )); + + std::ranges::sort(~A); + BOOST_REQUIRE( std::ranges::is_sorted(~A)); + + static_assert(std::permutable>); +} +#endif + BOOST_AUTO_TEST_CASE(multi_array_stable_sort) { - std::vector vec = {1., 2., 3.}; + std::vector vec = {1.0, 2.0, 3.0}; // NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( std::is_sorted(begin(vec), end(vec)) ); multi::array d2D = { - {150, 16, 17, 18, 19}, - { 30, 1, 2, 3, 4}, - {100, 11, 12, 13, 14}, - { 50, 6, 7, 8, 9} + {150.0, 16.0, 17.0, 18.0, 19.0}, + { 30.0, 1.0, 2.0, 3.0, 4.0}, + {100.0, 11.0, 12.0, 13.0, 14.0}, + { 50.0, 6.0, 7.0, 8.0, 9.0}, }; - BOOST_REQUIRE( not std::is_sorted(begin(d2D), end(d2D) ) ); + BOOST_REQUIRE( ! 
std::is_sorted(begin(d2D), end(d2D) ) ); - std::stable_sort( begin(d2D), end(d2D) ); + std::stable_sort(begin(d2D), end(d2D)); BOOST_REQUIRE( std::is_sorted( begin(d2D), end(d2D) ) ); BOOST_REQUIRE(( d2D == decltype(d2D){ - {30, 1, 2, 3, 4}, - {50, 6, 7, 8, 9}, - {100, 11, 12, 13, 14}, - {150, 16, 17, 18, 19} + { 30.0, 1.0, 2.0, 3.0, 4.0}, + { 50.0, 6.0, 7.0, 8.0, 9.0}, + {100.0, 11.0, 12.0, 13.0, 14.0}, + {150.0, 16.0, 17.0, 18.0, 19.0}, } )); - BOOST_REQUIRE( not std::is_sorted( begin(d2D.rotated()), end(d2D.rotated()) ) ); + BOOST_REQUIRE( ! std::is_sorted( begin(d2D.rotated()), end(d2D.rotated()) ) ); - std::stable_sort( begin(d2D.rotated()), end(d2D.rotated()) ); + std::stable_sort(begin(d2D.rotated()), end(d2D.rotated())); BOOST_REQUIRE( std::is_sorted( begin(d2D.rotated()), end(d2D.rotated()) ) ); BOOST_REQUIRE( std::is_sorted( begin(d2D ), end(d2D ) ) ); BOOST_REQUIRE(( d2D == decltype(d2D){ - {1, 2, 3, 4, 30}, - {6, 7, 8, 9, 50}, - {11, 12, 13, 14, 100}, - {16, 17, 18, 19, 150} + { 1.0, 2.0, 3.0, 4.0, 30.0}, + { 6.0, 7.0, 8.0, 9.0, 50.0}, + {11.0, 12.0, 13.0, 14.0, 100.0}, + {16.0, 17.0, 18.0, 19.0, 150.0}, } )); } BOOST_AUTO_TEST_CASE(multi_array_ref_stable_sort) { - std::vector vec = {1., 2., 3.}; + std::vector vec = {1.0, 2.0, 3.0}; // NOLINT(fuchsia-default-arguments-calls) BOOST_REQUIRE( std::is_sorted(begin(vec), end(vec)) ); + // clang-format off std::array, 4> d2D {{ - {{150, 16, 17, 18, 19}}, - {{ 30, 1, 2, 3, 4}}, - {{100, 11, 12, 13, 14}}, - {{ 50, 6, 7, 8, 9}} + {{150.0, 16.0, 17.0, 18.0, 19.0}}, + {{ 30.0, 1.0, 2.0, 3.0, 4.0}}, + {{100.0, 11.0, 12.0, 13.0, 14.0}}, + {{ 50.0, 6.0, 7.0, 8.0, 9.0}} }}; + // clang-format on + auto&& d2D_ref = *multi::array_ptr(&d2D[0][0], {4, 5}); // NOLINT(readability-container-data-pointer) test access - BOOST_REQUIRE( not std::is_sorted(begin(d2D_ref), end(d2D_ref) ) ); - std::stable_sort( begin(d2D_ref), end(d2D_ref) ); + BOOST_REQUIRE( ! 
std::is_sorted(begin(d2D_ref), end(d2D_ref) ) ); + std::stable_sort(begin(d2D_ref), end(d2D_ref)); BOOST_REQUIRE( std::is_sorted( begin(d2D_ref), end(d2D_ref) ) ); - BOOST_REQUIRE( not std::is_sorted( begin(d2D_ref.rotated()), end(d2D_ref.rotated()) ) ); - std::stable_sort( begin(d2D_ref.rotated()), end(d2D_ref.rotated()) ); + BOOST_REQUIRE( ! std::is_sorted( begin(d2D_ref.rotated()), end(d2D_ref.rotated()) ) ); + std::stable_sort(begin(d2D_ref.rotated()), end(d2D_ref.rotated())); BOOST_REQUIRE( std::is_sorted( begin(d2D_ref.rotated()), end(d2D_ref.rotated()) ) ); } + +BOOST_AUTO_TEST_CASE(lexicographical_compare) { + multi::array const name1 = {'a', 'b', 'c'}; + multi::array const name2 = {'a', 'c', 'c'}; + BOOST_REQUIRE(name1 != name2 ); + BOOST_REQUIRE(name1 < name2); + BOOST_REQUIRE(name1 <= name2); + BOOST_REQUIRE(!(name1 > name2)); + BOOST_REQUIRE(!(name1 > name2)); +} + +BOOST_AUTO_TEST_CASE(lexicographical_compare_offset) { + multi::array const name1 = {'a', 'b', 'c'}; + multi::array name2({{1, 4}}, '\0'); + + BOOST_REQUIRE( name2.size() == 3 ); + BOOST_REQUIRE(( name2.extension() == multi::extension_t{1, 4} )); + BOOST_REQUIRE(( name2.extension() == multi::extension_t{multi::index{1}, multi::index{4}} )); + + // BOOST_REQUIRE(( name2.extension() == multi::extension_t{1L, 4L} )); + + BOOST_REQUIRE(( name2.extension() == multi::extension_t<>{1, 4} )); + // BOOST_REQUIRE(( name2.extension() == multi::extension_t{1 , 4 } )); TODO(correaa) solve ambiguity + + name2[1] = 'a'; + name2[2] = 'b'; + name2[3] = 'c'; + + BOOST_REQUIRE( name2 != name1 ); + BOOST_REQUIRE(!(name2 == name1)); + + BOOST_REQUIRE( name2 < name1 ); + BOOST_REQUIRE( name2 <= name1 ); + + BOOST_REQUIRE(!(name2 > name1)); + BOOST_REQUIRE(!(name2 >= name1)); + + // BOOST_REQUIRE(!(name1 > name2)); + // BOOST_REQUIRE(!(name1 > name2)); +} + +BOOST_AUTO_TEST_CASE(lexicographical_compare_offset_2d) { + multi::array const name1 = {{'a', 'b'}, {'b', 'c'}, {'c', 'd'}}; + multi::array name2({{1, 4}, {0, 
2}}, '\0'); + + BOOST_REQUIRE( name2.size() == 3 ); + BOOST_REQUIRE(( name2.extension() == multi::extension_t{1, 4} )); + BOOST_REQUIRE(( name2.extension() == multi::extension_t<>{1, 4} )); + // BOOST_REQUIRE(( name2.extension() == multi::extension_t{1 , 4 } )); TODO(correaa) solve ambiguity + + name2[1][0] = 'a'; name2[1][1] = 'a'; + name2[2][0] = 'b'; name2[2][1] = 'a'; + name2[3][0] = 'c'; name2[3][1] = 'a'; + + BOOST_REQUIRE( name2 != name1 ); + BOOST_REQUIRE(!(name2 == name1)); + + BOOST_REQUIRE( name2 < name1 ); + BOOST_REQUIRE( name2 <= name1 ); + + // BOOST_REQUIRE(!(name2 > name1)); + // BOOST_REQUIRE(!(name2 >= name1)); + + BOOST_REQUIRE( name1 > name2 ); + BOOST_REQUIRE(!(name1 < name2)); +} diff --git a/external_codes/boost_multi/multi/test/static_array_cast.cpp b/external_codes/boost_multi/multi/test/static_array_cast.cpp index 133f839fd7..254d7a09b2 100644 --- a/external_codes/boost_multi/multi/test/static_array_cast.cpp +++ b/external_codes/boost_multi/multi/test/static_array_cast.cpp @@ -1,13 +1,34 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa - -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi static array cast" -#include +// Copyright 2019-2024 Alfredo A. Correa +// Distributed under the Boost Software License, Version 1.0. 
+// https://www.boost.org/LICENSE_1_0.txt + +#include +#include // TODO(correaa) remove in c++20 + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif -#include "multi/array.hpp" -#include "multi/config/NO_UNIQUE_ADDRESS.hpp" +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif -#include +#include namespace multi = boost::multi; @@ -15,26 +36,30 @@ template class involuter; template class involuted { - Ref r_; - MULTI_NO_UNIQUE_ADDRESS Involution f_; + Ref r_; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) + BOOST_MULTI_NO_UNIQUE_ADDRESS Involution f_; // TODO(correaa) put nounique members first? 
public: using decay_type = std::decay_t()(std::declval()))>; - constexpr involuted(Ref ref, Involution fun) : r_{std::forward(ref)}, f_{fun} {} - constexpr explicit involuted(Ref ref) : r_{std::forward(ref)}, f_{} {} - involuted(involuted const&) = default; + constexpr involuted(Ref ref, Involution fun) : r_{ref}, f_{fun} {} + constexpr explicit involuted(Ref ref) : r_{ref}, f_{} {} + + involuted(involuted const&) = default; involuted(involuted&&) noexcept = default; + constexpr auto operator=(involuted const& other) = delete; + ~involuted() = default; - // NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions): simulates a reference - constexpr operator decay_type() const& {return f_(r_);} - // NOLINTNEXTLINE(google-runtime-operator,fuchsia-overloaded-operator): simulates reference - constexpr auto operator&() && -> decltype(auto) {return involuter()), Involution>{&r_, f_};} // NOLINT(runtime/operator) + + constexpr operator decay_type() const& noexcept { return f_(r_); } // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) // NOSONAR(cpp:S1709) simulates a reference // NOLINTNEXTLINE(fuchsia-trailing-return,-warnings-as-errors): trailing return helps reading - template constexpr auto operator=(DecayType&& other) & -> involuted& {r_ = f_(std::forward(other)); return *this;} + template constexpr auto operator=(DecayType&& other) & -> involuted& { + r_ = f_(std::forward(other)); + return *this; + } // NOLINTNEXTLINE(fuchsia-trailing-return): trailing return helps reading - constexpr auto operator=(involuted&& other)& noexcept -> involuted& = default; + constexpr auto operator=(involuted&& other) & noexcept -> involuted& = default; friend auto operator==(involuted const& self, involuted const& other) -> bool { assert(self.f_ == other.f_); @@ -48,16 +73,14 @@ class involuted { template class involuter { - It it_; - MULTI_NO_UNIQUE_ADDRESS F f_; + It it_; + BOOST_MULTI_NO_UNIQUE_ADDRESS F f_; template friend class involuter; -// template{}, 
int> =0> -// static constexpr auto implicit_cast(From&& f) {return static_cast(f);} public: - using pointer = involuter::pointer, F>; - using element_type = typename std::pointer_traits::element_type; - using difference_type = typename std::pointer_traits::difference_type; + using pointer = involuter::pointer, F>; + using element_type = typename std::pointer_traits::element_type; + using difference_type = typename std::pointer_traits::difference_type; template using rebind = involuter::template rebind, F>; using reference = involuted::reference, F>; @@ -65,96 +88,113 @@ class involuter { using iterator_category = typename std::iterator_traits::iterator_category; explicit constexpr involuter(It it) : it_{std::move(it)}, f_{} {} // NOLINT(readability-identifier-length) clang-tidy 14 bug constexpr involuter(It it, F fun) : it_{std::move(it)}, f_{std::move(fun)} {} -// involuter(involuter const& other) = default; + // NOLINTNEXTLINE(google-explicit-constructor, hicpp-explicit-conversions): this is needed to make involuter implicitly convertible to involuter - template constexpr involuter(involuter const& other) : it_{multi::implicit_cast(other.it_)}, f_{other.f_} {} -// auto operator=(involuter const& other) -> involuter& = default; - constexpr auto operator*() const {return reference{*it_, f_};} - constexpr auto operator==(involuter const& other) const {return it_ == other.it_;} - constexpr auto operator!=(involuter const& other) const {return it_ != other.it_;} - constexpr auto operator+=(typename involuter::difference_type n) -> decltype(auto) {it_+=n; return *this;} - constexpr auto operator+ (typename involuter::difference_type n) const {return involuter{it_+n, f_};} - constexpr auto operator- (typename involuter::difference_type n) const {return involuter{it_-n, f_};} - constexpr auto operator-(involuter const& other) const {return it_ - other.it_;} - constexpr auto operator->() const {return pointer{&*it_, f_};} -// ~involuter() = default; - constexpr auto 
operator[](typename involuter::difference_type n) const {return reference{*(it_ + n), f_};} + template constexpr involuter(involuter const& other) : it_{multi::detail::implicit_cast(other.it_)}, f_{other.f_} {} // NOSONAR(cpp:S1709) + + constexpr auto operator*() const { return reference{*it_, f_}; } + constexpr auto operator->() const { return pointer{&*it_, f_}; } + + constexpr auto operator==(involuter const& other) const { return it_ == other.it_; } + constexpr auto operator!=(involuter const& other) const { return it_ != other.it_; } + constexpr auto operator<(involuter const& other) const { return it_ < other.it_; } + + constexpr auto operator+=(typename involuter::difference_type n) -> decltype(auto) { + it_ += n; + return *this; + } + constexpr auto operator+(typename involuter::difference_type n) const { return involuter{it_ + n, f_}; } + constexpr auto operator-(typename involuter::difference_type n) const { return involuter{it_ - n, f_}; } + constexpr auto operator-(involuter const& other) const { return it_ - other.it_; } + + constexpr auto operator[](typename involuter::difference_type n) const { return reference{*(it_ + n), f_}; } }; #if defined(__cpp_deduction_guides) -template involuted(T&&, F)->involuted; +template involuted(T&&, F) -> involuted; #endif template using negated = involuted>; -template using negater = involuter>; +template using negater = involuter>; BOOST_AUTO_TEST_CASE(multi_array_involution) { double doub = 5; auto&& cee = involuted>{doub}; - BOOST_REQUIRE( cee == -5. ); + BOOST_REQUIRE( cee == -5.0 ); cee = 10.; - BOOST_REQUIRE( doub = -10. ); + BOOST_REQUIRE( doub = -10.0 ); - auto m5 = involuted>(5.); - BOOST_REQUIRE( m5 == -5. 
); + auto m5 = involuted>(5.0); + BOOST_REQUIRE( m5 == -5.0 ); } BOOST_AUTO_TEST_CASE(static_array_cast) { - multi::static_array arr = { 0., 1., 2., 3., 4.}; + multi::static_array arr = {0.0, 1.0, 2.0, 3.0, 4.0}; + auto&& ref = arr.static_array_cast(); - BOOST_REQUIRE( &ref[2] == &arr [2] ); - BOOST_REQUIRE( &arr [2] == &ref[2] ); + + BOOST_REQUIRE( &ref[2] == &arr[2] ); + BOOST_REQUIRE( &arr[2] == &ref[2] ); BOOST_REQUIRE( std::equal(begin(ref), end(ref), begin(arr), end(arr)) ); - BOOST_REQUIRE( ref == arr ); + + BOOST_REQUIRE( ref == arr() ); + BOOST_REQUIRE( arr() == ref ); + + BOOST_REQUIRE( ref == arr ); BOOST_REQUIRE( arr == ref ); } BOOST_AUTO_TEST_CASE(static_array_cast_2) { multi::array arr({2, 5}); - std::iota(arr.elements().begin(), arr.elements().end(), 0.); + std::iota(arr.elements().begin(), arr.elements().end(), 0.0); auto&& ref = arr.static_array_cast(); + BOOST_REQUIRE( ref[1][1] == arr[1][1] ); BOOST_REQUIRE( std::equal(begin(ref[1]), end(ref[1]), begin(arr[1]), end(arr[1])) ); BOOST_REQUIRE( ref[1] == arr[1] ); + BOOST_REQUIRE( std::equal(begin(ref), end(ref), begin(arr), end(arr)) ); - BOOST_REQUIRE( ref == arr ); - BOOST_REQUIRE( arr == ref ); + + BOOST_REQUIRE( ref == arr ); + BOOST_REQUIRE( arr == ref ); } BOOST_AUTO_TEST_CASE(static_array_cast_3) { -{ - multi::static_array arr { { 0., 1., 2., 3., 4.} }; - multi::static_array arr2 = { -0., -1., -2., -3., -4.}; - auto&& neg_arr = multi::static_array_cast>>(arr); - BOOST_REQUIRE( neg_arr[2] == arr2[2] ); - BOOST_REQUIRE( arr2[2] == neg_arr[2] ); - BOOST_REQUIRE( std::equal(begin(neg_arr), end(neg_arr), begin(arr2), end(arr2)) ); - BOOST_REQUIRE( neg_arr == arr2 ); - BOOST_REQUIRE( arr2 == neg_arr ); -} -{ - multi::static_array arr({4, 5}, 0.); - std::iota(elements(arr).begin(), elements(arr).end(), 0.); + { + multi::static_array const arr = {+0.0, +1.0, +2.0, +3.0, +4.0}; + multi::static_array arr2 = {-0.0, -1.0, -2.0, -3.0, -4.0}; - multi::array arr2({4, 5}); - 
std::transform(begin(elements(arr)), end(elements(arr)), begin(elements(arr2)), std::negate<>{}); + auto&& neg_arr = multi::static_array_cast>>(arr); - auto&& neg_arr = arr.static_array_cast>(); + BOOST_REQUIRE( neg_arr[2] == arr2[2] ); + BOOST_REQUIRE( arr2[2] == neg_arr[2] ); + BOOST_REQUIRE( std::equal(begin(neg_arr), end(neg_arr), begin(arr2), end(arr2)) ); + BOOST_REQUIRE( neg_arr == arr2 ); + BOOST_REQUIRE( arr2 == neg_arr ); + } + { + multi::static_array arr({4, 5}, 0.0); + std::iota(elements(arr).begin(), elements(arr).end(), 0.0); - BOOST_REQUIRE( neg_arr[1][1] == arr2[1][1] ); - BOOST_REQUIRE( arr2[1][1] == neg_arr[1][1] ); + multi::array arr2({4, 5}); + std::transform(begin(elements(arr)), end(elements(arr)), begin(elements(arr2)), std::negate<>{}); - BOOST_REQUIRE( std::equal(begin(arr2[1]), end(arr2[1]), begin(neg_arr[1]), end(neg_arr[1])) ); + auto&& neg_arr = arr.static_array_cast>(); - BOOST_REQUIRE( arr2[1] == neg_arr[1] ); - BOOST_REQUIRE( neg_arr[1] == arr2[1] ); + BOOST_REQUIRE( neg_arr[1][1] == arr2[1][1] ); + BOOST_REQUIRE( arr2[1][1] == neg_arr[1][1] ); - BOOST_REQUIRE( std::equal(begin(arr2), end(arr2), begin(neg_arr), end(neg_arr)) ); - BOOST_REQUIRE( neg_arr == arr2 ); - BOOST_REQUIRE( arr2 == neg_arr ); -} + BOOST_REQUIRE( std::equal(begin(arr2[1]), end(arr2[1]), begin(neg_arr[1]), end(neg_arr[1])) ); + + BOOST_REQUIRE( arr2[1] == neg_arr[1] ); + BOOST_REQUIRE( neg_arr[1] == arr2[1] ); + + BOOST_REQUIRE( std::equal(begin(arr2), end(arr2), begin(neg_arr), end(neg_arr)) ); + BOOST_REQUIRE( neg_arr == arr2 ); + BOOST_REQUIRE( arr2 == neg_arr ); + } } diff --git a/external_codes/boost_multi/multi/test/std_vector_substitutability.cpp b/external_codes/boost_multi/multi/test/std_vector_substitutability.cpp new file mode 100644 index 0000000000..f14c88deff --- /dev/null +++ b/external_codes/boost_multi/multi/test/std_vector_substitutability.cpp @@ -0,0 +1,308 @@ +// Copyright 2019-2024 Alfredo A. 
Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wundef" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wsign-conversion" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#endif + +#ifndef BOOST_TEST_MODULE +#define BOOST_TEST_MAIN +#endif + +#include + +#if defined(__clang__) +#pragma clang diagnostic pop +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + +// workaround for libc++ and boost test +// namespace boost::unit_test::ut_detail { +// auto normalize_test_case_name(const_string name) -> std::string { +// return ( name[0] == '&' ? std::string(name.begin()+1, name.size()-1) : std::string(name.begin(), name.size() )); // NOLINT(fuchsia-default-arguments-calls) +// } +// } // end namespace boost::unit_test::ut_detail + +namespace multi = boost::multi; + +template // e.g. std::vector or multi::array +void resize_copy_1(std::vector const& source, DynamicArray& darr) { + darr = DynamicArray(source); +} + +template // e.g. std::vector or multi::array +void resize_copy_2(std::vector const& source, DynamicArray& darr) { + darr = DynamicArray(source.begin(), source.end()); // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) +} + +template // e.g. 
std::vector or multi::array +void resize_copy_3(std::vector const& source, DynamicArray& darr) { + darr = std::decay_t(source.begin(), source.end()); // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) +} + +template // e.g. std::vector or multi::array +void resize_copy_4(It first, It last, DynamicArray& darr) { + darr = DynamicArray(first, last); // or std::decay_t(source.begin(), source.end()) // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) +} + +template // e.g. std::vector or multi::array +void resize_copy_5(It first, It last, DynamicArray& darr) { + darr.assign(first, last); // or std::decay_t(source.begin(), source.end()) +} + +// void resize_copy_6 ----> see below test_resize_copy_6 + +BOOST_AUTO_TEST_CASE(test_resize_copy_1) { + std::vector const source = {0, 1, 2, 3}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + + std::vector dest_v = {99, 99}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + multi::array dest_a = {88, 88}; + + BOOST_REQUIRE( dest_v.size() == 2 ); + BOOST_REQUIRE( dest_a.size() == 2 ); + + resize_copy_1(source, dest_v); + + BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 3 ); + + resize_copy_1(source, dest_a); + + BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 3 ); +} + +BOOST_AUTO_TEST_CASE(test_resize_copy_2) { + std::vector const source = {0, 1, 2, 3}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + + std::vector dest_v = {99, 99}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + multi::array dest_a = {88, 88}; + + BOOST_REQUIRE( dest_v.size() == 2 ); + BOOST_REQUIRE( dest_a.size() == 2 ); + + resize_copy_2(source, dest_v); + + BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 3 ); 
+ + resize_copy_2(source, dest_a); + + BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 3 ); +} + +BOOST_AUTO_TEST_CASE(test_resize_copy_3) { + std::vector const source = {0, 10, 20, 30}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + + std::vector dest_v = {990, 990}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + multi::array dest_a = {880, 880}; + + BOOST_REQUIRE( dest_v.size() == 2 ); + BOOST_REQUIRE( dest_a.size() == 2 ); + + resize_copy_3(source, dest_v); + + BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 30 ); + + resize_copy_3(source, dest_a); + + BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 30 ); +} + +BOOST_AUTO_TEST_CASE(test_resize_copy_4) { + std::vector const source = {0, 10, 20, 30}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + + std::vector dest_v = {990, 990}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + multi::array dest_a = {880, 880}; + + BOOST_REQUIRE( dest_v.size() == 2 ); + BOOST_REQUIRE( dest_a.size() == 2 ); + + resize_copy_4(source.begin(), source.end(), dest_v); + + BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 30 ); + + resize_copy_4(source.begin(), source.end(), dest_a); + + BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 30 ); +} + +BOOST_AUTO_TEST_CASE(test_resize_copy_5) { + std::vector const source = {0, 10, 20, 30}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + + std::vector dest_v = {990, 990}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + multi::array dest_a = {880, 880}; + + BOOST_REQUIRE( dest_v.size() == 2 ); + BOOST_REQUIRE( dest_a.size() == 2 ); + + resize_copy_5(source.begin(), source.end(), dest_v); + + 
BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 30 ); + + resize_copy_5(source.begin(), source.end(), dest_a); + + BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 30 ); +} + +BOOST_AUTO_TEST_CASE(test_resize_copy_6) { + std::vector const source = {0, 10, 20, 30}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + + std::vector dest_v = {990, 990}; // testing std::vector vs multi:array NOLINT(fuchsia-default-arguments-calls,-warnings-as-errors) + multi::array dest_a = {880, 880}; + + BOOST_REQUIRE( dest_v.size() == 2 ); + BOOST_REQUIRE( dest_a.size() == 2 ); + + { // look same code as below + dest_v = decltype(dest_v)(source); + } + + BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 30 ); + + { // look same code as above + dest_a = decltype(dest_a)(source); + } + + BOOST_REQUIRE( dest_v.size() == 4 ); + BOOST_REQUIRE( dest_v[3] == 30 ); +} + +BOOST_AUTO_TEST_CASE(assign_equality) { + { + multi::array const AA = {10, 20, 30}; + std::vector const aa = {10, 20, 30}; // NOLINT(fuchsia-default-arguments-calls) + + BOOST_REQUIRE( std::equal(AA.begin(), AA.end(), aa.begin() ) ); + } + { + multi::array const AA = {10, 20, 30}; + std::vector const aa(AA.begin(), AA.end()); // NOLINT(fuchsia-default-arguments-calls) + + BOOST_REQUIRE( std::equal(AA.begin(), AA.end(), aa.begin() ) ); + } + { + multi::array const AA = {10, 20, 30}; + + auto const aa(AA().operator std::vector()); + + BOOST_REQUIRE( std::equal(AA.begin(), AA.end(), aa.begin() ) ); + } + // { + // multi::array const AA = {1.0, 2.0, 3.0}; + // std::vector const aa(AA); + + // BOOST_REQUIRE( std::equal(AA.begin(), AA.end(), aa.begin() ) ); + // } + { + std::vector const aa = {10, 20, 30}; // NOLINT(fuchsia-default-arguments-calls) + multi::array const AA(aa.begin(), aa.end()); + + BOOST_REQUIRE( std::equal(AA.begin(), AA.end(), aa.begin() ) ); + } + { + std::vector const aa = {10, 20, 30}; // 
NOLINT(fuchsia-default-arguments-calls) + multi::array const AA(aa); + + BOOST_REQUIRE( std::equal(AA.begin(), AA.end(), aa.begin() ) ); + } +} + +BOOST_AUTO_TEST_CASE(construct_from_vector_2D) { + { + multi::array const AA = { + {10, 20}, + {30, 40}, + }; + BOOST_REQUIRE( AA.num_elements() == 4 ); + + std::vector> const aa(AA.begin(), AA.end()); // NOLINT(fuchsia-default-arguments-calls) + } + { + multi::array const AA = { + {10, 20}, + {30, 40}, + }; + BOOST_REQUIRE( AA.num_elements() == 4 ); + + auto const aa(AA().operator std::vector>()); + } + { + multi::array const AA = { + {10, 20}, + {30, 40}, + }; + BOOST_REQUIRE( AA.num_elements() == 4 ); + + auto const aa = AA.operator std::vector>(); + } + { + multi::array const AA = { + {10, 20}, + {30, 40}, + }; + BOOST_REQUIRE( AA.num_elements() == 4 ); + + auto const aa = static_cast>>(AA); + } +#if !defined(__circle_build__) || (__circle_build__ > 200) // crashes circle 187-200 in docker + { + multi::array const AA = { + {1.0, 2.0}, + {3.0, 4.0}, + }; + + BOOST_REQUIRE( AA.num_elements() == 4 ); + + std::vector> const aa(AA); + + BOOST_REQUIRE( aa.size() == 2 ); + // std::vector> const aaa = AA; // doesn't compile, needs implicit conversion + } +#endif + { + multi::array const AA = { + {1.0, 2.0}, + {3.0, 4.0}, + }; + BOOST_REQUIRE( AA.num_elements() == 4 ); + } +#if !defined(__circle_build__) || (__circle_build__ > 200 ) // crashes circle 187-200 in docker + { + multi::array const AA = { + {1.0, 2.0}, + {3.0, 4.0}, + }; + BOOST_REQUIRE( AA.num_elements() == 4 ); + + std::vector> const aa(AA); + BOOST_REQUIRE( aa.size() == 2 ); + } +#endif +} diff --git a/external_codes/boost_multi/multi/test/subrange.cpp b/external_codes/boost_multi/multi/test/subrange.cpp index 31bbac633e..da42e71136 100644 --- a/external_codes/boost_multi/multi/test/subrange.cpp +++ b/external_codes/boost_multi/multi/test/subrange.cpp @@ -1,102 +1,137 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// © Alfredo A. 
Correa 2018-2021 +// Copyright 2018-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi subrange selection" -#define BOOST_TEST_DYN_LINK -#include +#include -#include "multi/array.hpp" +#include // for std::iota -#include // iota +// Suppress warnings from boost.test +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wold-style-cast" +# pragma clang diagnostic ignored "-Wundef" +# pragma clang diagnostic ignored "-Wconversion" +# pragma clang diagnostic ignored "-Wsign-conversion" +# pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wundef" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +#ifndef BOOST_TEST_MODULE +# define BOOST_TEST_MAIN +#endif + +#include namespace multi = boost::multi; BOOST_AUTO_TEST_CASE(multi_array_range_section) { -{ - multi::array arr({10, 20, 30, 40}, 99.); - std::iota(arr.elements().begin(), arr.elements().end(), 0.); - - { - static_assert( decltype( arr({0, 10}, {0, 20}, {0, 30}, {0, 40}) )::rank_v == 4 , "!"); - static_assert( decltype( arr( 5, {0, 20}, {0, 30}, {0, 40}) )::rank_v == 3 , "!"); - static_assert( decltype( arr({0, 10}, 10, {0, 30}, {0, 40}) )::rank_v == 3 , "!"); - static_assert( decltype( arr({0, 10}, {0, 20}, 15, {0, 40}) )::rank_v == 3 , "!"); - static_assert( decltype( arr({0, 10}, {0, 20}, {0, 30}, 20) )::rank_v == 3 , "!"); - - static_assert( decltype( arr( 5, 6, {0, 30}, {0, 40}) )::rank_v == 2 , "!"); - static_assert( decltype( arr({0, 10}, 6, 15, {0, 40}) )::rank_v == 2 , "!"); - static_assert( decltype( arr({0, 10}, {0, 20}, 15, 20) )::rank_v == 2 , "!"); - } { - 
auto&& all = arr({0, 10}, {0, 20}, {0, 30}, {0, 40}); - BOOST_REQUIRE( &arr[1][2][3][4] == &all[1][2][3][4] ); - BOOST_REQUIRE( &arr[1][2][3][4] == &arr({0, 10}, {0, 20}, {0, 30}, {0, 40})[1][2][3][4] ); - } - { - using multi::_; - auto&& all = arr( {0, 10} , {0, 20} ); - BOOST_REQUIRE( &arr[1][2][3][4] == &all[1][2][3][4] ); + #ifndef _MSC_VER + multi::array arr({10, 20, 30, 40}, 99.0); + #else + multi::array arr(multi::extensions_t<4>{10, 20, 30, 40}, 99.0); + #endif + std::iota(arr.elements().begin(), arr.elements().end(), 0.0); + + { + static_assert(decltype(arr({0, 10}, {0, 20}, {0, 30}, {0, 40}))::rank::value == 4); + static_assert(decltype(arr(5, {0, 20}, {0, 30}, {0, 40}))::rank::value == 3); + static_assert(decltype(arr({0, 10}, 10, {0, 30}, {0, 40}))::rank::value == 3); + static_assert(decltype(arr({0, 10}, {0, 20}, 15, {0, 40}))::rank::value == 3); + static_assert(decltype(arr({0, 10}, {0, 20}, {0, 30}, 20))::rank::value == 3); + + static_assert(decltype(arr(5, 6, {0, 30}, {0, 40}))::rank::value == 2); + static_assert(decltype(arr({0, 10}, 6, 15, {0, 40}))::rank::value == 2); + static_assert(decltype(arr({0, 10}, {0, 20}, 15, 20))::rank::value == 2); + + static_assert(decltype(arr({0, 10}, {0, 20}, {0, 30}, {0, 40}))::rank_v == 4); + static_assert(decltype(arr(5, {0, 20}, {0, 30}, {0, 40}))::rank_v == 3); + static_assert(decltype(arr({0, 10}, 10, {0, 30}, {0, 40}))::rank_v == 3); + static_assert(decltype(arr({0, 10}, {0, 20}, 15, {0, 40}))::rank_v == 3); + static_assert(decltype(arr({0, 10}, {0, 20}, {0, 30}, 20))::rank_v == 3); + + static_assert(decltype(arr(5, 6, {0, 30}, {0, 40}))::rank_v == 2); + static_assert(decltype(arr({0, 10}, 6, 15, {0, 40}))::rank_v == 2); + static_assert(decltype(arr({0, 10}, {0, 20}, 15, 20))::rank_v == 2); + } + { + auto&& all = arr({0, 10}, {0, 20}, {0, 30}, {0, 40}); + BOOST_REQUIRE( &arr[1][2][3][4] == &all[1][2][3][4] ); + BOOST_REQUIRE( &arr[1][2][3][4] == &arr({0, 10}, {0, 20}, {0, 30}, {0, 40})[1][2][3][4] ); + } + { 
+ using multi::_; + auto&& all = arr({0, 10}, {0, 20}); + BOOST_REQUIRE( &arr[1][2][3][4] == &all[1][2][3][4] ); + } + { + BOOST_REQUIRE( &arr(0, 0, 0, 0) == &arr[0][0][0][0] ); + } + { + auto&& sub = arr({0, 5}, {0, 10}, {0, 15}, {0, 20}); + BOOST_REQUIRE( &sub[1][2][3][4] == &arr[1][2][3][4] ); + } } { - BOOST_REQUIRE( &arr(0, 0, 0, 0) == &arr[0][0][0][0] ); - } - { - auto&& sub = arr({0, 5}, {0, 10}, {0, 15}, {0, 20}); - BOOST_REQUIRE( &sub[1][2][3][4] == &arr[1][2][3][4] ); - } -} -{ - multi::array arr = { - { 1., 2., 3., 4.}, - { 5., 6., 7., 8.}, - { 9., 0., 1., 2.}, - { 3., 4., 5., 6.} - }; - multi::array arr2 = { - {91., 92., 93., 94.}, - {95., 96., 97., 98.}, - {99., 90., 91., 92.}, - {93., 94., 95., 96.} - }; + multi::array arr = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 0.0, 1.0, 2.0}, + {3.0, 4.0, 5.0, 6.0}, + }; + multi::array arr2 = { + {91.0, 92.0, 93.0, 94.0}, + {95.0, 96.0, 97.0, 98.0}, + {99.0, 90.0, 91.0, 92.0}, + {93.0, 94.0, 95.0, 96.0}, + }; - arr({0, 2}, {0, 2}) = arr2({0, 2}, {0, 2}); - BOOST_REQUIRE( arr != arr2 ); - BOOST_REQUIRE( arr({0, 2}, {0, 2}) == arr2({0, 2}, {0, 2}) ); - BOOST_REQUIRE( arr[1][1] == 96. ); -} + arr({0, 2}, {0, 2}) = arr2({0, 2}, {0, 2}); + BOOST_REQUIRE( arr != arr2 ); + BOOST_REQUIRE( arr({0, 2}, {0, 2}) == arr2({0, 2}, {0, 2}) ); + BOOST_REQUIRE( arr[1][1] == 96. 
); + } } BOOST_AUTO_TEST_CASE(subrange_assignment) { multi::array const arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 0., 1., 2.}, - {3., 4., 5., 6.} + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 0.0, 1.0, 2.0}, + {3.0, 4.0, 5.0, 6.0}, }; { multi::array arr2 = { - {9., 9., 9.}, - {9., 9., 9.}, - {9., 9., 9.} + {9.0, 9.0, 9.0}, + {9.0, 9.0, 9.0}, + {9.0, 9.0, 9.0}, }; arr2({0, 3}, {0, 3}) = arr({0, 3}, {0, 3}); BOOST_REQUIRE( arr2[1][2] == arr[1][2] ); } { multi::array arr2 = { - {9., 9., 9.}, - {9., 9., 9.}, - {9., 9., 9.} + {9.0, 9.0, 9.0}, + {9.0, 9.0, 9.0}, + {9.0, 9.0, 9.0}, }; arr2() = arr({0, 3}, {0, 3}); BOOST_REQUIRE( arr2[1][2] == arr[1][2] ); BOOST_REQUIRE( arr2() == arr({0, 3}, {0, 3}) ); } - { + { multi::array arr2 = { - {9., 9., 9.}, - {9., 9., 9.}, - {9., 9., 9.} + {9.0, 9.0, 9.0}, + {9.0, 9.0, 9.0}, + {9.0, 9.0, 9.0}, }; arr2 = arr({0, 3}, {0, 3}); BOOST_REQUIRE( arr2[1][2] == arr[1][2] ); @@ -105,15 +140,15 @@ BOOST_AUTO_TEST_CASE(subrange_assignment) { } BOOST_AUTO_TEST_CASE(subrange_ranges_sliced_1D) { - multi::array arr = {1., 2., 3., 4.}; - auto&& Ab = arr.sliced(1, 3); + multi::array arr = {1.0, 2.0, 3.0, 4.0}; + auto&& Ab = arr.sliced(1, 3); BOOST_REQUIRE( &Ab[0] == &arr[1] ); auto&& Ab2 = Ab; BOOST_REQUIRE( &Ab2[0] == &arr[1] ); -// auto Abb = Ab; // not allowed -// auto Abb = std::move(Ab); (void)Abb; + // auto Abb = Ab; // not allowed! 
+ // auto Abb = std::move(Ab); (void)Abb; auto const& Abc = arr.sliced(1, 3); BOOST_REQUIRE( &Abc[0] == &arr[1] ); @@ -124,10 +159,10 @@ BOOST_AUTO_TEST_CASE(subrange_ranges_sliced_1D) { BOOST_AUTO_TEST_CASE(subrange_ranges_sliced) { multi::array arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 0., 1., 2.}, - {3., 4., 5., 6.} + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 0.0, 1.0, 2.0}, + {3.0, 4.0, 5.0, 6.0}, }; auto&& Ab = arr.sliced(0, 3); BOOST_REQUIRE( &Ab[2][2] == &arr[2][2] ); @@ -135,16 +170,16 @@ BOOST_AUTO_TEST_CASE(subrange_ranges_sliced) { auto const& Abc = arr.sliced(0, 3); BOOST_REQUIRE( &Abc[2][2] == &arr[2][2] ); - auto AB = arr.sliced(0, 3); + auto AB = arr.sliced(0, 3); BOOST_REQUIRE( &AB[2][2] == &arr[2][2] ); } BOOST_AUTO_TEST_CASE(subrange_ranges) { multi::array arr = { - {1., 2., 3., 4.}, - {5., 6., 7., 8.}, - {9., 0., 1., 2.}, - {3., 4., 5., 6.} + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 0.0, 1.0, 2.0}, + {3.0, 4.0, 5.0, 6.0}, }; auto&& Ab = arr({0, 3}, {0, 3}); BOOST_REQUIRE( &Ab[2][2] == &arr[2][2] ); @@ -158,34 +193,74 @@ BOOST_AUTO_TEST_CASE(subrange_ranges) { BOOST_AUTO_TEST_CASE(subrange_1D_issue129) { multi::array arr({1024}, double{}); - std::iota(arr.elements().begin(), arr.elements().end(), 0.); + std::iota(arr.elements().begin(), arr.elements().end(), 0.0); - BOOST_REQUIRE( arr.sliced(0, 512, 2)[ 1] == 2. ); - BOOST_REQUIRE( arr.sliced(0, 512, 2)[255] == 510. ); + BOOST_REQUIRE( arr.sliced(0, 512, 2)[ 1] == 2.0 ); + BOOST_REQUIRE( arr.sliced(0, 512, 2)[255] == 510.0 ); - BOOST_REQUIRE( arr.sliced(0, 512)[ 1] == 1. ); - BOOST_REQUIRE( arr.sliced(0, 512)[511] == 511. ); + BOOST_REQUIRE( arr.sliced(0, 512)[ 1] == 1.0 ); + BOOST_REQUIRE( arr.sliced(0, 512)[511] == 511.0 ); - BOOST_REQUIRE( arr({0, 512})[ 1] == 1. ); - BOOST_REQUIRE( arr({0, 512})[511] == 511. ); + BOOST_REQUIRE( arr({0, 512})[ 1] == 1.0 ); + BOOST_REQUIRE( arr({0, 512})[511] == 511.0 ); -// BOOST_REQUIRE( arr({0, 512, 2})[ 1] == 2. 
); // TODO(correaa) coompilation error -// BOOST_REQUIRE( arr({0, 512, 2})[255] == 510. ); // TODO(correaa) coompilation error + // BOOST_REQUIRE( arr({0, 512, 2})[ 1] == 2. ); // TODO(correaa) coompilation error + // BOOST_REQUIRE( arr({0, 512, 2})[255] == 510. ); // TODO(correaa) coompilation error } BOOST_AUTO_TEST_CASE(subrange_2D_issue129) { multi::array arr({66, 1024}, double{}); - std::iota(arr.elements().begin(), arr.elements().end(), 0.); + std::iota(arr.elements().begin(), arr.elements().end(), 0.0); + + BOOST_REQUIRE( arr[0].sliced(0, 512, 2)[ 1] == 2.0 ); + BOOST_REQUIRE( arr[0].sliced(0, 512, 2)[255] == 510.0 ); + + BOOST_REQUIRE( arr[0].sliced(0, 512)[ 1] == 1.0 ); + BOOST_REQUIRE( arr[0].sliced(0, 512)[511] == 511.0 ); + + BOOST_REQUIRE( arr(0, {0, 512})[ 1] == 1.0 ); + BOOST_REQUIRE( arr(0, {0, 512})[511] == 511.0 ); + + // BOOST_REQUIRE( arr(0, {0, 512, 2})[ 1] == 2. ); // TODO(correaa) coompilation error + // BOOST_REQUIRE( arr(0, {0, 512, 2})[255] == 510. ); // TODO(correaa) coompilation error +} + +class rng3_t { + int start_; + int finish_; + + public: + rng3_t(int start, int finish) : start_{start}, finish_{finish} {} // NOLINT(bugprone-easily-swappable-parameters) + auto first() const { return start_; } + auto last() const { return finish_; } +}; + +BOOST_AUTO_TEST_CASE(subrange_start_finish) { + multi::array arr = { + { 1.0, 2.0}, + { 3.0, 4.0}, + { 5.0, 6.0}, + { 7.0, 8.0}, + { 9.0, 10.0}, + {11.0, 12.0}, + {13.0, 14.0}, + }; + BOOST_REQUIRE( &arr({2, 5}, 1)[0] == &arr[2][1] ); + + multi::irange const rng(2, 5); + BOOST_REQUIRE( &arr(rng, 1)[0] == &arr[2][1] ); + + struct : multi::irange { + using multi::irange::irange; + } const rng2(2, 5); + + BOOST_REQUIRE( &arr(rng2, 1)[0] == &arr[2][1] ); - BOOST_REQUIRE( arr[0].sliced(0, 512, 2)[ 1] == 2. ); - BOOST_REQUIRE( arr[0].sliced(0, 512, 2)[255] == 510. ); + rng3_t const rng3{2, 5}; - BOOST_REQUIRE( arr[0].sliced(0, 512)[ 1] == 1. ); - BOOST_REQUIRE( arr[0].sliced(0, 512)[511] == 511. 
); + multi::irange const rng4(rng3); - BOOST_REQUIRE( arr(0, {0, 512})[ 1] == 1. ); - BOOST_REQUIRE( arr(0, {0, 512})[511] == 511. ); + BOOST_REQUIRE( &arr(rng4, 1)[0] == &arr[2][1] ); -// BOOST_REQUIRE( arr(0, {0, 512, 2})[ 1] == 2. ); // TODO(correaa) coompilation error -// BOOST_REQUIRE( arr(0, {0, 512, 2})[255] == 510. ); // TODO(correaa) coompilation error + BOOST_REQUIRE( &arr(rng3, 1)[0] == &arr[2][1] ); } diff --git a/external_codes/boost_multi/multi/test/transform.cpp b/external_codes/boost_multi/multi/test/transform.cpp index e99a74412d..44ef532fce 100644 --- a/external_codes/boost_multi/multi/test/transform.cpp +++ b/external_codes/boost_multi/multi/test/transform.cpp @@ -1,19 +1,48 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- -// Copyright 2019-2022 Alfredo A. Correa +// Copyright 2019-2024 Alfredo A. Correa +// Copyright 2024 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +#include + +#include +#include + +// Suppress warnings from boost.test +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wundef" +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wsign-conversion" +#pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma GCC diagnostic ignored "-Wundef" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wsign-conversion" +#pragma GCC diagnostic ignored "-Wfloat-equal" +#endif -#define BOOST_TEST_MODULE "C++ Unit Tests for Multi transformed array" -#include +#ifndef BOOST_TEST_MODULE +#define BOOST_TEST_MAIN +#endif -#include "multi/array.hpp" +#include -#include +#define BOOST_MULTI_DECLRETURN(ExpR) \ + ->decltype(ExpR) { return ExpR; } // NOLINT(cppcoreguidelines-macro-usage) saves a lot 
of typing namespace test { - constexpr struct neg_t { - template - constexpr auto operator()(T const& value) const -> decltype(-value) {return -value;} - } neg; -} // end namespace test + +struct neg_t { + template + constexpr auto operator()(T const& value) const -> decltype(-value) { return -value; } +}; +constexpr inline neg_t neg; + +} // end namespace test namespace test { @@ -21,30 +50,31 @@ template class involuter; template class involuted { - Ref r_; - template>{}, int> =0> - friend auto underlying(Involuted&& self) ->decltype(self.r_) {return self.r_;} + Ref r_; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members) + friend auto underlying(involuted& self) -> decltype(auto) { return self.r_; } + friend auto underlying(involuted&& self) -> decltype(auto) { return std::move(self).r_; } + friend auto underlying(involuted const& self) -> decltype(auto) { return self.r_; } public: using decay_type = std::decay_t()(std::declval()))>; - constexpr involuted(Involution /*stateless*/, Ref ref) : r_{std::forward(ref)} {} + constexpr involuted(Involution /*stateless*/, Ref ref) : r_{ref} {} auto operator=(decay_type const& other) -> involuted& { // NOLINT(fuchsia-trailing-return) simulate reference r_ = Involution{}(other); return *this; } - constexpr explicit operator decay_type() const {return Involution{}(r_);} + constexpr explicit operator decay_type() const { return Involution{}(r_); } // NOLINTNEXTLINE(google-runtime-operator): simulated reference - constexpr auto operator&()&& {return involuter())>{Involution{}, &r_};} // NOLINT(runtime/operator) + // constexpr auto operator&() && { return involuter())>{Involution{}, &r_}; } // NOLINT(runtime/operator) // NOLINTNEXTLINE(google-runtime-operator): simulated reference - constexpr auto operator&() & {return involuter())>{Involution{}, &r_};} // NOLINT(runtime/operator) + // constexpr auto operator&() & { return involuter())>{Involution{}, &r_}; } // NOLINT(runtime/operator) // 
NOLINTNEXTLINE(google-runtime-operator): simulated reference - constexpr auto operator&() const& {return involuter())>{Involution{}, &r_};} // NOLINT(runtime/operator) + // constexpr auto operator&() const& { return involuter())>{Involution{}, &r_}; } // NOLINT(runtime/operator) - auto operator==(involuted const& other) const {return r_ == other.r_;} - auto operator!=(involuted const& other) const {return r_ == other.r_;} + auto operator==(involuted const& other) const { return r_ == other.r_; } + auto operator!=(involuted const& other) const { return r_ == other.r_; } - auto operator==(decay_type const& other) const {return Involution{}(r_) == other;} - auto operator!=(decay_type const& other) const {return Involution{}(r_) != other;} + auto operator==(decay_type const& other) const { return Involution{}(r_) == other; } + auto operator!=(decay_type const& other) const { return Involution{}(r_) != other; } }; template @@ -53,9 +83,9 @@ class involuter { template friend class involuter; public: - using pointer = involuter::pointer>; - using element_type = typename std::pointer_traits::element_type; - using difference_type = typename std::pointer_traits::difference_type; + using pointer = involuter::pointer>; + using element_type = typename std::pointer_traits::element_type; + using difference_type = typename std::pointer_traits::difference_type; template using rebind = involuter::template rebind>; using reference = involuted::reference>; @@ -63,65 +93,81 @@ class involuter { using iterator_category = typename std::iterator_traits::iterator_category; constexpr explicit involuter(It it) : it_{std::move(it)} {} - constexpr involuter(Involution /*stateless*/, It it) : it_{std::move(it)} {}// f_{std::move(f)}{} + constexpr involuter(Involution /*stateless*/, It it) : it_{std::move(it)} {} // f_{std::move(f)}{} template explicit involuter(involuter const& other) : it_{other.it_} {} - constexpr auto operator*() const {return reference{Involution{}, *it_};} - constexpr auto 
operator->() const {return pointer{&*it_};} + constexpr auto operator*() const { return reference{Involution{}, *it_}; } + constexpr auto operator->() const { return pointer{&*it_}; } - constexpr auto operator==(involuter const& other) const {return it_ == other.it_;} - constexpr auto operator!=(involuter const& other) const {return it_ != other.it_;} + constexpr auto operator==(involuter const& other) const { return it_ == other.it_; } + constexpr auto operator!=(involuter const& other) const { return it_ != other.it_; } - constexpr auto operator+=(difference_type n) -> involuter& {it_ += n; return *this;} - constexpr auto operator-=(difference_type n) -> involuter& {it_ -= n; return *this;} + constexpr auto operator+=(difference_type n) -> involuter& { + it_ += n; + return *this; + } + constexpr auto operator-=(difference_type n) -> involuter& { + it_ -= n; + return *this; + } - constexpr auto operator+(difference_type n) const {return involuter{it_ + n};} - constexpr auto operator-(difference_type n) const {return involuter{it_ - n};} + constexpr auto operator+(difference_type n) const { return involuter{it_ + n}; } + constexpr auto operator-(difference_type n) const { return involuter{it_ - n}; } }; template using negated = involuted, Ref>; -template using negater = involuter, It >; +template using negater = involuter, It>; class basic_conjugate_t { - template struct prio : std::conditional_t, std::true_type>{}; - template static auto _(prio<0>/**/, T const& value) DECLRETURN(std::conj(value)) - template static auto _(prio<1>/**/, T const& value) DECLRETURN( conj(value)) - template static auto _(prio<2>/**/, T const& value) DECLRETURN( T::conj(value)) - template static auto _(prio<3>/**/, T const& value) DECLRETURN( value.conj( )) + // clang-format off + template struct prio : std::conditional_t, std::true_type> {}; + + template static auto _(prio<0> /**/, T const& value) BOOST_MULTI_DECLRETURN( std::conj(value)) + template static auto _(prio<1> /**/, T const& 
value) BOOST_MULTI_DECLRETURN( conj(value)) + template static auto _(prio<2> /**/, T const& value) BOOST_MULTI_DECLRETURN( T::conj(value)) + template static auto _(prio<3> /**/, T const& value) BOOST_MULTI_DECLRETURN(value.conj() ) public: - template static auto _(T const& value) DECLRETURN(_(prio<3>{}, value)) + template + static auto _(T const& value) BOOST_MULTI_DECLRETURN(_(prio<3>{}, value)) + // clang-format on }; template struct conjugate : private basic_conjugate_t { - constexpr auto operator()(T const& arg) const DECLRETURN(_(arg)) + constexpr auto operator()(T const& arg) const BOOST_MULTI_DECLRETURN(_(arg)) }; template<> struct conjugate<> : private basic_conjugate_t { template - constexpr auto operator()(T const& arg) const DECLRETURN(_(arg)) + constexpr auto operator()(T const& arg) const BOOST_MULTI_DECLRETURN(_(arg)) }; #if defined(__NVCC__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsubobject-linkage" #endif -template struct conjd : test::involuted, ComplexRef>{ +template struct conjd : test::involuted, ComplexRef> { explicit conjd(ComplexRef ref) : test::involuted, ComplexRef>(conjugate<>{}, ref) {} - auto real() const {return underlying(*this).real();} - auto imag() const {return negated, ComplexRef> const&>()).imag())>{std::negate<>{}, underlying(*this).imag()};} - friend auto real(conjd const& self) -> decltype(auto) {using std::real; return real(static_cast(self));} - friend auto imag(conjd const& self) -> decltype(auto) {using std::imag; return imag(static_cast(self));} + auto real() const { return underlying(*this).real(); } + auto imag() const { return negated, ComplexRef> const&>()).imag())>{std::negate<>{}, underlying(*this).imag()}; } + friend auto real(conjd const& self) -> decltype(auto) { + using std::real; + return real(static_cast(self)); + } + friend auto imag(conjd const& self) -> decltype(auto) { + using std::imag; + return imag(static_cast(self)); + } }; #if defined(__NVCC__) #pragma GCC diagnostic pop #endif 
#if defined(__cpp_deduction_guides) -template conjd(T&&)->conjd; +template conjd(T&&) -> conjd; #endif template using conjr = test::involuter, Complex>; @@ -132,14 +178,14 @@ class indirect_real { public: explicit indirect_real(P const& ptr) : impl_{ptr} {} - auto operator+(std::ptrdiff_t n) const {return indirect_real{impl_ + n};} + auto operator+(std::ptrdiff_t n) const { return indirect_real{impl_ + n}; } // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast): extra real part as reference - auto operator*() const -> decltype(auto) {return reinterpret_cast&>(*impl_)[0];} + auto operator*() const -> decltype(auto) { return reinterpret_cast&>(*impl_)[0]; } - using difference_type = std::ptrdiff_t; - using value_type = typename std::iterator_traits