diff --git a/CMake/DetermineDeviceArchitectures.cmake b/CMake/DetermineDeviceArchitectures.cmake index 5c5c573b47b..1b95d8604c3 100644 --- a/CMake/DetermineDeviceArchitectures.cmake +++ b/CMake/DetermineDeviceArchitectures.cmake @@ -63,6 +63,7 @@ function(verifyNVIDIAGPUconsistency) endfunction() # auto detect QMC_GPU_ARCHS if not set by user and GPU features are enabled. +# CMAKE_CUDA/HIP_ARCHITECTURES are used as hints if(NOT QMC_GPU_ARCHS AND ENABLE_CUDA) if(QMC_CUDA2HIP) detectAMDGPU() @@ -93,3 +94,13 @@ endif() set(QMC_GPU_ARCHS ${QMC_GPU_ARCHS} CACHE STRING "Accelerator device architectures" FORCE) + +# QMC_GPU_ARCHS is the single source of truth and thus overwrite CMAKE_CUDA/HIP_ARCHITECTURES +if(ENABLE_CUDA) + if(QMC_CUDA2HIP) + set(CMAKE_HIP_ARCHITECTURES ${QMC_GPU_ARCHS} CACHE STRING "HIP architectures" FORCE) + else() + string(REPLACE "sm_" "" CUDA_ARCH_NUMBERS "${QMC_GPU_ARCHS}") + set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_NUMBERS} CACHE STRING "CUDA architectures" FORCE) + endif() +endif() diff --git a/CMake/FindMKL.cmake b/CMake/FindMKL.cmake index 76e5b666240..a52b0b683c5 100644 --- a/CMake/FindMKL.cmake +++ b/CMake/FindMKL.cmake @@ -85,7 +85,7 @@ endif(HAVE_MKL) if(HAVE_MKL AND ENABLE_SYCL) find_library(MKL_SYCL mkl_sycl HINTS ${MKL_ROOT} $ENV{MKLROOT} $ENV{MKL_ROOT} $ENV{MKL_HOME} - PATH_SUFFIXES lib/intel64 + PATH_SUFFIXES lib/intel64 lib REQUIRED ) diff --git a/CMakeLists.txt b/CMakeLists.txt index 271631d012e..494383495e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -368,32 +368,34 @@ if(alignment_remainder) endif() message(STATUS "QMC_SIMD_ALIGNMENT is set to ${QMC_SIMD_ALIGNMENT}") -#--------------------------------------------------------- -# Determine if OpenMP taskloop works with the CXX compiler -#--------------------------------------------------------- -include(TestOpenMPtaskloop) -option(ENABLE_OMP_TASKLOOP "Enable OpenMP taskloop" ${OMP_TASKLOOP_OKAY}) -message(STATUS "ENABLE_OMP_TASKLOOP is set to ${ENABLE_OMP_TASKLOOP}") - 
-#--------------------------------------------------------- -# Set up OpenMP offload compile options -#--------------------------------------------------------- -set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT OFF) -if(ENABLE_OFFLOAD AND DEFINED OPENMP_OFFLOAD_COMPILE_OPTIONS) - message(STATUS "OpenMP offload CXX flags: ${OPENMP_OFFLOAD_COMPILE_OPTIONS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_OFFLOAD_COMPILE_OPTIONS}") - if(${COMPILER} MATCHES "Clang" - AND OPENMP_OFFLOAD_COMPILE_OPTIONS MATCHES "gfx" - AND QMC_CUDA2HIP) - # As of 11/2021, QMC_OFFLOAD_MEM_ASSOCIATED=ON is needed for AMD and mainline LLVM compilers - # when using OpenMP offload to AMD GPU together with HIP. - set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT ON) +if(QMC_OMP) + #--------------------------------------------------------- + # Determine if OpenMP taskloop works with the CXX compiler + #--------------------------------------------------------- + include(TestOpenMPtaskloop) + option(ENABLE_OMP_TASKLOOP "Enable OpenMP taskloop" ${OMP_TASKLOOP_OKAY}) + message(STATUS "ENABLE_OMP_TASKLOOP is set to ${ENABLE_OMP_TASKLOOP}") + + #--------------------------------------------------------- + # Set up OpenMP offload compile options + #--------------------------------------------------------- + set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT OFF) + if(ENABLE_OFFLOAD AND DEFINED OPENMP_OFFLOAD_COMPILE_OPTIONS) + message(STATUS "OpenMP offload CXX flags: ${OPENMP_OFFLOAD_COMPILE_OPTIONS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_OFFLOAD_COMPILE_OPTIONS}") + if(${COMPILER} MATCHES "Clang" + AND OPENMP_OFFLOAD_COMPILE_OPTIONS MATCHES "gfx" + AND QMC_CUDA2HIP) + # As of 11/2021, QMC_OFFLOAD_MEM_ASSOCIATED=ON is needed for AMD and mainline LLVM compilers + # when using OpenMP offload to AMD GPU together with HIP. + set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT ON) + endif() endif() + # Some OpenMP offload runtime libraries have composibility issue with a vendor native runtime. 
+ # A workaround is making the vendor native runtime responsible for memory allocations and OpenMP associate/disassociate them. + cmake_dependent_option(QMC_OFFLOAD_MEM_ASSOCIATED "Manage OpenMP memory allocations via the vendor runtime" + ${QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT} "ENABLE_OFFLOAD;ENABLE_CUDA" OFF) endif() -# Some OpenMP offload runtime libraries have composibility issue with a vendor native runtime. -# A workaround is making the vendor native runtime responsible for memory allocations and OpenMP associate/disassocate them. -cmake_dependent_option(QMC_OFFLOAD_MEM_ASSOCIATED "Manage OpenMP memory allocations via the vendor runtime" - ${QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT} "ENABLE_OFFLOAD;ENABLE_CUDA" OFF) #------------------------------------------------------------------------------------- # consider making this always on if OpenMP is no longer UB with Thread Support Library diff --git a/docs/requirements.txt b/docs/requirements.txt index 1c9cc6c5860..68fd750be0e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -69,7 +69,7 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -urllib3==2.0.6 +urllib3==2.0.7 # via requests # The following packages are considered to be unsafe in a requirements file: diff --git a/external_codes/mpi_wrapper/mpi3/.gitlab-ci.yml b/external_codes/mpi_wrapper/mpi3/.gitlab-ci.yml index 4f77c2d2707..ee7e33b98c4 100644 --- a/external_codes/mpi_wrapper/mpi3/.gitlab-ci.yml +++ b/external_codes/mpi_wrapper/mpi3/.gitlab-ci.yml @@ -1,7 +1,7 @@ # -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;-*- # Copyright 2020-2023 Alfredo A.
Correa -image: correaadock/gnudev:v2 +image: debian:testing variables: GIT_SUBMODULE_STRATEGY: recursive @@ -9,7 +9,7 @@ variables: openmpi: stage: build script: - - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make g++ git libboost-serialization-dev + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make g++ git libboost-serialization-dev - cd .. && ln -s boost-mpi3 mpi3 && cd mpi3 - cd test - mkdir build && cd build @@ -18,12 +18,43 @@ openmpi: - cmake --build . --parallel 2 || make VERBOSE=1 - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure +exampi: + allow_failure: true + stage: build + script: + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates cmake g++ git libboost-serialization-dev make python3 ssh strace # ca-certificates libopenmpi-dev openmpi-bin cmake make g++ git libboost-serialization-dev + - git clone https://correaa:${EXATOKEN}@github.com/tonyskjellum/ExaMPI.git + - cd ExaMPI + - mkdir build && cd build + - cmake .. --install-prefix=$HOME/exa + - make -j 4 + - make install + - export PATH=$HOME/exa/bin:$PATH + - export PATH=$HOME/exa/runtime:$PATH + - export LD_LIBRARY_PATH=$HOME/exa/lib:$LD_LIBRARY_PATH + - export MPI_PATH=$HOME/exa/bin + - export MPI_LIB=$HOME/exa/lib + - export MPI_INC=$HOME/exa/include + - export MPI_HOME=$HOME/exa + - which mpicxx + - which mpirun + - strace mpirun -n 4 tests/integration_tests/allreduce + - strace mpirun -n 4 tests/integration_tests/alltoall + - ctest --output-on-failure + - cd ../.. + - mkdir build && cd build + - which mpicxx + - mpicxx --version + - cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DUSE_EXAMPI=1 -DMPI_HOME=$HOME/exa + - make -j 2 || make VERBOSE=1 + - ls + - ctest --output-on-failure + icpc-intelmpi: stage: build image: intel/oneapi-hpckit:latest - allow_failure: true script: - - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates cmake curl g++ git make libboost-test-dev libboost-serialization-dev + - apt-get update && apt-get install --no-install-recommends -y --quiet bash ca-certificates cmake curl g++ git make libboost-test-dev libboost-serialization-dev - cd .. && ln -s boost-mpi3 mpi3 && cd mpi3 - cd test - mkdir build && cd build @@ -50,8 +81,9 @@ icpx-intelmpi: openmpi-clang: stage: build + image: debian:testing script: - - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang g++ git libstdc++-12-dev libboost-serialization-dev + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang g++ git libstdc++-12-dev libboost-serialization-dev - cd .. 
&& ln -s boost-mpi3 mpi3 && cd mpi3 - cd test - mkdir build && cd build @@ -63,10 +95,11 @@ openmpi-clang: - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure needs: ["openmpi"] -openmpi-clang20: +"openmpi-clang20": stage: build + image: debian:stable script: - - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang git libstdc++-12-dev libboost-serialization-dev + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang git libstdc++-12-dev libboost-serialization-dev - cd test - mkdir build && cd build - export MPI_OVERSUBSCRIBE="--oversubscribe" @@ -80,7 +113,7 @@ openmpi-clang20: openmpi-clang-tidy: stage: build script: - - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin clang libstdc++-12-dev clang-tidy cmake git make libboost-serialization-dev + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin clang libstdc++-12-dev clang-tidy cmake git make libboost-serialization-dev - mkdir build && cd build - export MPI_OVERSUBSCRIBE="--oversubscribe" - clang++ --version @@ -93,7 +126,7 @@ openmpi-clang-tidy: openmpi-cppcheck: stage: build script: - - apt update -qq && apt install -qq -y --no-install-recommends libopenmpi-dev openmpi-bin g++ libstdc++-12-dev ca-certificates cmake cppcheck git make libboost-serialization-dev + - apt-get update -qq && apt-get install -qq -y --no-install-recommends libopenmpi-dev openmpi-bin g++ libstdc++-12-dev ca-certificates cmake cppcheck git make libboost-serialization-dev - mkdir build && cd build - export MPI_OVERSUBSCRIBE="--oversubscribe" - g++ --version @@ -106,10 +139,12 @@ openmpi-cppcheck: mpich-debug: stage: build script: - - apt update -qq && apt install -qq -y --no-install-recommends libmpich-dev mpich + - apt-get update -qq && apt-get 
install -qq -y --no-install-recommends libopenmpi-dev openmpi-bin g++ libstdc++-12-dev ca-certificates cmake cppcheck git make libboost-serialization-dev - cd test - mkdir build && cd build - export MPI_OVERSUBSCRIBE="" + - export OMPI_ALLOW_RUN_AS_ROOT=1 + - export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 - cmake .. -DCMAKE_BUILD_TYPE=Debug - cmake --build . --parallel 2 || cmake --build . --verbose - ctest --output-on-failure @@ -118,7 +153,7 @@ mpich-valgrind: stage: build allow_failure: true script: - - apt update -qq && apt-get install -qq -y --no-install-recommends libmpich-dev mpich + - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates cmake git libboost-test-dev libboost-serialization-dev libmpich-dev make mpich valgrind - mpirun --version - mkdir build && cd build - export MPI_OVERSUBSCRIBE="" @@ -132,6 +167,7 @@ mpich-valgrind: qmcpack-openmpi: stage: test + image: debian:testing script: - apt-get -qq update && apt-get -qq install --no-install-recommends -y libblas-dev liblapack-dev libfftw3-dev libboost-serialization-dev libopenmpi-dev gfortran g++ cmake make git ca-certificates numdiff python3 python3-numpy python3-h5py python3-mpi4py python3-scipy libxml2-dev libhdf5-dev - git clone https://github.com/QMCPACK/qmcpack.git @@ -146,7 +182,7 @@ qmcpack-openmpi: needs: ["openmpi"] qmcpack-cuda-runner: - allow_failure: true + allow_failure: false image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04 tags: - nvidia-docker @@ -161,25 +197,26 @@ qmcpack-cuda-runner: - git subtree add --squash -P external_codes/mpi3 $CI_REPOSITORY_URL $CI_COMMIT_BRANCH # e.g. https://gitlab.com/correaa/boost-multi.git - cd ../qmcpack - cd build - - CUDACXX=/usr/local/cuda/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_HOST_COMPILER=g++ -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" .. 
+ - CUDACXX=/usr/local/cuda/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DQMC_GPU_ARCHS=sm_61 -DCMAKE_CUDA_HOST_COMPILER=g++ -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" .. - make -j4 afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest -R afqmc --output-on-failure needs: ["openmpi-cuda-11", "qmcpack-openmpi"] inq-openmpi: stage: test + image: debian:testing tags: - cpu script: - - apt-get update && apt-get install --no-install-recommends -y --quiet libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-serialization-dev libopenmpi-dev libhdf5-dev gfortran g++ cmake pkg-config make git ca-certificates wget + - apt-get update && apt-get install --no-install-recommends -y --quiet libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-serialization-dev libboost-iostreams-dev libopenmpi-dev libhdf5-dev gfortran g++ cmake pkg-config python3-dev make git ca-certificates wget - cmake --version - git clone https://gitlab.com/npneq/inq.git --recurse-submodules - cd inq - cd external_libs/mpi3 - - git checkout $CI_COMMIT_BRANCH + - git checkout $CI_COMMIT_BRANCH # check that multi repo is mirrored correctly from this repo to the submodule repo (npneq) - cd ../.. - mkdir build && cd build - - ../configure --prefix=$HOME --disable-debug + - cmake .. 
--install-prefix=$HOME -DCMAKE_BUILD_TYPE=Release - make --jobs=2 || make VERBOSE=1 - make install - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure @@ -222,7 +259,7 @@ inq-cuda-11-openmpi-compileonly: tags: - nvidia-docker script: - - apt-get update && apt-get install --no-install-recommends -y --quiet cmake libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-serialization-dev libopenmpi-dev libhdf5-dev gfortran g++ pkg-config make git ca-certificates wget + - apt-get update && apt-get install --no-install-recommends -y --quiet cmake git libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-iostreams-dev libboost-serialization-dev libopenmpi-dev libhdf5-dev ca-certificates gfortran g++ make pkg-config python3-dev wget - cmake --version - git clone https://gitlab.com/npneq/inq.git --recurse-submodules - cd inq @@ -231,7 +268,7 @@ inq-cuda-11-openmpi-compileonly: - cd ../.. - mkdir build && cd build - /usr/local/cuda-11/bin/nvcc -V - - CUDA_ARCH_OVERRIDE=1 ../configure --prefix=$HOME --enable-cuda --with-cuda-prefix=/usr/local/cuda --pass-thru -DCMAKE_CUDA_COMPILER=/usr/local/cuda-11/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=61 + - CUDACXX=/usr/local/cuda/bin/nvcc cmake .. --install-prefix=$HOME -DENABLE_CUDA=1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=61 - make silicon --jobs=2 - make install - ctest -R silicon diff --git a/external_codes/mpi_wrapper/mpi3/CMakeLists.txt b/external_codes/mpi_wrapper/mpi3/CMakeLists.txt index 74f0a7cbd57..f72eaee20b0 100644 --- a/external_codes/mpi_wrapper/mpi3/CMakeLists.txt +++ b/external_codes/mpi_wrapper/mpi3/CMakeLists.txt @@ -2,27 +2,37 @@ cmake_minimum_required(VERSION 3.16) project( - mpi3 + bmpi3 VERSION 0.79.0 DESCRIPTION "B-MPI3 is a C++ library wrapper for version 3.1 of the MPI standard interface that simplifies the utilization and maintenance of MPI code." 
HOMEPAGE_URL "https://gitlab.com/correaa/boost-mpi3" LANGUAGES CXX ) -include(GNUInstallDirs) +find_package(MPI REQUIRED) # might need to `module load mpi` add_library(${PROJECT_NAME} INTERFACE) +target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_17) target_include_directories(${PROJECT_NAME} INTERFACE $ $ $) +target_link_libraries(${PROJECT_NAME} INTERFACE MPI::MPI_CXX) -target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_17) -#set_target_properties(${PROJECT_NAME} PROPERTIES CXX_EXTENSIONS OFF) +# to use this project directly from CMake +# FetchContent_Declare( +# bmpi3 +# GIT_REPOSITORY git@gitlab.com:correaa/boost-mpi3.git # https://gitlab.com/correaa/boost-mpi3.git +# GIT_TAG master) +# FetchContent_MakeAvailable(bmpi3) +# add_executable(main main.cpp) +# target_link_libraries(main PUBLIC bmpi3) # this makes CM FetchContent friendly https://www.foonathan.net/2022/06/cmake-fetchcontent/ if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) - return() + return() endif() +include(GNUInstallDirs) + include(CTest) enable_testing() diff --git a/external_codes/mpi_wrapper/mpi3/README.md b/external_codes/mpi_wrapper/mpi3/README.md index e949010eaaa..d3a602d8452 100644 --- a/external_codes/mpi_wrapper/mpi3/README.md +++ b/external_codes/mpi_wrapper/mpi3/README.md @@ -81,27 +81,32 @@ It turns out that this interface was a very minimal change over the C version, a The B.MPI3 library was designed to use simultaneously (interleaved) with the standard C interface of MPI. In this way, changes to existing code can be made incrementally. -## Installation +## Usage -The library is "header-only"; no separate compilation is necessary. -In order to compile it requires an MPI distribution (e.g. OpenMPI or MPICH2) and the corresponding compiler-wrapper (`mpic++` or `mpicxx`). -This library requires C++14 and the Boost library installed. +The library is "header-only"; no separate compilation or configuration of the library is necessary. 
+It requires an MPI distribution (e.g. OpenMPI or MPICH2), a C++14 compiler and Boost libraries installed. A typical compilation/run command looks like this: ```bash -$ mpic++ -std=c++14 -O3 mpi3/test/communicator_send.cpp -o communicator_send.x -lboost_serialization +$ mpic++ communicator_send.cpp -o communicator_send.x -lboost_serialization $ mpirun -n 8 ./communicator_send.x ``` -In a system such as Red Hat, the dependencies can by installed by +In a system such as Red Hat or Fedora, the dependencies can be installed by `sudo dnf install gcc-c++ boost-devel openmpi-devel mpich-devel`. -```bash -dnf install gcc-c++ boost-devel openmpi-devel mpich-devel +Alternatively, the library can be fetched on demand by the CMake project: + +```cmake +include(FetchContent) +FetchContent_Declare(bmpi3 GIT_REPOSITORY https://gitlab.com/correaa/boost-mpi3.git) # or git@gitlab.com:correaa/boost-mpi3.git +FetchContent_MakeAvailable(bmpi3) + +target_link_libraries(your_executable PRIVATE bmpi3) ``` -Some systems require loading the MPI module before compiling and using MPI programs, `module load mpi/mpich`. +Some systems require loading the MPI module before compiling and using MPI programs, `module load mpi` (or `mpich`). -The library is tested frequently against `openmpi` and `mpich`, and less frequently with `mvapich2`. +The library is tested frequently against `openmpi` and `mpich` implementations of MPI. ## Testing @@ -119,7 +124,7 @@ ctest ## Initialization Like MPI, B.MPI3 requires some global library initialization. -The library includes a convenience `mpi3/main.hpp` which wraps around this initialization steps and *simulates* a main function. +The library includes a convenience header `mpi3/main.hpp`, which provides a "main" function that does this initialization. In this way, a parallel program looks very much like normal programs, except that the main function has a third argument with the default global communicator passed in. 
```cpp @@ -129,10 +134,9 @@ In this way, a parallel program looks very much like normal programs, except tha #include namespace mpi3 = boost::mpi3; -using std::cout; -int mpi3::main(int argc, char* argv[], mpi3::communicator world){ - if(world.rank() == 0) cout << mpi3::version() << '\n'; +int mpi3::main(int argc, char** argv, mpi3::communicator world) { + if(world.rank() == 0) {std::cout << mpi3::version() << '\n';} return 0; } ``` diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/allocator.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/allocator.hpp index 9fd4a522083..d96fd09f37c 100644 --- a/external_codes/mpi_wrapper/mpi3/include/mpi3/allocator.hpp +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/allocator.hpp @@ -1,4 +1,4 @@ -/* -*- indent-tabs-mode: t -*- */ +// Copyright 2018-2023 Alfredo A. Correa #ifndef BOOST_MPI3_ALLOCATOR_HPP #define BOOST_MPI3_ALLOCATOR_HPP @@ -17,13 +17,21 @@ struct /*__attribute__((aligned(0)))*/ bad_alloc : std::bad_alloc{using std::bad inline void* malloc(mpi3::size_t size) { void* ret; // NOLINT(cppcoreguidelines-init-variables) delayed init +#if not defined(EXAMPI) int const s = MPI_Alloc_mem(size, MPI_INFO_NULL, &ret); if(s != MPI_SUCCESS) {return nullptr;} //s throw bad_alloc();//"cannot allocate " + std::to_string(size) + " bytes"); +#else + ret = std::malloc(size); +#endif return ret; } inline void free(void* ptr){ +#if not defined(EXAMPI) MPI_(Free_mem)(ptr); +#else + std::free(ptr); +#endif } template @@ -87,42 +95,42 @@ constexpr std::add_const_t& as_const(T& t) noexcept{return t;} //int mpi3::main(int argc, char* argv[], mpi3::communicator world){ -// std::vector> v(1000000); -// std::vector> uv(1000000); -// std::iota(v.begin(), v.end(), 0.); -// using boost::mpi3::data; -// assert( data(uv.begin()) == &*uv.begin() ); -// assert( std::accumulate(v.begin(), v.end(), 0.) 
== (v.size()*(v.size() - 1))/2 ); -// return 0; -// -// { -// boost::container::flat_set, mpi3::allocator > fs; -// fs.insert(5.); -// fs.insert(3.); -// auto it = fs.begin(); -// assert(*it == 3.); -// ++it; -// assert(*it == 5.); -// } -// { -// boost::container::flat_set, std::allocator_traits>::rebind_alloc> fs; -// fs.insert(5); -// fs.insert(3); -// auto it = fs.begin(); -// assert(*it == 3); -// ++it; -// assert(*it == 5); -// } -// { -// boost::container::flat_set, std::less>, mpi3::allocator>> fsp; -// fsp.insert({1.,2.}); -// fsp.insert({3.,4.}); -// auto it = fsp.begin(); -// assert(*it == std::make_pair(1.,2.)); -// ++it; -// assert(*it == std::make_pair(3.,4.)); -// } -// return 0; +// std::vector> v(1000000); +// std::vector> uv(1000000); +// std::iota(v.begin(), v.end(), 0.); +// using boost::mpi3::data; +// assert( data(uv.begin()) == &*uv.begin() ); +// assert( std::accumulate(v.begin(), v.end(), 0.) == (v.size()*(v.size() - 1))/2 ); +// return 0; +// +// { +// boost::container::flat_set, mpi3::allocator > fs; +// fs.insert(5.); +// fs.insert(3.); +// auto it = fs.begin(); +// assert(*it == 3.); +// ++it; +// assert(*it == 5.); +// } +// { +// boost::container::flat_set, std::allocator_traits>::rebind_alloc> fs; +// fs.insert(5); +// fs.insert(3); +// auto it = fs.begin(); +// assert(*it == 3); +// ++it; +// assert(*it == 5); +// } +// { +// boost::container::flat_set, std::less>, mpi3::allocator>> fsp; +// fsp.insert({1.,2.}); +// fsp.insert({3.,4.}); +// auto it = fsp.begin(); +// assert(*it == std::make_pair(1.,2.)); +// ++it; +// assert(*it == std::make_pair(3.,4.)); +// } +// return 0; //} //#endif diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/cartesian_communicator.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/cartesian_communicator.hpp index 8ace96f7843..f993cc2aa56 100644 --- a/external_codes/mpi_wrapper/mpi3/include/mpi3/cartesian_communicator.hpp +++ 
b/external_codes/mpi_wrapper/mpi3/include/mpi3/cartesian_communicator.hpp @@ -1,4 +1,3 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- // Copyright 2018-2023 Alfredo A. Correa #ifndef BOOST_MPI3_CARTESIAN_COMMUNICATOR_HPP @@ -32,7 +31,7 @@ struct cartesian_communicator : communicator { assert(s.size() == p.size()); using dimensionality_type = int; MPI_(Cart_create)(comm_old.get(), static_cast(s.size()), s.data(), p.data(), /*reorder*/ true, &impl_); - // assert(impl_ != MPI_COMM_NULL); // null communicator is a valid outcome + // assert(impl_ != MPI_COMM_NULL); // null communicator is a valid outcome // TODO(correaa) try with mpich, WAS: there is an bug in mpich, in which if the remaining dim are none then the communicator is not well defined. } @@ -46,11 +45,15 @@ struct cartesian_communicator : communicator { cartesian_communicator(communicator& comm_old, std::initializer_list shape, std::initializer_list period) : cartesian_communicator(comm_old, std::vector(shape), std::vector(period)) {} - [[deprecated("use dimensionality() instead of dimension")]] int dimension() const { - int ret; // NOLINT(cppcoreguidelines-init-variables) delayed init - MPI_Cartdim_get(impl_, &ret); + +#if not defined(EXAMPI) + [[deprecated("use dimensionality() instead of dimension")]] + int dimension() const { + int ret; // NOLINT(cppcoreguidelines-init-variables) delayed init // TODO(correaa) + MPI_(Cartdim_get)(impl_, &ret); return ret; } +#endif cartesian_communicator& operator=(cartesian_communicator const&) = delete; cartesian_communicator& operator=(cartesian_communicator&&) = default; @@ -59,12 +62,14 @@ struct cartesian_communicator : communicator { if(this == std::addressof(other)) { return *this; } // lints cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator - communicator::operator=(other); + if(not (compare(other) == boost::mpi3::detail::congruent)) {throw std::logic_error{"assignment is going to be deprecated"};} + // 
communicator::operator=(other); return *this; } ~cartesian_communicator() = default; +#if not defined(EXAMPI) int dimensionality() const { int ret; // NOLINT(cppcoreguidelines-init-variables) delayed init MPI_(Cartdim_get)(impl_, &ret); @@ -100,10 +105,13 @@ struct cartesian_communicator : communicator { } std::vector shape() const { return topology().dimensions(); } + std::vector periods() const { auto ps = topology().periods(); return {ps.begin(), ps.end()}; } +#endif + auto num_elements() const { return size(); } template @@ -111,8 +119,10 @@ struct cartesian_communicator : communicator { int rank = -1; MPI_(Cart_rank)(impl_, coord.data(), &rank); return (*this)[rank]; - // return operator[](rank); + // return operator[](rank); } + +#if not defined(EXAMPI) // int MPI_Cart_map not implemented cartesian_communicator sub_aux(std::vector const& remain_dims) { assert(static_cast(remain_dims.size()) == dimensionality()); @@ -131,6 +141,7 @@ struct cartesian_communicator : communicator { remain[0] = 0 /*false*/; return sub_aux(remain); } +#endif }; enum fill_t { @@ -150,24 +161,45 @@ struct cartesian_communicator : cartesian_communicator<> { ~cartesian_communicator() = default; +// #if not defined(EXAMPI) static std::array division(int nnodes, std::array suggest = {}) { MPI_(Dims_create)(nnodes, D, suggest.data()); return suggest; } +// #endif + constexpr static dimensionality_type dimensionality = D; - explicit cartesian_communicator( + cartesian_communicator( communicator& other, - std::array dims = {}, - std::array periods = std::apply([](auto... e) { return std::array{(static_cast(e), true)...}; }, std::array{}) - ) try : cartesian_communicator - <>{other, division(other.size(), dims), std::apply([](auto... e) { return std::array{e...}; }, periods)} {} + std::array dims, + std::array periods + ) + try + : cartesian_communicator<>{ + other, + division(other.size(), dims), + std::apply([](auto... 
e) { return std::array{e...}; }, periods) + } {} catch(std::runtime_error& e) { std::ostringstream ss; std::copy(dims.begin(), dims.end(), std::ostream_iterator{ss, " "}); throw std::runtime_error{"cannot create cartesian communicator with constrains " + ss.str() + " from communicator of size " + std::to_string(other.size()) + " because " + e.what()}; } + cartesian_communicator( + communicator& other, + std::array dims + ) : cartesian_communicator( + other, + dims, + std::apply([](auto... e) { return std::array{(static_cast(e), true)...}; }, std::array{}) + ) {} + + explicit cartesian_communicator( + communicator& other + ) : cartesian_communicator(other, std::array{}) {} + auto topology() const { struct topology_t { std::array dimensions, periods, coordinates; @@ -188,7 +220,8 @@ struct cartesian_communicator : cartesian_communicator<> { if(this == std::addressof(other)) { return *this; } // lints cert-oop54-cpp - cartesian_communicator<>::operator=(other); // NOLINT(clang-diagnostic-deprecated-declarations) + if(not (compare(other) == boost::mpi3::detail::congruent)) {throw std::logic_error{"assignment is going to be deprecated"};} + // cartesian_communicator<>::operator=(other); // NOLINT(clang-diagnostic-deprecated-declarations) return *this; } @@ -249,6 +282,7 @@ struct cartesian_communicator : cartesian_communicator<> { using coordinates_type = std::array; using cartesian_communicator<>::rank; +#if not defined(EXAMPI) auto rank(coordinates_type cs) const -> int { auto const ps = periods(); auto const s = shape(); @@ -260,6 +294,7 @@ struct cartesian_communicator : cartesian_communicator<> { } return MPI_(Cart_rank)(impl_, cs.data()); } +#endif auto coordinates(int r) const -> coordinates_type { coordinates_type ret; MPI_(Cart_coords)(impl_, r, D, ret.data()); @@ -309,7 +344,8 @@ struct circular_communicator : cartesian_communicator<1> { if(this == std::addressof(other)) { return *this; } // lints cert-oop54-cpp - cartesian_communicator<1>::operator=(other); 
// NOLINT(clang-diagnostic-deprecated-declarations) + if(not (compare(other) == boost::mpi3::detail::congruent)) {throw std::logic_error{"assignment is going to be deprecated"};} + // cartesian_communicator<1>::operator=(other); // NOLINT(clang-diagnostic-deprecated-declarations) return *this; } @@ -317,7 +353,10 @@ struct circular_communicator : cartesian_communicator<1> { auto coordinate(int rank) const { return std::get<0>(this->coordinates(rank)); } using cartesian_communicator<1>::rank; + +#if not defined(EXAMPI) auto rank(int coordinate) const { return cartesian_communicator<1>::rank({coordinate}); } +#endif template auto rotate(As... as, int displacement) { return this->send_receive(as..., this->shift<0>(-displacement)); } diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/communicator.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/communicator.hpp index 7e708690427..57afb77d065 100644 --- a/external_codes/mpi_wrapper/mpi3/include/mpi3/communicator.hpp +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/communicator.hpp @@ -1,4 +1,3 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- // Copyright 2018-2023 Alfredo A. 
Correa #ifndef MPI3_COMMUNICATOR_HPP @@ -109,6 +108,7 @@ namespace mpi3 { // https://www.open-mpi.org/doc/v4.0/man3/MPI_Comm_split_type.3.php#toc8 enum class communicator_type : int { +#if not defined(EXAMPI) shared = MPI_COMM_TYPE_SHARED ,/*synomym*/ node = OMPI_COMM_TYPE_NODE, hw_thread = OMPI_COMM_TYPE_HWTHREAD, core = OMPI_COMM_TYPE_CORE , @@ -121,26 +121,48 @@ enum class communicator_type : int { host = OMPI_COMM_TYPE_HOST , cu = OMPI_COMM_TYPE_CU ,/*synomym*/ cpu = OMPI_COMM_TYPE_CU , cluster = OMPI_COMM_TYPE_CLUSTER +#else }; - -enum constant { - undefined = MPI_UNDEFINED , - process_null = MPI_PROC_NULL , - any_source = MPI_ANY_SOURCE +auto const shared = {static_cast(MPI_COMM_TYPE_SHARED) +#endif }; -enum key { // for attributes - tag_ub = MPI_TAG_UB, - host = MPI_HOST, - io = MPI_IO, - wtime_is_global = MPI_WTIME_IS_GLOBAL, - application_number = MPI_APPNUM, - universe_size = MPI_UNIVERSE_SIZE, - last_used_code = MPI_LASTUSEDCODE -}; +#if defined(EXAMPI) +inline +#endif +enum constant : int { +#if defined(EXAMPI) +} const +#endif + undefined = static_cast(MPI_UNDEFINED ), + process_null = static_cast(MPI_PROC_NULL ), + any_source = static_cast(MPI_ANY_SOURCE) +#if not defined(EXAMPI) +} +#endif +; + +#if defined(EXAMPI) +inline +#endif +enum key : int { // for attributes +#if defined(EXAMPI) +} +#endif + tag_ub = static_cast(MPI_TAG_UB) +#if not defined(EXAMPI) + , host = static_cast(MPI_HOST) + , io = static_cast(MPI_IO) + , wtime_is_global = static_cast(MPI_WTIME_IS_GLOBAL) + , application_number = static_cast(MPI_APPNUM) + , universe_size = static_cast(MPI_UNIVERSE_SIZE) + , last_used_code = static_cast(MPI_LASTUSEDCODE) +} +#endif +; template struct overload_priority : overload_priority{ -// using overload_priority::overload_priority; +// using overload_priority::overload_priority; }; template<> struct overload_priority<0>{}; @@ -214,17 +236,21 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com 
communicator(communicator&&) = default; communicator& operator=(communicator const&) = delete; - [[deprecated]] auto operator=(communicator& other) -> communicator& { // NOLINT(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) duplicate assigment + [[deprecated("duplicate assignment is a flawed operation")]] + auto operator=(communicator& other) -> communicator& { // NOLINT(cppcoreguidelines-c-copy-assignment-signature,misc-unconventional-assign-operator) duplicate assigment communicator tmp{other}; operator=(std::move(tmp)); - // swap(tmp); + // swap(tmp); return *this; } - auto operator=(communicator && other) noexcept -> communicator& { // TODO(correaa) tidy this operator + auto operator=(communicator && other) noexcept -> communicator& { // TODO(correaa) tidy this operator, consider removing it if(impl_ != MPI_COMM_NULL) { try { + #if not defined(EXAMPI) MPI_(Comm_disconnect)(&impl_); //this will wait for communications to finish communications, if it gets to this point is probably an error anyway <-- not true, it is necessary to synchronize the flow - // MPI_Comm_free(&impl_); + #else + MPI_(Comm_free )(&impl_); + #endif } catch(std::exception& e) { std::cerr<< e.what() < iterator_t& {++rank_; return *this;} -//// auto operator--() -> iterator_t& {--rank_; return *this;} -//// auto operator*() const -> reference; - -//// private: -//// communicator* commP_ = nullptr; -//// int rank_ = MPI_PROC_NULL; - -//// friend class communicator; -//// iterator_t(communicator* self, int rank) : commP_{self}, rank_{rank} {} -// }; +// struct iterator_t { +//// iterator_t() = default; +//// explicit iterator_t(std::nullptr_t n) : commP_{n} {} +//// auto operator++() -> iterator_t& {++rank_; return *this;} +//// auto operator--() -> iterator_t& {--rank_; return *this;} +//// auto operator*() const -> reference; + +//// private: +//// communicator* commP_ = nullptr; +//// int rank_ = MPI_PROC_NULL; + +//// friend class communicator; +//// 
iterator_t(communicator* self, int rank) : commP_{self}, rank_{rank} {} +// }; // using iterator = iterator_t; -// auto begin() -> iterator {return {this, 0 };} -// auto end () -> iterator {return {this, size()};} +// auto begin() -> iterator {return {this, 0 };} +// auto end () -> iterator {return {this, size()};} auto& handle() {return impl_;} auto get_mutable() {return impl_;} @@ -272,7 +298,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com explicit ptr(communicator* ptr) : ptr_{ptr} {} operator MPI_Comm() const {return ptr_->get_mutable();} // NOLINT(google-explicit-constructor,hicpp-explicit-conversions) explicit operator communicator *() const {return ptr_;} - // explicit operator communicator const*() const{return ptr_;} + // explicit operator communicator const*() const{return ptr_;} friend bool operator==(ptr const& a, ptr const& b) {return a.ptr_ == b.ptr_;} friend bool operator!=(ptr const& a, ptr const& b) {return a.ptr_ != b.ptr_;} }; @@ -284,8 +310,11 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ~communicator() { if(impl_ != MPI_COMM_WORLD and impl_ != MPI_COMM_NULL and impl_ != MPI_COMM_SELF) { try { + #if not defined(EXAMPI) MPI_(Comm_disconnect)(&impl_); //this will wait for communications to finish communications, if it gets to this point is probably an error anyway <-- not true, it is necessary to synchronize the flow - // MPI_Comm_free(&impl_); + #else + MPI_Comm_free(&impl_); + #endif } catch(std::exception& e) { std::cerr<< e.what() < - class keyval { - static int delete_fn(MPI_Comm /*comm*/, int /*keyval*/, void *attr_val, void */*extra_state*/){ - delete static_cast(attr_val); // NOLINT(cppcoreguidelines-owning-memory) - // attr_val = nullptr; - return MPI_SUCCESS; - } - static int copy_fn( - MPI_Comm /*oldcomm*/, int /*keyval*/, - void * /*extra_state*/, void *attribute_val_in, - void *attribute_val_out, int *flag - ) { - *static_cast(attribute_val_out) = 
static_cast(new T{*(static_cast(attribute_val_in))}); - assert(flag); *flag = 1; - return MPI_SUCCESS; - } - - public: - int impl_ = {}; // NOLINT(misc-non-private-member-variables-in-classes) TODO(correaa) - - using mapped_type = T; - - keyval() { // NOLINT(cppcoreguidelines-pro-type-member-init,hicpp-member-init) - MPI_(Comm_create_keyval)(copy_fn, delete_fn, &impl_, nullptr); - } - - keyval(keyval const&) = delete; - keyval(keyval &&) = delete; - - keyval& operator=(keyval const&) = delete; - keyval& operator=(keyval &&) = delete; - - ~keyval() noexcept {MPI_Comm_free_keyval(&impl_);} - }; - using detail::basic_communicator::send_receive_n; + #if not defined(EXAMPI) using detail::basic_communicator::matched_probe; + #endif template auto send_n( @@ -394,7 +390,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com package_oarchive poa(p); std::copy_n(first, count, package_oarchive::iterator::value_type>(poa)); // while(count--) {poa << *first++;} - send_n(p.begin(), p.size(), dest, tag); // p.send(dest, tag); + send_n(p.begin(), p.size(), dest, tag); // p.send(dest, tag); } template auto isend_n(It first, Size count, int dest, int tag = 0){ @@ -474,7 +470,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com package_oarchive poa(p); std::copy(first, last, package_oarchive::iterator::value_type>(poa)); // while(first!=last) {poa << *first++;} - send_n(p.begin(), p.size(), dest, tag); // p.send(dest, tag); + send_n(p.begin(), p.size(), dest, tag); // p.send(dest, tag); } template @@ -527,8 +523,10 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com communicator split(int color, int key) { communicator ret; MPI_(Comm_split)(impl_, color, key, &ret.impl_); - if(ret) {ret.set_name(name() + std::to_string(color));} - if(ret) {ret.attribute("color") = color;} + if(ret) { ret.set_name(name() + std::to_string(color)); } + #if not defined(EXAMPI) + if(ret) { ret.attribute("color") = 
color; } + #endif return ret; } communicator split(int color = MPI_UNDEFINED) { @@ -544,6 +542,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com communicator reversed() {return split(0, size() - rank());} + #if not defined(EXAMPI) int cartesian_map(std::vector const& dims, std::vector const& periods) const { assert(dims.size() == periods.size()); return MPI_(Cart_map)(impl_, static_cast(dims.size()), dims.data(), periods.data()); // TODO(correaa) use safe cast @@ -551,6 +550,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com int cartesian_map(std::vector const& dimensions) const { return cartesian_map(dimensions, std::vector(dimensions.size(), 0)); } + #endif pointer malloc(MPI_Aint size) const; template void deallocate_shared(pointer p); @@ -572,7 +572,6 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com communicator subcomm(std::initializer_list l) const { return subcomm(std::vector(l)); } - enum class topology{undefined = MPI_UNDEFINED, graph = MPI_GRAPH, cartesian = MPI_CART}; int rank() const { assert(not is_empty()); // an empty communicator doesn't have ranks @@ -598,21 +597,28 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com assert(rank() - n > 0); return rank() - n; } + #if not defined(EXAMPI) communicator accept(port const& p, int root = 0) const { communicator ret; MPI_Comm_accept(p.name_.c_str(), MPI_INFO_NULL, root, impl_, &ret.impl_); return ret; } + #endif + [[deprecated("call non const version")]] void barrier() const { MPI_( Barrier)(get() ) ;} void barrier() { MPI_( Barrier)(handle()) ;} +#if not defined(EXAMPI) auto ibarrier() {request ret; MPI_(Ibarrier)(handle(), &ret.impl_); return ret;} +#endif +#if not defined(EXAMPI) communicator connect(port const& p, int root = 0) const { communicator ret; MPI_(Comm_connect)(p.name_.c_str(), MPI_INFO_NULL, root, impl_, &ret.impl_); return ret; } +#endif bool root() const 
{return (not empty()) and (rank() == 0);} bool is_root() const {return root();} @@ -624,6 +630,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com auto operator[](int rank) -> reference; protected: +#if not defined(EXAMPI) template void set_attribute(int kv_idx, T const& t) { MPI_(Comm_set_attr)(impl_, kv_idx, new T{t}); // NOLINT(readability-implicit-bool-conversion, cppcoreguidelines-owning-memory) TODO(correaa) } @@ -643,8 +650,46 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com MPI_Comm_get_attr(impl_, kvidx, &v, &flag); return flag != 0; } +#endif public: + +#if not defined(EXAMPI) + template + class keyval { + static int delete_fn(MPI_Comm /*comm*/, int /*keyval*/, void *attr_val, void */*extra_state*/){ + delete static_cast(attr_val); // NOLINT(cppcoreguidelines-owning-memory) + // attr_val = nullptr; + return MPI_SUCCESS; + } + static int copy_fn( + MPI_Comm /*oldcomm*/, int /*keyval*/, + void * /*extra_state*/, void *attribute_val_in, // cppcheck-suppress constParameterCallback ; C-function callback + void *attribute_val_out, int *flag + ) { + *static_cast(attribute_val_out) = static_cast(new T{*(static_cast(attribute_val_in))}); + assert(flag); *flag = 1; + return MPI_SUCCESS; + } + + public: + int impl_ = {}; // NOLINT(misc-non-private-member-variables-in-classes) TODO(correaa) + + using mapped_type = T; + + keyval() { // NOLINT(cppcoreguidelines-pro-type-member-init,hicpp-member-init) + MPI_(Comm_create_keyval)(copy_fn, delete_fn, &impl_, nullptr); + } + + keyval(keyval const&) = delete; + keyval(keyval &&) = delete; + + keyval& operator=(keyval const&) = delete; + keyval& operator=(keyval &&) = delete; + + ~keyval() noexcept {MPI_Comm_free_keyval(&impl_);} + }; + template void set_attribute(keyval const& k, TT const& t = {}) {set_attribute(k.impl_, t);} template @@ -660,8 +705,11 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com if(not has_attribute(kv)) 
{set_attribute(kv);} return get_attribute(kv); } + mpi3::any& attribute(std::string const& s); +#endif +#if not defined(EXAMPI) void call_error_handler(int errorcode) noexcept { auto const s = MPI_Comm_call_errhandler(impl_, errorcode); (void)s; assert(s == MPI_SUCCESS); @@ -670,6 +718,8 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com auto const s = MPI_Comm_call_errhandler(impl_, static_cast(e)); (void)s; assert(s == MPI_SUCCESS); } +#endif + communicator divide_low(int n) { assert(n != 0); return split( @@ -772,6 +822,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com #endif #endif +#if not defined(EXAMPI) template auto send_receive_replace_n( It first, Size size, @@ -801,56 +852,22 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); return first + s.count::value_type>(); } - template - auto send_receive_n( - It1 first, Size count, int dest, - It2 d_first, Size d_count, int source, - int sendtag = 0, int recvtag = MPI_ANY_TAG - ) { - return send_receive_n( - first, count, dest, - d_first, d_count, source, - detail::iterator_category_t{}, // It2??? - detail::value_category_t::value_type>{}, - sendtag, recvtag - ); - } - template - auto send_receive_n( - It1 first, Size count, int dest, - It2 d_first, int source = MPI_ANY_SOURCE, - int sendtag = 0, int recvtag = MPI_ANY_TAG + template + auto send_receive_replace_n( + It first, + detail::forward_iterator_tag /*tag*/, + detail::basic_tag /*tag*/, + Size count, int dest, int source, int sendtag, int recvtag ) { - return send_receive_n( - first, count, dest, - d_first, source, - detail::iterator_category_t{}, // It2??? TODO(correaa) - detail::value_category_t::value_type>{}, // It2??? 
TODO(correaa) - sendtag, recvtag - ); + uvector::value_type> v(static_cast(count)); + std::copy_n(first, count, v.begin()); + send_receive_replace_n(v.begin(), v.size(), dest, source, sendtag, recvtag); + return std::copy_n(v.begin(), v.size(), first); } -// private: - -// public: -// template -// auto isend_receive_replace_n( -// It first, Size size, -// int dest, int source, // = MPI_ANY_SOURCE, -// int sendtag = 0, int recvtag = MPI_ANY_TAG -// ) { -// using value_type = typename std::iterator_traits::value_type; -// return isend_receive_replace_n( -// first, -// detail::iterator_category_t{}, -// detail::value_category_t{}, -// size, -// dest, source, sendtag, recvtag -// ); -// } +#endif // not defined(EXAMPI) - private: template auto send_receive_n( It first, Size count, int dest, @@ -869,29 +886,23 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com return d_first + static_cast::difference_type>(d_count); } - template::value_type - , class V2 = typename std::iterator_traits::value_type - > + template auto send_receive_n( - It1 first, Size count, int dest, - It2 d_first, int source, - /**/ detail::contiguous_iterator_tag /*tag*/, - /**/ detail::basic_tag /*tag*/, - int sendtag, int recvtag + It1 first, Size count, int dest, + It2 d_first, Size d_count, int source, + int sendtag = 0, int recvtag = MPI_ANY_TAG ) { - static_assert( std::is_same{}, "source and destination need to be same type" ); - status const ret = MPI_(Sendrecv)( - detail::data(first), static_cast(count), datatype{}(), - dest, sendtag, - detail::data(d_first), std::numeric_limits::max() /*unlim in receiving*/, datatype{}(), - source, recvtag, - impl_ //, &ret.impl_ // status refers to the receive operation. + return send_receive_n( + first, count, dest, + d_first, d_count, source, + detail::iterator_category_t{}, // It2??? 
+ detail::value_category_t::value_type>{}, + sendtag, recvtag ); - return d_first + ret.count(); } - template +#if not defined(EXAMPI) + template auto send_receive_replace_n( It first, /**/ detail::forward_iterator_tag /*tag*/, @@ -923,18 +934,63 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com count, first ); } +#endif // not defined(EXAMPI) - template - auto send_receive_replace_n( - It first, - detail::forward_iterator_tag /*tag*/, - detail::basic_tag /*tag*/, - Size count, int dest, int source, int sendtag, int recvtag + template + auto send_receive_n( + It1 first, Size count, int dest, + It2 d_first, int source = MPI_ANY_SOURCE, + int sendtag = 0, int recvtag = MPI_ANY_TAG ) { - uvector::value_type> v(static_cast(count)); - std::copy_n(first, count, v.begin()); - send_receive_replace_n(v.begin(), v.size(), dest, source, sendtag, recvtag); - return std::copy_n(v.begin(), v.size(), first); + return send_receive_n( + first, count, dest, + d_first, source, + detail::iterator_category_t{}, // It2??? TODO(correaa) + detail::value_category_t::value_type>{}, // It2??? 
TODO(correaa) + sendtag, recvtag + ); + } + +// private: + +// public: +// template +// auto isend_receive_replace_n( +// It first, Size size, +// int dest, int source, // = MPI_ANY_SOURCE, +// int sendtag = 0, int recvtag = MPI_ANY_TAG +// ) { +// using value_type = typename std::iterator_traits::value_type; +// return isend_receive_replace_n( +// first, +// detail::iterator_category_t{}, +// detail::value_category_t{}, +// size, +// dest, source, sendtag, recvtag +// ); +// } + + private: + template::value_type + , class V2 = typename std::iterator_traits::value_type + > + auto send_receive_n( + It1 first, Size count, int dest, + It2 d_first, int source, + /**/ detail::contiguous_iterator_tag /*tag*/, + /**/ detail::basic_tag /*tag*/, + int sendtag, int recvtag + ) { + static_assert( std::is_same{}, "source and destination need to be same type" ); + status const ret = MPI_(Sendrecv)( + detail::data(first), static_cast(count), datatype{}(), + dest, sendtag, + detail::data(d_first), std::numeric_limits::max() /*unlim in receiving*/, datatype{}(), + source, recvtag, + impl_ //, &ret.impl_ // status refers to the receive operation. 
+ ); + return d_first + ret.count(); } public: @@ -1098,6 +1154,8 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com MPI_Recv(begin, n, MPI_PACKED, source, tag, impl_, &ret.impl_); return ret; } + +#if not defined(EXAMPI) auto receive_packed(void* begin, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { MPI_Status status; MPI_Message msg; // NOLINT(cppcoreguidelines-init-variables) delayed init @@ -1105,10 +1163,76 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com MPI_Mprobe(source, tag, impl_, &msg, &status); MPI_Get_count(&status, MPI_PACKED, &count); MPI_Mrecv(begin, count, MPI_PACKED, &msg, MPI_STATUS_IGNORE); // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) - // auto n = probe(source, tag).count(); - // receive_packed_n(begin, n, source, tag); + // auto n = probe(source, tag).count(); + // receive_packed_n(begin, n, source, tag); return static_cast(std::next(static_cast(begin), count)); } + + template + auto receive_n( + It dest, + detail::forward_iterator_tag /*tag*/, + detail::value_unspecified_tag /*tag*/, + Size count, + int source, int tag + ){ + detail::package p(*this); + p.receive(source, tag); + package_iarchive pia(p); + return std::copy_n(package_iarchive::iterator::value_type>{pia}, count, dest); + } + + template + auto receive( + It dest, + detail::contiguous_iterator_tag /*tag*/, + detail::basic_tag /*tag*/, + int source, int tag + ) { + match m = matched_probe(source, tag); + auto count = m.count::value_type>(); + m.receive_n(dest, count); + return dest + count; + } + + template + [[deprecated]] auto receive( + It dest, + /**/ detail::forward_iterator_tag /*tag*/, + /**/ detail::value_unspecified_tag /*tag*/, + int source, int tag + ) { + detail::package p(*this); + p.receive(source, tag); + package_iarchive const pia(p); // TODO(correaa) investigate + while(p) {pia >> *dest++;} // NOLINT(altera-unroll-loops) deprecating + return dest; + } + + template + auto receive( + It 
dest, + detail::forward_iterator_tag /*tag*/, + detail::basic_tag /*tag*/, + int source, int tag + ) { + return matched_probe(source, tag).receive_n(dest); + } + + template::value_type> + auto dynamic_receive(InputIt first, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { + // auto count = probe(source, tag).count(); + // return receive(first, first + count, source, tag); + MPI_Status status; + MPI_Message msg; // NOLINT(cppcoreguidelines-init-variables) delayed init + int count = -1; + MPI_Mprobe(source, tag, impl_, &msg, &status); + MPI_Get_count(&status, datatype{}(), &count); + using detail::data; + MPI_Mrecv(data(first), count, datatype{}(), &msg, MPI_STATUS_IGNORE); // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) for macro + } +#endif + template auto receive_n( It dest, @@ -1142,32 +1266,20 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); return r; } // NOLINT(clang-analyzer-optin.mpi.MPI-Checker) // MPI_Wait called on destructor of ret - template - auto receive_n( - It dest, - detail::forward_iterator_tag /*tag*/, - detail::value_unspecified_tag /*tag*/, - Size count, - int source, int tag - ){ - detail::package p(*this); - p.receive(source, tag); - package_iarchive pia(p); - return std::copy_n(package_iarchive::iterator::value_type>{pia}, count, dest); - } template{}, int> =0// or (not detail::is_basic::value_type>{}), int> =0 // needed by intel commpiler > auto receive_n(It dest, Size n, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { return receive_n( - dest, + dest, detail::iterator_category_t{}, detail::value_category_t::value_type>{}, n, source, tag ); } + template mpi3::request ireceive_n( It dest, Size n, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG @@ -1180,40 +1292,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com source, tag ); } - template - auto receive( - It dest, - detail::contiguous_iterator_tag /*tag*/, - detail::basic_tag /*tag*/, - int source, int 
tag - ) { - match m = matched_probe(source, tag); - auto count = m.count::value_type>(); - m.receive_n(dest, count); - return dest + count; - } - template - [[deprecated]] auto receive( - It dest, - /**/ detail::forward_iterator_tag /*tag*/, - /**/ detail::value_unspecified_tag /*tag*/, - int source, int tag - ) { - detail::package p(*this); - p.receive(source, tag); - package_iarchive const pia(p); // TODO(correaa) investigate - while(p) {pia >> *dest++;} // NOLINT(altera-unroll-loops) deprecating - return dest; - } - template - auto receive( - It dest, - detail::forward_iterator_tag /*tag*/, - detail::basic_tag /*tag*/, - int source, int tag - ) { - return matched_probe(source, tag).receive_n(dest); - } + template [[deprecated]] auto receive(It dest, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { return receive( @@ -1240,7 +1319,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com int source, int tag ) { return receive_n(std::addressof(*d_first), std::distance(d_first, d_last), source, tag); - // return std::copy(buffer.begin(), buffer.end(), d_first); + // return std::copy(buffer.begin(), buffer.end(), d_first); } template @@ -1254,89 +1333,89 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com receive_n(buffer.begin(), buffer.size(), source, tag); return std::copy(buffer.begin(), buffer.end(), d_first); } -// class ir_req{ -// boost::mpi3::status query(){ -// boost::mpi3::status ret; -// ret.set_source(MPI_UNDEFINED); -// ret.set_tag(MPI_UNDEFINED); -// ret.set_cancelled(); -// ret.set_elements(0); -// return ret; -// } -// static void free(){ -// std::cout << "free" << std::endl; -// } -// static void cancel(int complete) { -// std::cout << "cancel " << complete << std::endl; -// } -// }; -// template -// struct receive_args { -// communicator* commP; -// It d_first; -// // It d_last; -// int source; -// int tag; -// MPI_Request* requestP; -// }; -// struct receive_state{ -// int cancelled = 
0; -// int source = MPI_UNDEFINED; -// int tag = MPI_UNDEFINED; -// }; -// template -// inline static void* receive_thread(void* ptr) { -// receive_args* args = (receive_args*)ptr; -// args->commP->receive(args->d_first, args->source, args->tag);//, /*args->d_last,*/ ); -// MPI_Grequest_complete(*args->requestP); -// ::free(ptr); -// return nullptr; -// } -// inline static int query_fn(void* extra_state, MPI_Status *status){ -// auto* rs = static_cast(extra_state); -// /* always send just one int */ -// MPI_Status_set_elements(status, MPI_INT, 1); -// /* can never cancel so always true */ -// MPI_Status_set_cancelled(status, rs->cancelled); -// /* choose not to return a value for this */ -// status->MPI_SOURCE = rs->source; -// /* tag has not meaning for this generalized request */ -// status->MPI_TAG = rs->tag; -// /* this generalized request never fails */ -// return MPI_SUCCESS; -// } -// inline static int free_fn(void* extra_state) { -// /* this generalized request does not need to do any freeing */ -// /* as a result it never fails here */ -// ::free(extra_state); -// return MPI_SUCCESS; -// } -// inline static int cancel_fn(void* /*extra_state*/, int complete) { -// /* This generalized request does not support cancelling. -// Abort if not already done. If done then treat as if cancel failed. 
*/ -// if(not (complete == 0)) { -// std::cerr<< "Cannot cancel generalized request - aborting program" < -// auto ireceive(It d_first, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { -// // based on http://liinwww.ira.uka.de/courses/spprakt/mpi2-html-doc/node157.html -// mpi3::request ret; /* receive_args* args = (receive_args*)::malloc(sizeof(receive_args)); args->commP = this; args->d_first = d_first; // args->d_last = d_last; args->source = source; args->tag = tag; args->requestP = &ret.impl_;*/ -// receive_state* rs = (receive_state*)::malloc(sizeof(receive_state)); -// rs->cancelled = 0; -// rs->source = source; -// rs->tag = tag; -// MPI_Grequest_start(query_fn, free_fn, cancel_fn, rs, &ret.impl_);//args->requestP); -// std::thread( // static_cast(receive_thread), args -// [this, d_first, source, tag, &ret](){ -// this->receive(d_first, source, tag); // receive_args* args = (receive_args*)ptr; // args->commP->receive(args->d_first, args->source, args->tag);//, /*args->d_last,*/ ); -// MPI_Grequest_complete(ret.impl_); // MPI_Grequest_complete(*args->requestP); // ::free(ptr); -// } -// ).detach(); // t.detach(); // pthread_t thread; // pthread_create(&thread, NULL, static_cast(receive_thread), args); // pthread_detach(thread); -// return ret; -// } +// class ir_req{ +// boost::mpi3::status query(){ +// boost::mpi3::status ret; +// ret.set_source(MPI_UNDEFINED); +// ret.set_tag(MPI_UNDEFINED); +// ret.set_cancelled(); +// ret.set_elements(0); +// return ret; +// } +// static void free(){ +// std::cout << "free" << std::endl; +// } +// static void cancel(int complete) { +// std::cout << "cancel " << complete << std::endl; +// } +// }; +// template +// struct receive_args { +// communicator* commP; +// It d_first; +// // It d_last; +// int source; +// int tag; +// MPI_Request* requestP; +// }; +// struct receive_state{ +// int cancelled = 0; +// int source = MPI_UNDEFINED; +// int tag = MPI_UNDEFINED; +// }; +// template +// inline static void* 
receive_thread(void* ptr) { +// receive_args* args = (receive_args*)ptr; +// args->commP->receive(args->d_first, args->source, args->tag);//, /*args->d_last,*/ ); +// MPI_Grequest_complete(*args->requestP); +// ::free(ptr); +// return nullptr; +// } +// inline static int query_fn(void* extra_state, MPI_Status *status){ +// auto* rs = static_cast(extra_state); +// /* always send just one int */ +// MPI_Status_set_elements(status, MPI_INT, 1); +// /* can never cancel so always true */ +// MPI_Status_set_cancelled(status, rs->cancelled); +// /* choose not to return a value for this */ +// status->MPI_SOURCE = rs->source; +// /* tag has not meaning for this generalized request */ +// status->MPI_TAG = rs->tag; +// /* this generalized request never fails */ +// return MPI_SUCCESS; +// } +// inline static int free_fn(void* extra_state) { +// /* this generalized request does not need to do any freeing */ +// /* as a result it never fails here */ +// ::free(extra_state); +// return MPI_SUCCESS; +// } +// inline static int cancel_fn(void* /*extra_state*/, int complete) { +// /* This generalized request does not support cancelling. +// Abort if not already done. If done then treat as if cancel failed. 
*/ +// if(not (complete == 0)) { +// std::cerr<< "Cannot cancel generalized request - aborting program" < +// auto ireceive(It d_first, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { +// // based on http://liinwww.ira.uka.de/courses/spprakt/mpi2-html-doc/node157.html +// mpi3::request ret; /* receive_args* args = (receive_args*)::malloc(sizeof(receive_args)); args->commP = this; args->d_first = d_first; // args->d_last = d_last; args->source = source; args->tag = tag; args->requestP = &ret.impl_;*/ +// receive_state* rs = (receive_state*)::malloc(sizeof(receive_state)); +// rs->cancelled = 0; +// rs->source = source; +// rs->tag = tag; +// MPI_Grequest_start(query_fn, free_fn, cancel_fn, rs, &ret.impl_);//args->requestP); +// std::thread( // static_cast(receive_thread), args +// [this, d_first, source, tag, &ret](){ +// this->receive(d_first, source, tag); // receive_args* args = (receive_args*)ptr; // args->commP->receive(args->d_first, args->source, args->tag);//, /*args->d_last,*/ ); +// MPI_Grequest_complete(ret.impl_); // MPI_Grequest_complete(*args->requestP); // ::free(ptr); +// } +// ).detach(); // t.detach(); // pthread_t thread; // pthread_create(&thread, NULL, static_cast(receive_thread), args); // pthread_detach(thread); +// return ret; +// } template auto ireceive( It d_first, It d_last, @@ -1401,18 +1480,6 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com auto bsend(InputIterator It1, InputIterator It2, int dest, int tag = 0){ return send(buffered_communication_mode{}, blocking_mode{}, It1, It2, dest, tag); } - template::value_type> - auto dynamic_receive(InputIt first, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) { - // auto count = probe(source, tag).count(); - // return receive(first, first + count, source, tag); - MPI_Status status; - MPI_Message msg; // NOLINT(cppcoreguidelines-init-variables) delayed init - int count = -1; - MPI_Mprobe(source, tag, impl_, &msg, &status); - MPI_Get_count(&status, 
datatype{}(), &count); - using detail::data; - MPI_Mrecv(data(first), count, datatype{}(), &msg, MPI_STATUS_IGNORE); // NOLINT(cppcoreguidelines-pro-type-cstyle-cast) for macro - } template::iterator_category> auto breceive(Iterator It1, Iterator It2, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG){ @@ -1549,6 +1616,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com return d_first + count; } +#if not defined(EXAMPI) using in_place_type = decltype(MPI_IN_PLACE); // NOLINT(cppcoreguidelines-pro-type-cstyle-cast,performance-no-int-to-ptr) openmpi #defines this as (void*)1, it may not be a pointer in general template @@ -1578,8 +1646,11 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); return first + count*size(); } +#endif public: + +#if not defined(EXAMPI) template auto all_to_all_inplace_n(It1 first, Size count) { using count_type = int; @@ -1595,6 +1666,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); return first + count; } +#endif template auto all_to_all_n(It1 first, Size count, It2 d_first) { @@ -1688,7 +1760,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com } template auto scatter_builtin_q(std::false_type, Iterator1 first, Iterator2 last, Iterator1 d_first, int root) -// { TODO implement } +// { TODO implement } ; public: @@ -1829,7 +1901,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com detail::iterator_category_t{}, detail::value_category_t::value_type>{}, op, - // predefined_operation{}, + // predefined_operation{}, root ); } @@ -1930,10 +2002,11 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com } public: +#if not defined(EXAMPI) template< class It1, class Size, class Op = std::plus<>, - class V1 = typename std::iterator_traits::value_type, class P1 = decltype(data_adl(It1{})), - class = decltype(std::declval::reference>() = 
std::declval()(V1{}, V1{})) + class V1 = typename std::iterator_traits::value_type, class P1 = decltype(data_adl(It1{})), + class = std::enable_if_t()(std::declval(), std::declval()))>> > auto all_reduce_in_place_n(It1 first, Size count, Op /*op*/) { auto const in_place = MPI_IN_PLACE; // NOLINT(cppcoreguidelines-pro-type-cstyle-cast,llvm-qualified-auto,readability-qualified-auto,performance-no-int-to-ptr) openmpi #defines this as (void*)1, it may not be a pointer in general @@ -1941,14 +2014,6 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com MPI_(Allreduce)(in_place, data_adl(first), static_cast(count), datatype{}(), &combine, impl_); } - template< - class It1, class Size, class Op = std::plus<>, - class V1 = typename std::iterator_traits::value_type, class P1 = decltype(data_adl(It1{})) - > - auto all_reduce_n(It1 first, Size count, Op op = {}) - ->decltype(all_reduce_in_place_n(first, count, op)) { - return all_reduce_in_place_n(first, count, op); } - template< class It1, class Size, class Op, class V1 = typename std::iterator_traits::value_type, class P1 = decltype(data_adl(It1{})), @@ -1960,6 +2025,15 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com MPI_(Reduce)(data_adl(first), nullptr , count, datatype{}(), PredefinedOp{}, root, impl_) ; } +#endif + + template< + class It1, class Size, class Op = std::plus<>, + class V1 = typename std::iterator_traits::value_type, class P1 = decltype(data_adl(It1{})) + > + auto all_reduce_n(It1 first, Size count, Op op = {}) + ->decltype(all_reduce_in_place_n(first, count, op)) { + return all_reduce_in_place_n(first, count, op); } template< class It1, class Size, class Op = std::plus<>, @@ -2243,7 +2317,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com std::vector counts(c.size()); std::transform( counts.begin(), counts.end(), begin(c), counts.begin(), - [](auto& /*unused*/, auto& b){return std::distance(begin(b), end(b));} 
+ [](auto& /*unused*/, auto const& b){return std::distance(begin(b), end(b));} ); int n = scatter(counts); scatterv_n( @@ -2386,7 +2460,7 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com ); if(s != MPI_SUCCESS) {throw std::runtime_error("cannot gather");} advance(d_first, count*size(), root...); - // std::advance(d_first, count); + // std::advance(d_first, count); return d_first; } @@ -2943,22 +3017,25 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com public: std::string get_name() const { - std::array comm_name{}; + std::string ret(MPI_MAX_OBJECT_NAME, '\0'); int len; // NOLINT(cppcoreguidelines-init-variables) : delayed initialization - MPI_(Comm_get_name)(impl_, comm_name.data(), &len); - return {comm_name.data(), static_cast(len)}; + MPI_(Comm_get_name)(impl_, ret.data(), &len); + ret.resize(static_cast(len)); + return ret; } void set_name(std::string const& s) {MPI_(Comm_set_name)(impl_, s.c_str());} std::string name() const {return get_name();} [[deprecated]] void name(std::string const& s) {set_name(s);} +#if not defined(EXAMPI) static mpi3::communicator& parent() { static_assert(sizeof(MPI_Comm) == sizeof(mpi3::communicator), "!"); static_assert(std::is_same{}, "!"); MPI_Comm* p{}; MPI_Comm_get_parent(p); assert(p); return reinterpret_cast(*p); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) : TODO(correaa) avoid reinterpret_cast } + static communicator spawn(std::string const& argv0, int np) { communicator intercomm; MPI_Comm_spawn(argv0.data(), MPI_ARGV_NULL, np, MPI_INFO_NULL, 0, MPI_COMM_SELF, &intercomm.impl_, MPI_ERRCODES_IGNORE ); @@ -2967,33 +3044,46 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com communicator intercommunicator_create(int local_leader, communicator const& peer, int remote_leader, int tag = 0) const{ communicator ret; - int const s = MPI_Intercomm_create(impl_, local_leader, peer.impl_, remote_leader, tag, &ret.impl_); - if(s 
!= MPI_SUCCESS) {throw std::runtime_error("cannot create intercommunicator");} + MPI_(Intercomm_create)(impl_, local_leader, peer.impl_, remote_leader, tag, &ret.impl_); return ret; } communicator create(int local_leader, communicator const& peer, int remote_leader, int tag = 0) const{ return intercommunicator_create(local_leader, peer, remote_leader, tag); } +#endif communicator create(group const& g) const; communicator create_group(group const& g, int tag) const; - FILE* fopen(char const* filename, int amode = unsigned{MPI_MODE_RDWR} | unsigned{MPI_MODE_CREATE}); + FILE* fopen(char const* filename, int amode = unsigned{ + #if not defined(EXAMPI) + MPI_MODE_RDWR} | unsigned{MPI_MODE_CREATE + #endif + }); + + class topology { + int impl_; + + public: + explicit topology(int impl) noexcept : impl_{impl} {} + + static topology const undefined; + static topology const graph; + static topology const cartesian; + + bool operator==(topology const& other) const {return impl_ == other.impl_;} + bool operator!=(topology const& other) const {return impl_ != other.impl_;} + bool operator< (topology const& other) const {return impl_ < other.impl_;} + }; inline static auto name(communicator::topology const& t) -> std::string const& { static std::map const names = { {communicator::topology::undefined, "undefined"}, - {communicator::topology::graph, "graph"}, + {communicator::topology::graph , "graph"}, {communicator::topology::cartesian, "cartesian"}}; return names.find(t)->second; } -//template -//friend auto operator,(communicator& comm, T const& t){ -// std::vector ret(comm.size()); -// comm.all_gather_n(std::addressof(t), 1, first, root); -//} - template friend T operator+=(communicator& comm, T const& t) { // NOLINT(fuchsia-overloaded-operator) : experimental operator return comm.all_reduce_value(t, std::plus<>{}); @@ -3022,8 +3112,14 @@ class communicator : protected detail::basic_communicator { // in mpich MPI_Com } }; +inline communicator::topology const 
communicator::topology::undefined{MPI_UNDEFINED}; // NOLINT(fuchsia-statically-constructed-objects) see if EXAMPI will allow it to be constexpr +inline communicator::topology const communicator::topology::graph {MPI_GRAPH}; // NOLINT(fuchsia-statically-constructed-objects) see if EXAMPI will allow it to be constexpr +inline communicator::topology const communicator::topology::cartesian{MPI_CART}; // NOLINT(fuchsia-statically-constructed-objects) see if EXAMPI will allow it to be constexpr + inline void barrier(communicator& self) { self. barrier();} +#if not defined(EXAMPI) inline auto ibarrier(communicator& self) {return self.ibarrier();} +#endif inline communicator::communicator(group const& g, int tag){ MPI_(Comm_create_group)(MPI_COMM_WORLD, &const_cast(g), tag, &impl_); // NOLINT(cppcoreguidelines-pro-type-const-cast) : TODO(correaa) consider using non-const argument to begin with @@ -3059,16 +3155,16 @@ inline communicator communicator::create_group(class group const& g, int tag = 0 template inline void communicator::deallocate_shared(pointer /*unused*/){ -// MPI_Free_mem(p.base_ptr(rank())); +// MPI_Free_mem(p.base_ptr(rank())); } template inline void communicator::deallocate(pointer& /*p*/, MPI_Aint /*size*/) { // TODO(correaa) should be called free? 
-// p.pimpl_->fence(); -// MPI_Free_mem(p.local_ptr()); -// MPI_Win_free(&p.pimpl_->impl_); -// delete p.pimpl_; -// p.pimpl_ == nullptr; +// p.pimpl_->fence(); +// MPI_Free_mem(p.local_ptr()); +// MPI_Win_free(&p.pimpl_->impl_); +// delete p.pimpl_; +// p.pimpl_ == nullptr; } #if 0 @@ -3134,97 +3230,97 @@ inline mpi3::communicator& grip_communicator(MPI_Comm const& handle) { //namespace mpi3 = boost::mpi3; //class V{ -// mpi3::communicator comm_; -// public: -// V(mpi3::communicator const& c) : comm_(c){} -// V(mpi3::communicator&& c) : comm_(std::move(c)){} +// mpi3::communicator comm_; +// public: +// V(mpi3::communicator const& c) : comm_(c){} +// V(mpi3::communicator&& c) : comm_(std::move(c)){} //}; //int mpi3::main(int, char*[], mpi3::communicator world){ -// std::cout << mpi3::undefined << std::endl; +// std::cout << mpi3::undefined << std::endl; -// static_assert(std::is_nothrow_constructible::value, "MyType should be noexcept MoveConstructible"); +// static_assert(std::is_nothrow_constructible::value, "MyType should be noexcept MoveConstructible"); -//// auto worldcopy1 = world; -//// auto worldcopy2 = std::move(worldcopy1); -//// V v(worldcopy); -//// V v2(std::move(v)); +//// auto worldcopy1 = world; +//// auto worldcopy2 = std::move(worldcopy1); +//// V v(worldcopy); +//// V v2(std::move(v)); -// if(world.rank() == 0) cout << "MPI version " << mpi3::version() << '\n'; -//// if(world.rank() == 0) cout << "Topology: " << name(world.topo()) << '\n'; +// if(world.rank() == 0) cout << "MPI version " << mpi3::version() << '\n'; +//// if(world.rank() == 0) cout << "Topology: " << name(world.topo()) << '\n'; -// cout << "MPI_ERR_COMM = " << MPI_ERR_COMM << '\n'; +// cout << "MPI_ERR_COMM = " << MPI_ERR_COMM << '\n'; -// mpi3::communicator comm; -// assert(!comm); -//// cout << comm.rank() << '\n'; +// mpi3::communicator comm; +// assert(!comm); +//// cout << comm.rank() << '\n'; -// mpi3::communicator comm2 = world; -// assert(comm2); -// assert(comm2.size() 
== world.size()); -// assert(comm2 == world); -// assert(&comm2 != &world); +// mpi3::communicator comm2 = world; +// assert(comm2); +// assert(comm2.size() == world.size()); +// assert(comm2 == world); +// assert(&comm2 != &world); -// mpi3::communicator comm3 = world;//.duplicate(); -// assert(comm3); -// assert(comm3 == world); -// assert(&comm3 != &world); -// comm = comm2; -// assert(&comm != &comm2); +// mpi3::communicator comm3 = world;//.duplicate(); +// assert(comm3); +// assert(comm3 == world); +// assert(&comm3 != &world); +// comm = comm2; +// assert(&comm != &comm2); -//// world2 = world; +//// world2 = world; -// return 0; +// return 0; //#if 0 -//// boost::mpi3::communicator newcomm = world; -// { -// int color = world.rank()/3; -// communicator row_comm; -// row_comm = world.split(color); -// world.barrier(); -// std::cout << std::to_string(world.rank()) + " " + std::to_string(row_comm.rank()) + "\n";// << std::endl; -// world.barrier(); -// } -// { -// communicator row_comm = world/3; -// world.barrier(); -// std::cout << std::to_string(world.rank()) + " " + std::to_string(row_comm.rank()) + "\n";// << std::endl; -// world.barrier(); -// } - -// world.barrier(); -// if(world.rank() == 0) cout << "prime communicator" << '\n'; -// world.barrier(); - -// { -// // group world_group(world); -// // const int ranks[4] = {2, 3, 5, 7}; -// // group prime = world_group.include(ranks, ranks + 4); -// // communicator prime_comm(world, prime); -// auto prime_comm = world.subcomm({2,3,5,7}); -// cout << world.rank() << " -> " << prime_comm.rank() << "/" << prime_comm.size() << '\n'; +//// boost::mpi3::communicator newcomm = world; +// { +// int color = world.rank()/3; +// communicator row_comm; +// row_comm = world.split(color); +// world.barrier(); +// std::cout << std::to_string(world.rank()) + " " + std::to_string(row_comm.rank()) + "\n";// << std::endl; +// world.barrier(); +// } +// { +// communicator row_comm = world/3; +// world.barrier(); +// std::cout 
<< std::to_string(world.rank()) + " " + std::to_string(row_comm.rank()) + "\n";// << std::endl; +// world.barrier(); +// } + +// world.barrier(); +// if(world.rank() == 0) cout << "prime communicator" << '\n'; +// world.barrier(); + +// { +// // group world_group(world); +// // const int ranks[4] = {2, 3, 5, 7}; +// // group prime = world_group.include(ranks, ranks + 4); +// // communicator prime_comm(world, prime); +// auto prime_comm = world.subcomm({2,3,5,7}); +// cout << world.rank() << " -> " << prime_comm.rank() << "/" << prime_comm.size() << '\n'; //#if 0 -// if(communicator::null != prime_comm){ -// cout << world.rank() << " -> " << prime_comm.rank() << "/" << prime_comm.size() << '\n'; -// }else{ -// cout << world.rank() << " not in prime comm\n"; -// } +// if(communicator::null != prime_comm){ +// cout << world.rank() << " -> " << prime_comm.rank() << "/" << prime_comm.size() << '\n'; +// }else{ +// cout << world.rank() << " not in prime comm\n"; +// } //#endif -// } - -// world.barrier(); -// if(world.rank() == 0) cout << "prime communicator" << '\n'; -// world.barrier(); - -// if(0){ -// auto prime = world.subcomm({2,3,5,7}); -// if(prime.is_empty()){ -// // if (communicator::null != prime){ -// cout << world.rank() << " -> " << prime.rank() << "/" << prime.size() << '\n'; -// }else{ -// cout << world.rank() << " not in prime comm\n"; -// } -// } +// } + +// world.barrier(); +// if(world.rank() == 0) cout << "prime communicator" << '\n'; +// world.barrier(); + +// if(0){ +// auto prime = world.subcomm({2,3,5,7}); +// if(prime.is_empty()){ +// // if (communicator::null != prime){ +// cout << world.rank() << " -> " << prime.rank() << "/" << prime.size() << '\n'; +// }else{ +// cout << world.rank() << " not in prime comm\n"; +// } +// } //#endif //} diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/basic_communicator.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/basic_communicator.hpp index cc42b027f73..ec3569082a4 100644 --- 
a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/basic_communicator.hpp +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/basic_communicator.hpp @@ -1,8 +1,7 @@ -// -*-indent-tabs-mode:t;c-basic-offset:4;tab-width:4;autowrap:nil;-*- // Copyright 2018-2023 Alfredo A. Correa -#ifndef MPI3_DETAIL_BASIC_COMMUNICATOR_HPP -#define MPI3_DETAIL_BASIC_COMMUNICATOR_HPP +#ifndef BMPI3_DETAIL_BASIC_COMMUNICATOR_HPP +#define BMPI3_DETAIL_BASIC_COMMUNICATOR_HPP #include "../../mpi3/vector.hpp" @@ -154,8 +153,8 @@ class basic_communicator{ ) { std::for_each(first, last, [&b, &pos, this](auto& e) {pos = unpack_n(b, pos, std::addressof(e), 1);}); // while(first != last){ - // pos = unpack_n(b, pos, std::addressof(*first), 1); - // ++first; + // pos = unpack_n(b, pos, std::addressof(*first), 1); + // ++first; // } return pos; } @@ -186,7 +185,7 @@ class basic_communicator{ } template auto unpack_n(detail::buffer& b, It first, Size count) { - // assert(0); + // assert(0); b.pos = unpack_n(b, b.pos, first, count); return b.pos; } @@ -246,11 +245,7 @@ class basic_communicator{ auto send(uvector const& p, int dest, int tag = 0) { return send_n(p.data(), p.size(), dest, tag); } - match matched_probe(int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) const { - match m; - MPI_(Mprobe)(source, tag, impl_, &m.message::impl_, &m.status::impl_); - return m; - } + template auto receive_n( It dest, @@ -263,6 +258,14 @@ class basic_communicator{ receive_n(buffer.data(), buffer.size(), source, tag); return std::copy_n(buffer.begin(), n, dest); } + + #if not defined(EXAMPI) + match matched_probe(int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) const { + match m; + MPI_(Mprobe)(source, tag, impl_, &m.message::impl_, &m.status::impl_); + return m; + } + auto receive(uvector& b, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) const { match m = matched_probe(source, tag); auto const count = static_cast(m.count()); @@ -270,9 +273,13 @@ class basic_communicator{ b.resize(b.size() + 
count); return m.receive_n(std::next(b.data(), size), count); } + #endif + + #if not defined(EXAMPI) auto receive(detail::buffer& b, int source = MPI_ANY_SOURCE, int tag = MPI_ANY_TAG) const { return receive(static_cast&>(b), source, tag); } + #endif template auto send_receive_replace_n( It first, diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/call.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/call.hpp index 7edb5633702..b3e81844cb9 100644 --- a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/call.hpp +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/call.hpp @@ -26,11 +26,12 @@ int call() { template std::string call() { int len = -1; - std::array name{}; + std::string name(MPI_MAX_PROCESSOR_NAME, '\0'); // std::array name{}; auto const e = static_cast((*F)(name.data(), &len)); assert(len >= 0); + name.resize(static_cast(len)); if(e != mpi3::error::success) {throw std::system_error{e, "cannot call function " + std::string{__PRETTY_FUNCTION__}};} - return {name.data(), static_cast(len)}; + return name; } template((*F)(std::declval()...)))* = nullptr> diff --git a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/datatype.hpp b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/datatype.hpp index adf95dd7021..4e5797fbfc2 100644 --- a/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/datatype.hpp +++ b/external_codes/mpi_wrapper/mpi3/include/mpi3/detail/datatype.hpp @@ -77,6 +77,8 @@ class packed { template struct basic_datatype; + +#if defined(MPI_DOUBLE_COMPLEX) // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define MPI3_DECLARE_DATATYPE(TypE, MpiiD) \ template<> struct basic_datatype { \ @@ -88,6 +90,17 @@ template<> struct basic_datatype { \ auto get() const -> MPI_Datatype {return MpiiD;} \ /* static constexpr MPI_Datatype value = MpiiD;*/ \ } +#else +#define MPI3_DECLARE_DATATYPE(TypE, MpiiD) \ +template<> struct basic_datatype { \ +/* constexpr*/ operator MPI_Datatype() const { \ + assert( (MpiiD) != 
MPI_DATATYPE_NULL ); /* NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) in some MPI distros this is not constexpr */ /*this system doesn't support this type*/ \ + return MpiiD; \ + } \ + auto get() const -> MPI_Datatype {return MpiiD;} \ +/* static constexpr MPI_Datatype value = MpiiD;*/ \ +} +#endif // basic data types http://beige.ucs.indiana.edu/I590/node100.html MPI3_DECLARE_DATATYPE(char , MPI_CHAR); @@ -119,19 +132,31 @@ MPI3_DECLARE_DATATYPE(bool , MPI_C_BOOL); // C++ binding not // MPI_UINT16_T uint16_t // MPI_UINT32_T uint32_t // MPI_UINT64_T uint64_t - +#if defined(MPI_C_FLOAT_COMPLEX) MPI3_DECLARE_DATATYPE(cxx_float_complex , MPI_C_FLOAT_COMPLEX); MPI3_DECLARE_DATATYPE(cxx_double_complex , MPI_C_DOUBLE_COMPLEX); MPI3_DECLARE_DATATYPE(cxx_long_double_complex, MPI_C_LONG_DOUBLE_COMPLEX); +#else +MPI3_DECLARE_DATATYPE(cxx_float_complex , MPI_CXX_FLOAT_COMPLEX); +MPI3_DECLARE_DATATYPE(cxx_double_complex , MPI_CXX_DOUBLE_COMPLEX); +MPI3_DECLARE_DATATYPE(cxx_long_double_complex, MPI_CXX_LONG_DOUBLE_COMPLEX); +#endif // MPI3_DECLARE_DATATYPE(cxx_2double_complex , MPI_2DOUBLE_COMPLEX); // not available in mpich // TODO(correaa) these types below probably don't behave correctly for reductions with multiplication +#if defined(MPI_COMPLEX) MPI3_DECLARE_DATATYPE(float_float , MPI_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); MPI3_DECLARE_DATATYPE(double_double , MPI_DOUBLE_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); -MPI3_DECLARE_DATATYPE(decltype(std::tuple{}), MPI_DOUBLE_COMPLEX); +MPI3_DECLARE_DATATYPE(decltype(std::tuple{}), MPI_DOUBLE_COMPLEX); // TODO(correaa) is this correct? 
reduce (specially multiplication) will not give correct result MPI3_DECLARE_DATATYPE(long_double_long_double, MPI_DOUBLE_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); +#else +MPI3_DECLARE_DATATYPE(float_float , MPI_CXX_FLOAT_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); +MPI3_DECLARE_DATATYPE(double_double , MPI_CXX_DOUBLE_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); +MPI3_DECLARE_DATATYPE(decltype(std::tuple{}), MPI_CXX_DOUBLE_COMPLEX); // TODO(correaa) is this correct? reduce (specially multiplication) will not give correct result +MPI3_DECLARE_DATATYPE(long_double_long_double, MPI_CXX_DOUBLE_COMPLEX); static_assert(sizeof(std::pair) == sizeof(std::complex), "checking that complex mem layout maps to pair"); +#endif #if defined(__NVCC__) MPI3_DECLARE_DATATYPE(thrust::complex, MPI_DOUBLE_COMPLEX); @@ -189,6 +214,10 @@ auto datatype_detect(...) -> default_datatype; template auto datatype_detect(U const&) -> default_datatype>; +// support enums +template> +auto datatype_detect(U const&) -> default_datatype