diff --git a/.ci_fedora.sh b/.ci_fedora.sh index 452afb4b7e..b8805abb15 100755 --- a/.ci_fedora.sh +++ b/.ci_fedora.sh @@ -50,7 +50,7 @@ then cp -a /tmp/BOUT-dev /home/test/ chown -R test /home/test chmod u+rwX /home/test -R - sudo -u test ${0/\/tmp/\/home\/test} $mpi + su - test -c "${0/\/tmp/\/home\/test} $mpi" ## If we are called as normal user, run test else . /etc/profile.d/modules.sh diff --git a/.clang-tidy b/.clang-tidy index 6ca5262f41..0117c20e42 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -2,7 +2,6 @@ Checks: 'clang-diagnostic-*,clang-analyzer-*,performance-*,readability-*,bugprone-*,clang-analyzer-*,cppcoreguidelines-*,mpi-*,misc-*,-readability-magic-numbers,-cppcoreguidelines-avoid-magic-numbers,-misc-non-private-member-variables-in-classes,-clang-analyzer-optin.mpi*,-bugprone-exception-escape,-cppcoreguidelines-pro-bounds-pointer-arithmetic,-readability-function-cognitive-complexity,-misc-no-recursion,-bugprone-easily-swappable-parameters' WarningsAsErrors: '' HeaderFilterRegex: '' -AnalyzeTemporaryDtors: false FormatStyle: file CheckOptions: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 42965e75e8..bdaeb3dc4f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,7 +39,7 @@ jobs: is_cron: - ${{ github.event_name == 'cron' }} config: - - name: "CMake, PETSc unreleased, ADIOS" + - name: "CMake, PETSc unreleased, ADIOS2" os: ubuntu-20.04 cmake_options: "-DBUILD_SHARED_LIBS=ON -DBOUT_ENABLE_METRIC_3D=ON diff --git a/.gitignore b/.gitignore index 7ddf9526ab..934da1c0de 100644 --- a/.gitignore +++ b/.gitignore @@ -85,3 +85,5 @@ coverage/ /_version.txt /BOUT++-v*.tar.gz /BOUT++-v*.tar.xz +/CMakeCache.txt +/CMakeFiles/cmake.check_cache diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000..7830073846 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,98 @@ +############################################################################### +# Copyright (c) 2022-23, Lawrence 
Livermore National Security, LLC and RADIUSS +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: (MIT) +############################################################################### + +# DESCRIPTION: +############################################################################### +# General GitLab pipelines configurations for supercomputers and Linux clusters +# at Lawrence Livermore National Laboratory (LLNL). +# This entire pipeline is LLNL-specific +# +# Important note: This file is a template provided by llnl/radiuss-shared-ci. +# Remains to set variable values, change the reference to the radiuss-shared-ci +# repo, opt-in and out optional features. The project can then extend it with +# additional stages. +# +# In addition, each project should copy over and complete: +# - .gitlab/custom-jobs-and-variables.yml +# - .gitlab/subscribed-pipelines.yml +# +# The jobs should be specified in a file local to the project, +# - .gitlab/jobs/${CI_MACHINE}.yml +# or generated (see LLNL/Umpire for an example). +############################################################################### + +# We define the following GitLab pipeline variables: +variables: +##### LC GITLAB CONFIGURATION +# Use an LLNL service user to run CI. This prevents from running pipelines as +# an actual user. + LLNL_SERVICE_USER: "" +# Use the service user workspace. Solves permission issues, stores everything +# at the same location whoever triggers a pipeline. + CUSTOM_CI_BUILDS_DIR: "/usr/workspace/BOUT-GPU/gitlab-runner" +# Tells Gitlab to recursively update the submodules when cloning the project. +# GIT_SUBMODULE_STRATEGY: recursive + +##### PROJECT VARIABLES +# We build the projects in the CI clone directory. +# Used in script/gitlab/build_and_test.sh script. +# TODO: add a clean-up mechanism. 
+ BUILD_ROOT: ${CI_PROJECT_DIR} + +##### SHARED_CI CONFIGURATION +# Required information about the GitHub repository: github.com/ORG/NAME + GITHUB_PROJECT_NAME: "BOUT-dev" + GITHUB_PROJECT_ORG: "boutproject" +# Set the build-and-test command. +# Nested variables are allowed and useful to customize the job command. We +# prevent variable expansion so that you can define them at job level. + JOB_CMD: + value: "tests/gitlab/ci-tests.sh" + expand: false +# Override the pattern describing branches that will skip the "draft PR filter +# test". Add protected branches here. See default value in +# preliminary-ignore-draft-pr.yml. +# ALWAYS_RUN_PATTERN: "" + +# We organize the build-and-test stage with sub-pipelines. Each sub-pipeline +# corresponds to a test batch on a given machine. + +# High level stages +stages: + - prerequisites + - build-and-test + +# Template for jobs triggering a build-and-test sub-pipeline: +.build-and-test: + stage: build-and-test + trigger: + include: + - local: '.gitlab/custom-jobs-and-variables.yml' + - project: 'radiuss/radiuss-shared-ci' + ref: 'v2024.07.0' + file: 'pipelines/${CI_MACHINE}.yml' + # Add your jobs + # you can use a local file + - local: '.gitlab/jobs/${CI_MACHINE}.yml' + # or a file generated in the previous steps + # - artifact: '${CI_MACHINE}-jobs.yml' + # job: 'generate-job-file' + # (See Umpire CI setup for an example). 
+ strategy: depend + forward: + pipeline_variables: true + +include: + # Sets ID tokens for every job using `default:` + - project: 'lc-templates/id_tokens' + file: 'id_tokens.yml' + # [Optional] checks preliminary to running the actual CI test + - project: 'radiuss/radiuss-shared-ci' + ref: 'v2024.07.0' + file: 'utilities/preliminary-ignore-draft-pr.yml' + # pipelines subscribed by the project + - local: '.gitlab/subscribed-pipelines.yml' diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml new file mode 100644 index 0000000000..5af33aee04 --- /dev/null +++ b/.gitlab/custom-jobs-and-variables.yml @@ -0,0 +1,62 @@ +############################################################################### +# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: (MIT) +############################################################################### + +# We define the following GitLab pipeline variables: +variables: +# In some pipelines we create only one allocation shared among jobs in +# order to save time and resources. This allocation has to be uniquely +# named so that we are sure to retrieve it and avoid collisions. + ALLOC_NAME: ${CI_PROJECT_NAME}_ci_${CI_PIPELINE_ID} + +# Ruby +# Arguments for top level allocation + RUBY_SHARED_ALLOC: "--mpi=none --exclusive --reservation=ci --time=20 --nodes=1" +# Arguments for job level allocation + RUBY_JOB_ALLOC: "--mpi=none --reservation=ci --nodes=1" +# Add variables that should apply to all the jobs on a machine: +# RUBY_MY_VAR: "..." + +# Poodle +# Arguments for top level allocation + POODLE_SHARED_ALLOC: "--exclusive --partition=pdebug --time=10 --nodes=1" +# Arguments for job level allocation + POODLE_JOB_ALLOC: "--nodes=1" +# Add variables that should apply to all the jobs on a machine: +# POODLE_MY_VAR: "..." 
+ +# Corona +# Arguments for top level allocation +# OPTIONAL: "-o per-resource.count=2" allows to get 2 jobs running on each node. + CORONA_SHARED_ALLOC: "--exclusive --time-limit=15m --nodes=1" +# Arguments for job level allocation + CORONA_JOB_ALLOC: "--nodes=1 --begin-time=+5s" +# Add variables that should apply to all the jobs on a machine: +# CORONA_MY_VAR: "..." + +# Tioga +# Arguments for top level allocation +# OPTIONAL: "-o per-resource.count=2" allows to get 2 jobs running on each node. + TIOGA_SHARED_ALLOC: "--queue=pci --exclusive --time-limit=15m --nodes=1" +# Arguments for job level allocation + TIOGA_JOB_ALLOC: "--nodes=1 --begin-time=+5s" +# Add variables that should apply to all the jobs on a machine: +# TIOGA_MY_VAR: "..." + +# Lassen uses a different job scheduler (spectrum lsf) that does not allow +# pre-allocation the same way slurm does. Arguments for job level allocation + LASSEN_JOB_ALLOC: "1 -W 30 -q pci" +# Add variables that should apply to all the jobs on a machine: +# LASSEN_MY_VAR: "..." + + +# Configuration shared by build and test jobs specific to this project. +# Not all configuration can be shared. Here projects can fine tune the +# CI behavior. +# See Umpire for an example (export junit test reports). +.custom_job: + variables: + JOB_TEMPLATE_CANNOT_BE_EMPTY: "True" diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml new file mode 100644 index 0000000000..6363dd9336 --- /dev/null +++ b/.gitlab/jobs/lassen.yml @@ -0,0 +1,59 @@ +############################################################################### +# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: (MIT) +############################################################################### + +# We require project to define their job command using a variable (JOB_CMD). 
+# In customization/gitlab-ci.yml, we encourage defining this variable as +# non-expandable, so that projects can use nested variables to configure the job +# command. The caveat is that the reproducer here cannot capture the +# definition of these variables in a generic fashion. By overriding the +# following section, projects can specify the variables to define in the +# reproducer to exactly reproduce the CI build. +.lassen_reproducer_vars: + script: + - echo -e "Running on Lassen\n" + +# With GitLab CI, included files cannot be empty. +# TODO: remove when you have at least one job defined. +variables: + INCLUDED_FILE_CANNOT_BE_EMPTY: "True" + +############### +# Explanations: +############### +# RADIUSS Shared CI provides a pipeline for each machine, where a template job +# is provided. Each of your jobs must extend this template to be added to the +# list of jobs running on the associated machine. +# +# The job template then expects you to define the "JOB_CMD" variable with the +# one line command used to trigger the build and test of your project. +# +# We suggest that you set your command in such a way that you can then +# customize it per job with variables. E.g.: +# "./path/to/my_ci_script ${A_VARIABLE}" + +## Adding jobs defined by the project. +## Note: placing the extends section first allows you to override part of the +## shared implementation if needed (and if you know what you are doing). +#: +# extends: .job_on_lassen +# variables: +# : "" + +.base-job: + extends: .job_on_lassen + before_script: + # Update BOUT-configs in the shared directory. + - pushd /usr/workspace/BOUT-GPU/BOUT-configs + - git pull + - popd + # Create the environment. 
+ - source /usr/workspace/BOUT-GPU/BOUT-configs/lassen/setup-env.sh + after_script: + - rm -rf ${CI_BUILDS_DIR} ${CI_PROJECT_DIR} + +build-test-cuda-minimal: + extends: .base-job \ No newline at end of file diff --git a/.gitlab/subscribed-pipelines.yml b/.gitlab/subscribed-pipelines.yml new file mode 100644 index 0000000000..265a344ba8 --- /dev/null +++ b/.gitlab/subscribed-pipelines.yml @@ -0,0 +1,91 @@ +############################################################################### +# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS +# project contributors. See the COPYRIGHT file for details. +# +# SPDX-License-Identifier: (MIT) +############################################################################### + +# The template job to test whether a machine is up. +# Expects CI_MACHINE defined to machine name. +.machine-check: + stage: prerequisites + tags: [shell, oslic] + variables: + GIT_STRATEGY: none + script: + - | + if [[ $(jq '.[env.CI_MACHINE].total_nodes_up' /usr/global/tools/lorenz/data/loginnodeStatus) == 0 ]] + then + echo -e "\e[31mNo node available on ${CI_MACHINE}\e[0m" + curl --url "https://api.github.com/repos/${GITHUB_PROJECT_ORG}/${GITHUB_PROJECT_NAME}/statuses/${CI_COMMIT_SHA}" \ + --header 'Content-Type: application/json' \ + --header "authorization: Bearer ${GITHUB_TOKEN}" \ + --data "{ \"state\": \"failure\", \"target_url\": \"${CI_PIPELINE_URL}\", \"description\": \"GitLab ${CI_MACHINE} down\", \"context\": \"ci/gitlab/${CI_MACHINE}\" }" + exit 1 + fi + +### +# Trigger a build-and-test pipeline for a machine. +# Comment the jobs for machines you don’t need. 
+### + +# RUBY +#ruby-up-check: +# variables: +# CI_MACHINE: "ruby" +# extends: [.machine-check] +# +#ruby-build-and-test: +# variables: +# CI_MACHINE: "ruby" +# needs: [ruby-up-check] +# extends: [.build-and-test] + +## POODLE +#poodle-up-check: +# variables: +# CI_MACHINE: "poodle" +# extends: [.machine-check] +# +#poodle-build-and-test: +# variables: +# CI_MACHINE: "poodle" +# needs: [poodle-up-check] +# extends: [.build-and-test] +# +## CORONA +#corona-up-check: +# variables: +# CI_MACHINE: "corona" +# extends: [.machine-check] +# +#corona-build-and-test: +# variables: +# CI_MACHINE: "corona" +# needs: [corona-up-check] +# extends: [.build-and-test] +# +## TIOGA +#tioga-up-check: +# variables: +# CI_MACHINE: "tioga" +# extends: [.machine-check] +# +#tioga-build-and-test: +# variables: +# CI_MACHINE: "tioga" +# needs: [tioga-up-check] +# extends: [.build-and-test] + +# LASSEN +lassen-up-check: + variables: + CI_MACHINE: "lassen" + extends: [.machine-check] + +lassen-build-and-test: + variables: + CI_MACHINE: "lassen" + needs: [lassen-up-check] + extends: [.build-and-test] + diff --git a/CMakeLists.txt b/CMakeLists.txt index c1c82ea4e3..f57a78a14a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -935,7 +935,7 @@ message(" SUNDIALS support : ${BOUT_HAS_SUNDIALS} HYPRE support : ${BOUT_HAS_HYPRE} NetCDF support : ${BOUT_HAS_NETCDF} - ADIOS support : ${BOUT_HAS_ADIOS} + ADIOS2 support : ${BOUT_HAS_ADIOS2} FFTW support : ${BOUT_HAS_FFTW} LAPACK support : ${BOUT_HAS_LAPACK} OpenMP support : ${BOUT_USE_OPENMP} diff --git a/bin/bout-build-deps.sh b/bin/bout-build-deps.sh index 19e3b2a0d3..d96d500dc9 100755 --- a/bin/bout-build-deps.sh +++ b/bin/bout-build-deps.sh @@ -98,7 +98,7 @@ netcdf() { nccxx() { cd $BUILD - wget -c ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-cxx4-$NCCXXVER.tar.gz || : + wget -c https://downloads.unidata.ucar.edu/netcdf-cxx/$NCCXXVER/netcdf-cxx4-$NCCXXVER.tar.gz || : tar -xf netcdf-cxx4-$NCCXXVER.tar.gz cd netcdf-cxx4-$NCCXXVER 
CPPFLAGS="-I$PREFIX/include" LDFLAGS="-L$PREFIX/lib/" ./configure --prefix=$PREFIX $NCCXXFLAGS @@ -286,17 +286,17 @@ set -x ## Setup folders and links setup ## Build and install hdf5 -hdf5 +test $NO_HDF5 || hdf5 ## Build and install netcdf -netcdf +test $NO_NETCDF || netcdf ## Build and install C++ interface for netcdf -nccxx +test $NO_NCXX || nccxx ## Build and install FFTW -fftw +test $NO_FFTW || fftw ## Build and install Sundials -sundials +test $NO_SUNDIALS || sundials ## Build and install PETSc -petsc +test $NO_PETSC || petsc ## Download BOUT++ submodules submod # Install python packages diff --git a/bin/bout-config.in b/bin/bout-config.in index fa19779cfe..b5a62a42eb 100755 --- a/bin/bout-config.in +++ b/bin/bout-config.in @@ -29,7 +29,7 @@ idlpath="@IDLCONFIGPATH@" pythonpath="@PYTHONCONFIGPATH@" has_netcdf="@BOUT_HAS_NETCDF@" -has_adios="@BOUT_HAS_ADIOS@" +has_adios2="@BOUT_HAS_ADIOS2@" has_legacy_netcdf="@BOUT_HAS_LEGACY_NETCDF@" has_pnetcdf="@BOUT_HAS_PNETCDF@" has_pvode="@BOUT_HAS_PVODE@" @@ -71,18 +71,18 @@ Available values for OPTION include: --idl IDL path --python Python path - --has-netcdf NetCDF file support - --has-adios ADIOS file support + --has-netcdf NetCDF file support + --has-adios2 ADIOS2 file support --has-legacy-netcdf Legacy NetCDF file support - --has-pnetcdf Parallel NetCDF file support - --has-pvode PVODE solver support - --has-cvode SUNDIALS CVODE solver support - --has-ida SUNDIALS IDA solver support - --has-lapack LAPACK support - --has-petsc PETSc support - --has-hypre Hypre support - --has-slepc SLEPc support - --has-nls Natural Language Support + --has-pnetcdf Parallel NetCDF file support + --has-pvode PVODE solver support + --has-cvode SUNDIALS CVODE solver support + --has-ida SUNDIALS IDA solver support + --has-lapack LAPACK support + --has-petsc PETSc support + --has-hypre Hypre support + --has-slepc SLEPc support + --has-nls Natural Language Support --petsc-has-sundials diff --git a/bout++Config.cmake.in 
b/bout++Config.cmake.in index 3d824e455f..5af0dc43ea 100644 --- a/bout++Config.cmake.in +++ b/bout++Config.cmake.in @@ -15,7 +15,7 @@ set(BOUT_USE_METRIC_3D @BOUT_USE_METRIC_3D@) set(BOUT_HAS_PVODE @BOUT_HAS_PVODE@) set(BOUT_HAS_NETCDF @BOUT_HAS_NETCDF@) -set(BOUT_HAS_ADIOS @BOUT_HAS_ADIOS@) +set(BOUT_HAS_ADIOS2 @BOUT_HAS_ADIOS2@) set(BOUT_HAS_FFTW @BOUT_HAS_FFTW@) set(BOUT_HAS_LAPACK @BOUT_HAS_LAPACK@) set(BOUT_HAS_PETSC @BOUT_HAS_PETSC@) diff --git a/cmake/FindCython.cmake b/cmake/FindCython.cmake index 76f43480d9..3b98cde89e 100644 --- a/cmake/FindCython.cmake +++ b/cmake/FindCython.cmake @@ -10,7 +10,7 @@ # CYTHON_FOUND - true if Cython was found # CYTHON_VERSION - Cython version -execute_process(COMMAND ${Python_EXECUTABLE} -c "import cython ; print(cython.__version__)" +execute_process(COMMAND ${Python3_EXECUTABLE} -c "import cython ; print(cython.__version__)" RESULT_VARIABLE _cython_runs OUTPUT_VARIABLE CYTHON_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE diff --git a/cmake/FindNumpy.cmake b/cmake/FindNumpy.cmake index 201bc19221..b6de6e3e35 100644 --- a/cmake/FindNumpy.cmake +++ b/cmake/FindNumpy.cmake @@ -12,32 +12,32 @@ # Numpy_INCLUDE_DIR -find_package(Python 3.6 COMPONENTS Interpreter Development) +find_package(Python3 3.6 COMPONENTS Interpreter Development) -if (NOT Python_FOUND) +if (NOT Python3_FOUND) message(STATUS "Could not find numpy as python was not found. 
Maybe the developement package is missing?") - set(Numpy_FOUND ${Python_FOUND}) + set(Numpy_FOUND ${Python3_FOUND}) return() endif() if (NOT Numpy_FOUND) - execute_process(COMMAND ${Python_EXECUTABLE} -c "import numpy ; print(numpy.__version__)" + execute_process(COMMAND ${Python3_EXECUTABLE} -c "import numpy ; print(numpy.__version__)" OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE Numpy_VERSION ) - execute_process(COMMAND ${Python_EXECUTABLE} -c "import numpy ; print(numpy.get_include())" + execute_process(COMMAND ${Python3_EXECUTABLE} -c "import numpy ; print(numpy.get_include())" OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE _numpy_include_dirs ) endif() if (Numpy_DEBUG) - message(STATUS "Looking for numpy headers in: ${_numpy_include_dirs} ${PYTHON_INCLUDE_DIR}") + message(STATUS "Looking for numpy headers in: ${_numpy_include_dirs} ${Python3_INCLUDE_DIRS}") endif() find_path(Numpy_INCLUDE_DIR numpy/arrayobject.h - PATHS "${_numpy_include_dirs}" "${PYTHON_INCLUDE_DIR}" + PATHS "${_numpy_include_dirs}" "${Python3_INCLUDE_DIRS}" PATH_SUFFIXES numpy/core/include ) diff --git a/cmake/FindPackageMultipass.cmake b/cmake/FindPackageMultipass.cmake index 2452096b56..99bbace448 100644 --- a/cmake/FindPackageMultipass.cmake +++ b/cmake/FindPackageMultipass.cmake @@ -108,7 +108,7 @@ macro (MULTIPASS_C_SOURCE_RUNS includes libraries source runs) endmacro (MULTIPASS_C_SOURCE_RUNS) macro (MULTIPASS_SOURCE_COMPILES includes libraries source runs language) - include (Check${language}SourceRuns) + include (Check${language}SourceCompiles) # This is a ridiculous hack. CHECK_${language}_SOURCE_* thinks that if the # *name* of the return variable doesn't change, then the test does # not need to be re-run. 
We keep an internal count which we diff --git a/cmake/FindSUNDIALS.cmake b/cmake/FindSUNDIALS.cmake index 1ecb5db429..15b266d06a 100644 --- a/cmake/FindSUNDIALS.cmake +++ b/cmake/FindSUNDIALS.cmake @@ -104,16 +104,8 @@ endforeach() if (SUNDIALS_INCLUDE_DIR) file(READ "${SUNDIALS_INCLUDE_DIR}/sundials_config.h" SUNDIALS_CONFIG_FILE) - string(FIND "${SUNDIALS_CONFIG_FILE}" "SUNDIALS_PACKAGE_VERSION" index) - if("${index}" LESS 0) - # Version >3 - set(SUNDIALS_VERSION_REGEX_PATTERN - ".*#define SUNDIALS_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*") - else() - # Version <3 - set(SUNDIALS_VERSION_REGEX_PATTERN - ".*#define SUNDIALS_PACKAGE_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*") - endif() + set(SUNDIALS_VERSION_REGEX_PATTERN + ".*#define SUNDIALS_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*") string(REGEX MATCH ${SUNDIALS_VERSION_REGEX_PATTERN} _ "${SUNDIALS_CONFIG_FILE}") set(SUNDIALS_VERSION_MAJOR ${CMAKE_MATCH_1} CACHE STRING "") set(SUNDIALS_VERSION_MINOR ${CMAKE_MATCH_2} CACHE STRING "") diff --git a/cmake/SetupBOUTThirdParty.cmake b/cmake/SetupBOUTThirdParty.cmake index 53adbec92d..9c49fe6fdc 100644 --- a/cmake/SetupBOUTThirdParty.cmake +++ b/cmake/SetupBOUTThirdParty.cmake @@ -190,10 +190,10 @@ endif() message(STATUS "NetCDF support: ${BOUT_USE_NETCDF}") set(BOUT_HAS_NETCDF ${BOUT_USE_NETCDF}) -option(BOUT_USE_ADIOS "Enable support for ADIOS output" ON) -option(BOUT_DOWNLOAD_ADIOS "Download and build ADIOS2" OFF) -if (BOUT_USE_ADIOS) - if (BOUT_DOWNLOAD_ADIOS) +option(BOUT_USE_ADIOS2 "Enable support for ADIOS output" ON) +option(BOUT_DOWNLOAD_ADIOS2 "Download and build ADIOS2" OFF) +if (BOUT_USE_ADIOS2) + if (BOUT_DOWNLOAD_ADIOS2) message(STATUS "Downloading and configuring ADIOS2") include(FetchContent) FetchContent_Declare( @@ -220,12 +220,12 @@ if (BOUT_USE_ADIOS) find_package(MPI REQUIRED COMPONENTS C) target_link_libraries(bout++ PUBLIC adios2::cxx11_mpi MPI::MPI_C) else() - set(BOUT_USE_ADIOS OFF) + set(BOUT_USE_ADIOS2 OFF) endif() endif() endif() 
-message(STATUS "ADIOS support: ${BOUT_USE_ADIOS}") -set(BOUT_HAS_ADIOS ${BOUT_USE_ADIOS}) +message(STATUS "ADIOS2 support: ${BOUT_USE_ADIOS2}") +set(BOUT_HAS_ADIOS2 ${BOUT_USE_ADIOS2}) option(BOUT_USE_FFTW "Enable support for FFTW" ON) @@ -281,8 +281,8 @@ if (BOUT_USE_SUNDIALS) include(FetchContent) FetchContent_Declare( sundials - GIT_REPOSITORY https://github.com/ZedThree/sundials - GIT_TAG cmake-export-fixes + GIT_REPOSITORY https://github.com/LLNL/sundials + GIT_TAG v7.0.0 ) # Note: These are settings for building SUNDIALS set(EXAMPLES_ENABLE_C OFF CACHE BOOL "" FORCE) @@ -297,7 +297,11 @@ if (BOUT_USE_SUNDIALS) FetchContent_MakeAvailable(sundials) message(STATUS "SUNDIALS done configuring") else() + enable_language(C) find_package(SUNDIALS REQUIRED) + if (SUNDIALS_VERSION VERSION_LESS 4.0.0) + message(FATAL_ERROR "SUNDIALS_VERSION 4.0.0 or newer is required. Found version ${SUNDIALS_VERSION}.") + endif() endif() target_link_libraries(bout++ PUBLIC SUNDIALS::nvecparallel) target_link_libraries(bout++ PUBLIC SUNDIALS::cvode) diff --git a/cmake_build_defines.hxx.in b/cmake_build_defines.hxx.in index ed6e8685f6..4d63a01b7d 100644 --- a/cmake_build_defines.hxx.in +++ b/cmake_build_defines.hxx.in @@ -13,7 +13,7 @@ #cmakedefine01 BOUT_HAS_IDA #cmakedefine01 BOUT_HAS_LAPACK #cmakedefine01 BOUT_HAS_NETCDF -#cmakedefine01 BOUT_HAS_ADIOS +#cmakedefine01 BOUT_HAS_ADIOS2 #cmakedefine01 BOUT_HAS_PETSC #cmakedefine01 BOUT_HAS_PRETTY_FUNCTION #cmakedefine01 BOUT_HAS_PVODE diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 3849d34852..022b16e248 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(backtrace) add_subdirectory(blob2d) add_subdirectory(blob2d-outerloop) add_subdirectory(blob2d-laplacexz) +add_subdirectory(boutpp) add_subdirectory(boundary-conditions/advection) add_subdirectory(conducting-wall-mode) add_subdirectory(conduction) diff --git a/examples/blob2d/blob2d.cxx b/examples/blob2d/blob2d.cxx 
index f41f857d46..7007bbeb77 100644 --- a/examples/blob2d/blob2d.cxx +++ b/examples/blob2d/blob2d.cxx @@ -25,7 +25,6 @@ class Blob2D : public PhysicsModel { BoutReal rho_s; ///< Bohm gyro radius BoutReal Omega_i; ///< Ion cyclotron frequency BoutReal c_s; ///< Bohm sound speed - BoutReal n0; ///< Reference density // Constants to calculate the parameters BoutReal Te0; ///< Isothermal temperature [eV] @@ -61,7 +60,6 @@ class Blob2D : public PhysicsModel { m_i = options["m_i"].withDefault(2 * 1.667e-27); m_e = options["m_e"].withDefault(9.11e-31); - n0 = options["n0"].doc("Background density in cubic m").withDefault(1e19); D_vort = options["D_vort"].doc("Viscous diffusion coefficient").withDefault(0.0); D_n = options["D_n"].doc("Density diffusion coefficient").withDefault(0.0); diff --git a/examples/blob2d/delta_0.25/BOUT.inp b/examples/blob2d/delta_0.25/BOUT.inp index 58d1e36741..841fcaf235 100644 --- a/examples/blob2d/delta_0.25/BOUT.inp +++ b/examples/blob2d/delta_0.25/BOUT.inp @@ -87,8 +87,6 @@ flags = 49152 # set_rhs i.e. identity matrix in boundaries Te0 = 5 # Electron Temperature (eV) -n0 = 2e+18 # Background plasma density (m^-3) - compressible = false # Compressibility? boussinesq = true # Boussinesq approximation (no perturbed n in vorticity) diff --git a/examples/blob2d/delta_1/BOUT.inp b/examples/blob2d/delta_1/BOUT.inp index 417911271d..39213ddd36 100644 --- a/examples/blob2d/delta_1/BOUT.inp +++ b/examples/blob2d/delta_1/BOUT.inp @@ -87,8 +87,6 @@ flags = 49152 # set_rhs i.e. identity matrix in boundaries Te0 = 5 # Electron Temperature (eV) -n0 = 2e+18 # Background plasma density (m^-3) - compressible = false # Compressibility? boussinesq = true # Boussinesq approximation (no perturbed n in vorticity) diff --git a/examples/blob2d/delta_10/BOUT.inp b/examples/blob2d/delta_10/BOUT.inp index 353c28c3b2..f4507b871b 100644 --- a/examples/blob2d/delta_10/BOUT.inp +++ b/examples/blob2d/delta_10/BOUT.inp @@ -87,8 +87,6 @@ flags = 49152 # set_rhs i.e. 
identity matrix in boundaries Te0 = 5 # Electron Temperature (eV) -n0 = 2e+18 # Background plasma density (m^-3) - compressible = false # Compressibility? boussinesq = true # Boussinesq approximation (no perturbed n in vorticity) diff --git a/examples/boutpp/CMakeLists.txt b/examples/boutpp/CMakeLists.txt new file mode 100644 index 0000000000..e46a7ae990 --- /dev/null +++ b/examples/boutpp/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.13) + +if (NOT TARGET bout++::bout++) + find_package(bout++ REQUIRED) +endif() + +bout_copy_file(runexample) +bout_copy_file(blob2d.py) +bout_copy_file(simulation.py) +bout_copy_file(data/BOUT.inp) diff --git a/examples/boutpp/blob2d.py b/examples/boutpp/blob2d.py index d5f370a454..4dc8ea60ac 100755 --- a/examples/boutpp/blob2d.py +++ b/examples/boutpp/blob2d.py @@ -24,7 +24,7 @@ def init(self, restart): self.phiSolver = bc.Laplacian() - options = bc.Options("model") + options = bc.Options.root("model") # Temperature in eV Te0 = options.get("Te0", 30) e = options.get("e", 1.602e-19) @@ -70,12 +70,20 @@ def init(self, restart): # /************ Create a solver for potential ********/ + opts_boussinesq = bc.Options.root("phiBoussinesq") + opts_non_boussinesq = bc.Options.root("phiSolver") + if self.boussinesq: # BOUT.inp section "phiBoussinesq" - self.phiSolver = bc.Laplacian(bc.Options("phiBoussinesq")) + opts_used = opts_boussinesq + opts_unused = opts_non_boussinesq else: # BOUT.inp section "phiSolver" - self.phiSolver = bc.Laplacian(bc.Options("phiSolver")) + opts_used = opts_non_boussinesq + opts_unused = opts_boussinesq + + self.phiSolver = bc.Laplacian(opts_used) + opts_unused.setConditionallyUsed() # Starting guess for first solve (if iterative) self.phi = bc.create3D("0") @@ -165,8 +173,8 @@ def ensure_blob(): # settings used by the core code -NOUT = 50 # number of time-steps -TIMESTEP = 50 # time between outputs [1/wci] +nout = 50 # number of time-steps +timestep = 50 # time between outputs [1/wci] MXG = 2 # 
Number of X guard cells @@ -198,8 +206,8 @@ def ensure_blob(): [mesh:ddz] -first = FFT -second = FFT +first = C2 +second = C2 upwind = W3 ################################################### @@ -207,8 +215,8 @@ def ensure_blob(): [solver] -ATOL = 1.0e-10 # absolute tolerance -RTOL = 1.0e-5 # relative tolerance +atol = 1e-10 # absolute tolerance +rtol = 1e-05 # relative tolerance mxstep = 10000 # Maximum internal steps per output ################################################### @@ -221,22 +229,20 @@ def ensure_blob(): fourth_order = true # 4th order or 2nd order -flags = 0 # inversion flags for phi - # 0 = Zero value - # 10 = Zero gradient AC inner & outer - # 15 = Zero gradient AC and DC - # 768 = Zero laplace inner & outer +# 0 = Zero value +# 10 = Zero gradient AC inner & outer +# 15 = Zero gradient AC and DC +# 768 = Zero laplace inner & outer [phiSolver:precon] # Preconditioner (if pctype=user) -filter = 0. # Must not filter solution -flags = 49152 # set_rhs i.e. identity matrix in boundaries +filter = 0.0 # Must not filter solution +flags = 49152 # set_rhs i.e. identity matrix in boundaries ################################################### # Electrostatic potential solver (Boussinesq) [phiBoussinesq] # By default type is tri (serial) or spt (parallel) -flags = 0 ################################################## # general settings for the model @@ -245,14 +251,12 @@ def ensure_blob(): Te0 = 5 # Electron Temperature (eV) -n0 = 2e18 # Background plasma density (m^-3) - compressible = false # Compressibility? boussinesq = true # Boussinesq approximation (no perturbed n in vorticity) -D_vort = 1e-6 # Viscosity -D_n = 1e-6 # Diffusion +D_vort = 1e-06 # Viscosity +D_n = 1e-06 # Diffusion R_c = 1.5 # Radius of curvature (m) @@ -261,7 +265,7 @@ def ensure_blob(): # These can be overridden for individual variables in # a section of that name. 
-[All] +[all] scale = 0.0 # default size of initial perturbations bndry_all = neumann # Zero-gradient on all boundaries @@ -278,9 +282,8 @@ def ensure_blob(): if __name__ == "__main__": - if "--create" in sys.argv: - sys.argv.remove("--create") - ensure_blob() + ensure_blob() + bc.init("-d blob".split(" ") + sys.argv[1:]) # Create an instance diff --git a/examples/boutpp/data/BOUT.inp b/examples/boutpp/data/BOUT.inp new file mode 100644 index 0000000000..d91707ec1b --- /dev/null +++ b/examples/boutpp/data/BOUT.inp @@ -0,0 +1,9 @@ +nout=10 +timestep=10 + +[mesh] +nx=160 +ny=1 +nz=n/n + +MYG=0 diff --git a/examples/fci-wave-logn/boundary/BOUT.inp b/examples/fci-wave-logn/boundary/BOUT.inp index 11e57ec47d..0632aa949b 100644 --- a/examples/fci-wave-logn/boundary/BOUT.inp +++ b/examples/fci-wave-logn/boundary/BOUT.inp @@ -20,7 +20,7 @@ expand_divergence = false background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -28,15 +28,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [v] diff --git a/examples/fci-wave-logn/div-integrate/BOUT.inp b/examples/fci-wave-logn/div-integrate/BOUT.inp index a37bf3e2a5..66bdbce5f2 100644 --- a/examples/fci-wave-logn/div-integrate/BOUT.inp +++ b/examples/fci-wave-logn/div-integrate/BOUT.inp @@ -20,7 +20,7 @@ expand_divergence = false background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -28,15 +28,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 
1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [v] diff --git a/examples/fci-wave-logn/expanded/BOUT.inp b/examples/fci-wave-logn/expanded/BOUT.inp index 3a2935c6e8..e084511d24 100644 --- a/examples/fci-wave-logn/expanded/BOUT.inp +++ b/examples/fci-wave-logn/expanded/BOUT.inp @@ -20,7 +20,7 @@ expand_divergence = true background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -28,15 +28,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [v] diff --git a/examples/fci-wave-logn/fci-wave.cxx b/examples/fci-wave-logn/fci-wave.cxx index 731897ad4e..2ea9048421 100644 --- a/examples/fci-wave-logn/fci-wave.cxx +++ b/examples/fci-wave-logn/fci-wave.cxx @@ -62,7 +62,7 @@ class FCIwave : public PhysicsModel { // Neumann boundaries simplifies parallel derivatives Bxyz.applyBoundary("neumann"); - Bxyz.applyParallelBoundary("parallel_neumann"); + Bxyz.applyParallelBoundary("parallel_neumann_o2"); SAVE_ONCE(Bxyz); Options::getRoot()->getSection("fciwave")->get("expand_divergence", expand_divergence, diff --git a/examples/fci-wave/div-integrate/BOUT.inp b/examples/fci-wave/div-integrate/BOUT.inp index eb41d5f228..68f2326f52 100644 --- a/examples/fci-wave/div-integrate/BOUT.inp +++ 
b/examples/fci-wave/div-integrate/BOUT.inp @@ -21,7 +21,7 @@ log_density = false # Evolve log(n)? background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -29,15 +29,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [v] diff --git a/examples/fci-wave/div/BOUT.inp b/examples/fci-wave/div/BOUT.inp index 70b60757eb..3f497df6c7 100644 --- a/examples/fci-wave/div/BOUT.inp +++ b/examples/fci-wave/div/BOUT.inp @@ -21,7 +21,7 @@ log_density = false # Evolve log(n)? background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -29,15 +29,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [v] diff --git a/examples/fci-wave/fci-wave.cxx b/examples/fci-wave/fci-wave.cxx index 226b52c808..2fd383ed3f 100644 --- a/examples/fci-wave/fci-wave.cxx +++ b/examples/fci-wave/fci-wave.cxx @@ -69,7 +69,7 @@ class FCIwave : public PhysicsModel { // Neumann boundaries simplifies parallel derivatives Bxyz.applyBoundary("neumann"); - Bxyz.applyParallelBoundary("parallel_neumann"); + Bxyz.applyParallelBoundary("parallel_neumann_o2"); SAVE_ONCE(Bxyz); 
SOLVE_FOR(nv); diff --git a/examples/fci-wave/logn/BOUT.inp b/examples/fci-wave/logn/BOUT.inp index f97d8cc891..26f8a99d63 100644 --- a/examples/fci-wave/logn/BOUT.inp +++ b/examples/fci-wave/logn/BOUT.inp @@ -21,7 +21,7 @@ log_density = true # Evolve log(n)? background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -29,15 +29,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [nv] diff --git a/examples/laplace-petsc3d/data/BOUT.inp b/examples/laplace-petsc3d/data/BOUT.inp index 86a52c69f2..7e81d992a2 100644 --- a/examples/laplace-petsc3d/data/BOUT.inp +++ b/examples/laplace-petsc3d/data/BOUT.inp @@ -6,7 +6,7 @@ mz = 128 function = mixmode(x, 1.)*mixmode(y, 2.)*mixmode(z, 3.) bndry_xin = none bndry_xout = none -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [rhs] function = mixmode(x, 4.)*mixmode(y, 5.)*mixmode(z, 6.) @@ -22,7 +22,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.) [C2] #function = 0. function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.) -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [A] function = 0.0 @@ -46,7 +46,7 @@ transform_from_field_aligned = false [initial] bndry_xin = neumann bndry_xout = neumann -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [input1] function = mixmode(x, 1.)*mixmode(z, 2.) 
diff --git a/include/bout/adios_object.hxx b/include/bout/adios_object.hxx index 9d2f545b46..4750930373 100755 --- a/include/bout/adios_object.hxx +++ b/include/bout/adios_object.hxx @@ -14,7 +14,7 @@ #include "bout/build_config.hxx" -#if BOUT_HAS_ADIOS +#if BOUT_HAS_ADIOS2 #include #include @@ -79,5 +79,5 @@ void ADIOSSetParameters(const std::string& input, const char delimKeyValue, } // namespace bout -#endif //BOUT_HAS_ADIOS +#endif //BOUT_HAS_ADIOS2 #endif //ADIOS_OBJECT_HXX diff --git a/include/bout/boundary_factory.hxx b/include/bout/boundary_factory.hxx index 9fc2d7f256..5f1f6e06a6 100644 --- a/include/bout/boundary_factory.hxx +++ b/include/bout/boundary_factory.hxx @@ -4,10 +4,13 @@ class BoundaryFactory; #ifndef BOUT_BNDRY_FACTORY_H #define BOUT_BNDRY_FACTORY_H -#include "bout/boundary_op.hxx" -#include "bout/boundary_region.hxx" -#include "bout/parallel_boundary_op.hxx" -#include "bout/parallel_boundary_region.hxx" +class BoundaryOpBase; +class BoundaryOpPar; +class BoundaryOp; +class BoundaryRegionBase; +class BoundaryRegionPar; +class BoundaryRegion; +class BoundaryModifier; #include #include diff --git a/include/bout/bout.hxx b/include/bout/bout.hxx index d929a19c2f..09433bcc3b 100644 --- a/include/bout/bout.hxx +++ b/include/bout/bout.hxx @@ -34,6 +34,7 @@ #ifndef BOUT_H #define BOUT_H +// IWYU pragma: begin_keep, begin_export #include "bout/build_config.hxx" #include "bout/boutcomm.hxx" @@ -53,6 +54,7 @@ #include "bout/vector3d.hxx" #include "bout/version.hxx" #include "bout/where.hxx" +// IWYU pragma: end_keep, end_export // BOUT++ main functions diff --git a/include/bout/bout_enum_class.hxx b/include/bout/bout_enum_class.hxx index f8c9e364c5..585e5b020e 100644 --- a/include/bout/bout_enum_class.hxx +++ b/include/bout/bout_enum_class.hxx @@ -86,7 +86,11 @@ BOUT_ENUM_CLASS_MAP_ARGS(BOUT_STR_ENUM_CLASS, enumname, __VA_ARGS__)}; \ auto found = fromString_map.find(s); \ if (found == fromString_map.end()) { \ - throw BoutException("Did not find enum 
{:s}", s); \ + std::string valid_values {}; \ + for (auto const& entry : fromString_map) { \ + valid_values += std::string(" ") + entry.first; \ + } \ + throw BoutException("Did not find enum {:s}. Valid values: {:s}", s, valid_values); \ } \ return found->second; \ } \ diff --git a/include/bout/build_config.hxx b/include/bout/build_config.hxx index c97962f7cf..08158d00e9 100644 --- a/include/bout/build_config.hxx +++ b/include/bout/build_config.hxx @@ -17,7 +17,7 @@ constexpr auto has_gettext = static_cast(BOUT_HAS_GETTEXT); constexpr auto has_lapack = static_cast(BOUT_HAS_LAPACK); constexpr auto has_legacy_netcdf = static_cast(BOUT_HAS_LEGACY_NETCDF); constexpr auto has_netcdf = static_cast(BOUT_HAS_NETCDF); -constexpr auto has_adios = static_cast(BOUT_HAS_ADIOS); +constexpr auto has_adios2 = static_cast(BOUT_HAS_ADIOS2); constexpr auto has_petsc = static_cast(BOUT_HAS_PETSC); constexpr auto has_hypre = static_cast(BOUT_HAS_HYPRE); constexpr auto has_umpire = static_cast(BOUT_HAS_UMPIRE); diff --git a/include/bout/field_data.hxx b/include/bout/field_data.hxx index 03b9d6759b..185dcabf2d 100644 --- a/include/bout/field_data.hxx +++ b/include/bout/field_data.hxx @@ -44,7 +44,8 @@ class Coordinates; class Mesh; #include "bout/boundary_region.hxx" -#include "bout/parallel_boundary_region.hxx" +class BoundaryRegionPar; +enum class BndryLoc; #include "bout/sys/expressionparser.hxx" diff --git a/include/bout/invert_laplace.hxx b/include/bout/invert_laplace.hxx index f7b9501a81..0b416d4aab 100644 --- a/include/bout/invert_laplace.hxx +++ b/include/bout/invert_laplace.hxx @@ -238,6 +238,10 @@ public: virtual void setInnerBoundaryFlags(int f) { inner_boundary_flags = f; } virtual void setOuterBoundaryFlags(int f) { outer_boundary_flags = f; } + virtual int getGlobalFlags() const { return global_flags; } + virtual int getInnerBoundaryFlags() const { return inner_boundary_flags; } + virtual int getOuterBoundaryFlags() const { return outer_boundary_flags; } + /// Does this 
solver use Field3D coefficients (true) or only their DC component (false) virtual bool uses3DCoefs() const { return false; } @@ -308,9 +312,23 @@ protected: int extra_yguards_lower; ///< exclude some number of points at the lower boundary, useful for staggered grids or when boundary conditions make inversion redundant int extra_yguards_upper; ///< exclude some number of points at the upper boundary, useful for staggered grids or when boundary conditions make inversion redundant - int global_flags; ///< Default flags - int inner_boundary_flags; ///< Flags to set inner boundary condition - int outer_boundary_flags; ///< Flags to set outer boundary condition + /// Return true if global/default \p flag is set + bool isGlobalFlagSet(int flag) const { return (global_flags & flag) != 0; } + /// Return true if \p flag is set for the inner boundary condition + bool isInnerBoundaryFlagSet(int flag) const { + return (inner_boundary_flags & flag) != 0; + } + /// Return true if \p flag is set for the outer boundary condition + bool isOuterBoundaryFlagSet(int flag) const { + return (outer_boundary_flags & flag) != 0; + } + + /// Return true if \p flag is set for the inner boundary condition + /// and this is the first proc in X direction + bool isInnerBoundaryFlagSetOnFirstX(int flag) const; + /// Return true if \p flag is set for the outer boundary condition + /// and this the last proc in X direction + bool isOuterBoundaryFlagSetOnLastX(int flag) const; void tridagCoefs(int jx, int jy, BoutReal kwave, dcomplex& a, dcomplex& b, dcomplex& c, const Field2D* ccoef = nullptr, const Field2D* d = nullptr, @@ -322,15 +340,13 @@ protected: CELL_LOC loc = CELL_DEFAULT); void tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dcomplex* bk, int jy, - int kz, BoutReal kwave, int flags, int inner_boundary_flags, - int outer_boundary_flags, const Field2D* a, const Field2D* ccoef, + int kz, BoutReal kwave, const Field2D* a, const Field2D* ccoef, const Field2D* d, bool includeguards 
= true, bool zperiodic = true) { - tridagMatrix(avec, bvec, cvec, bk, jy, kz, kwave, flags, inner_boundary_flags, - outer_boundary_flags, a, ccoef, ccoef, d, includeguards, zperiodic); + tridagMatrix(avec, bvec, cvec, bk, jy, kz, kwave, a, ccoef, ccoef, d, includeguards, + zperiodic); } void tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dcomplex* bk, int jy, - int kz, BoutReal kwave, int flags, int inner_boundary_flags, - int outer_boundary_flags, const Field2D* a, const Field2D* c1coef, + int kz, BoutReal kwave, const Field2D* a, const Field2D* c1coef, const Field2D* c2coef, const Field2D* d, bool includeguards = true, bool zperiodic = true); CELL_LOC location; ///< staggered grid location of this solver @@ -339,6 +355,10 @@ protected: /// localmesh->getCoordinates(location) once private: + int global_flags; ///< Default flags + int inner_boundary_flags; ///< Flags to set inner boundary condition + int outer_boundary_flags; ///< Flags to set outer boundary condition + /// Singleton instance static std::unique_ptr instance; /// Name for writing performance infomation; default taken from diff --git a/include/bout/invert_pardiv.hxx b/include/bout/invert_pardiv.hxx index 23ea59e943..0153cc1987 100644 --- a/include/bout/invert_pardiv.hxx +++ b/include/bout/invert_pardiv.hxx @@ -31,11 +31,11 @@ #ifndef INV_PARDIV_H #define INV_PARDIV_H -#include "field2d.hxx" -#include "field3d.hxx" -#include "options.hxx" -#include "unused.hxx" +#include "bout/field2d.hxx" +#include "bout/field3d.hxx" #include "bout/generic_factory.hxx" +#include "bout/options.hxx" +#include "bout/unused.hxx" // Pardivergence implementations constexpr auto PARDIVCYCLIC = "cyclic"; diff --git a/include/bout/mask.hxx b/include/bout/mask.hxx index 4250d21105..fd90ae7345 100644 --- a/include/bout/mask.hxx +++ b/include/bout/mask.hxx @@ -66,6 +66,8 @@ public: inline bool& operator()(int jx, int jy, int jz) { return mask(jx, jy, jz); } inline const bool& operator()(int jx, int jy, int jz) 
const { return mask(jx, jy, jz); } + + inline bool& operator[](const Ind3D& i) { return mask[i]; } inline const bool& operator[](const Ind3D& i) const { return mask[i]; } }; diff --git a/include/bout/mesh.hxx b/include/bout/mesh.hxx index 3bc01d3787..c80716fc12 100644 --- a/include/bout/mesh.hxx +++ b/include/bout/mesh.hxx @@ -55,22 +55,24 @@ class Mesh; #include "bout/field_data.hxx" #include "bout/options.hxx" -#include "fieldgroup.hxx" +#include "bout/fieldgroup.hxx" -#include "bout/boundary_region.hxx" -#include "bout/parallel_boundary_region.hxx" +class BoundaryRegion; +class BoundaryRegionPar; -#include "sys/range.hxx" // RangeIterator +#include "bout/sys/range.hxx" // RangeIterator #include -#include "coordinates.hxx" // Coordinates class +#include "bout/coordinates.hxx" // Coordinates class #include "bout/unused.hxx" #include "bout/generic_factory.hxx" #include +#include + #include #include #include @@ -90,6 +92,9 @@ public: ReturnType create(Options* options = nullptr, GridDataSource* source = nullptr) const; }; +BOUT_ENUM_CLASS(BoundaryParType, all, xin, xout, fwd, bwd, xin_fwd, xout_fwd, xin_bwd, + xout_bwd, SIZE); + template using RegisterMesh = MeshFactory::RegisterInFactory; @@ -485,11 +490,20 @@ public: /// Add a boundary region to this processor virtual void addBoundary(BoundaryRegion* UNUSED(bndry)) {} - /// Get all the parallel (Y) boundaries on this processor - virtual std::vector getBoundariesPar() = 0; + /// Get the list of parallel boundary regions. The option specifies with + /// region to get. Default is to get all regions. All possible options are + /// listed at the top of this file, see BoundaryParType. 
+ /// For example: + /// get all regions: + /// mesh->getBoundariesPar(Mesh::BoundaryParType::all) + /// get only xout: + /// mesh->getBoundariesPar(Mesh::BoundaryParType::xout) + virtual std::vector> + getBoundariesPar(BoundaryParType type = BoundaryParType::all) = 0; /// Add a parallel(Y) boundary to this processor - virtual void addBoundaryPar(BoundaryRegionPar* UNUSED(bndry)) {} + virtual void addBoundaryPar(std::shared_ptr UNUSED(bndry), + BoundaryParType UNUSED(type)) {} /// Branch-cut special handling (experimental) virtual Field3D smoothSeparatrix(const Field3D& f) { return f; } diff --git a/include/bout/options.hxx b/include/bout/options.hxx index 839c847289..4a32907b17 100644 --- a/include/bout/options.hxx +++ b/include/bout/options.hxx @@ -241,7 +241,8 @@ public: /// /// Option option2 = option1.copy(); /// - Options(const Options& other) = delete; // Use a reference or .copy() method + [[deprecated("Please use a reference or .copy() instead")]] Options( + const Options& other); /// Copy assignment must be explicit /// @@ -251,7 +252,8 @@ public: /// /// option2.value = option1.value; /// - Options& operator=(const Options& other) = delete; // Use a reference or .copy() method + [[deprecated("Please use a reference or .copy() instead")]] Options& + operator=(const Options& other); // Use a reference or .copy() method /// Make a deep copy of this Options, /// recursively copying children. 
@@ -364,7 +366,8 @@ public: /// {"long_name", "some velocity"} /// }); Options& setAttributes( - std::initializer_list> attrs) { + const std::initializer_list>& + attrs) { for (const auto& attr : attrs) { attributes[attr.first] = attr.second; } @@ -978,7 +981,7 @@ namespace details { /// avoiding lengthy recompilation if we change it struct OptionsFormatterBase { auto parse(fmt::format_parse_context& ctx) -> fmt::format_parse_context::iterator; - auto format(const Options& options, fmt::format_context& ctx) + auto format(const Options& options, fmt::format_context& ctx) const -> fmt::format_context::iterator; private: diff --git a/include/bout/options_io.hxx b/include/bout/options_io.hxx index 4c70159514..57be8bbaae 100644 --- a/include/bout/options_io.hxx +++ b/include/bout/options_io.hxx @@ -111,7 +111,7 @@ public: static constexpr auto default_type = #if BOUT_HAS_NETCDF "netcdf"; -#elif BOUT_HAS_ADIOS +#elif BOUT_HAS_ADIOS2 "adios"; #else "invalid"; diff --git a/include/bout/output_bout_types.hxx b/include/bout/output_bout_types.hxx index 6b1829b088..b67762521b 100644 --- a/include/bout/output_bout_types.hxx +++ b/include/bout/output_bout_types.hxx @@ -34,7 +34,7 @@ struct fmt::formatter> { // Formats the point p using the parsed format specification (presentation) // stored in this formatter. template - auto format(const SpecificInd& ind, FormatContext& ctx) { + auto format(const SpecificInd& ind, FormatContext& ctx) const { // ctx.out() is an output iterator to write to. 
if (presentation == 'c') { switch (N) { diff --git a/include/bout/parallel_boundary_op.hxx b/include/bout/parallel_boundary_op.hxx index 2bcb660802..d8620e892b 100644 --- a/include/bout/parallel_boundary_op.hxx +++ b/include/bout/parallel_boundary_op.hxx @@ -52,7 +52,7 @@ protected: BoutReal getValue(const BoundaryRegionPar& bndry, BoutReal t); }; -template +template class BoundaryOpParTemp : public BoundaryOpPar { public: using BoundaryOpPar::BoundaryOpPar; @@ -89,51 +89,74 @@ public: throw BoutException("Can't apply parallel boundary conditions to Field2D!"); } void apply(Field3D& f) override { return apply(f, 0); } + + void apply(Field3D& f, BoutReal t) override { + f.ynext(bndry->dir).allocate(); // Ensure unique before modifying + + auto dy = f.getCoordinates()->dy; + + for (bndry->first(); !bndry->isDone(); bndry->next()) { + BoutReal value = getValue(*bndry, t); + if (isNeumann) { + value *= dy[bndry->ind()]; + } + static_cast(this)->apply_stencil(f, bndry, value); + } + } }; ////////////////////////////////////////////////// // Implementations -class BoundaryOpPar_dirichlet : public BoundaryOpParTemp { +class BoundaryOpPar_dirichlet_o1 : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - - using BoundaryOpParTemp::apply; - void apply(Field3D& f, BoutReal t) override; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->dirichlet_o1(f, value); + } }; -class BoundaryOpPar_dirichlet_O3 : public BoundaryOpParTemp { +class BoundaryOpPar_dirichlet_o2 : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - - using BoundaryOpParTemp::apply; - void apply(Field3D& f, BoutReal t) override; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->dirichlet_o2(f, value); + } }; -class BoundaryOpPar_dirichlet_interp - : public BoundaryOpParTemp { +class BoundaryOpPar_dirichlet_o3 : public BoundaryOpParTemp { public: using 
BoundaryOpParTemp::BoundaryOpParTemp; - - using BoundaryOpParTemp::apply; - void apply(Field3D& f, BoutReal t) override; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->dirichlet_o3(f, value); + } }; -class BoundaryOpPar_neumann : public BoundaryOpParTemp { +class BoundaryOpPar_neumann_o1 + : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - - using BoundaryOpParTemp::apply; - void apply(Field3D& f, BoutReal t) override; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->neumann_o1(f, value); + } }; -class BoundaryOpPar_neumann_c2_simple - : public BoundaryOpParTemp { +class BoundaryOpPar_neumann_o2 + : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->neumann_o2(f, value); + } +}; - using BoundaryOpParTemp::apply; - void apply(Field3D& f, BoutReal t) override; +class BoundaryOpPar_neumann_o3 + : public BoundaryOpParTemp { +public: + using BoundaryOpParTemp::BoundaryOpParTemp; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->neumann_o3(f, value); + } }; #endif // BOUT_PAR_BNDRY_OP_H diff --git a/include/bout/parallel_boundary_region.hxx b/include/bout/parallel_boundary_region.hxx index ea609c7b55..4d5278d00f 100644 --- a/include/bout/parallel_boundary_region.hxx +++ b/include/bout/parallel_boundary_region.hxx @@ -5,18 +5,54 @@ #include "bout/bout_types.hxx" #include +#include +#include + /** * Boundary region for parallel direction. This contains a vector of points that are * inside the boundary. 
* */ -class BoundaryRegionPar : public BoundaryRegionBase { - struct IndexPoint { - int jx; - int jy; - int jz; - }; +namespace parallel_stencil { +// generated by src/mesh/parallel_boundary_stencil.cxx.py +inline BoutReal pow(BoutReal val, int exp) { + // constexpr int expval = exp; + // static_assert(expval == 2 or expval == 3, "This pow is only for exponent 2 or 3"); + if (exp == 2) { + return val * val; + } + ASSERT3(exp == 3); + return val * val * val; +} +inline BoutReal dirichlet_o1(BoutReal UNUSED(spacing0), BoutReal value0) { + return value0; +} +inline BoutReal dirichlet_o2(BoutReal spacing0, BoutReal value0, BoutReal spacing1, + BoutReal value1) { + return (spacing0 * value1 - spacing1 * value0) / (spacing0 - spacing1); +} +inline BoutReal neumann_o2(BoutReal UNUSED(spacing0), BoutReal value0, BoutReal spacing1, + BoutReal value1) { + return -spacing1 * value0 + value1; +} +inline BoutReal dirichlet_o3(BoutReal spacing0, BoutReal value0, BoutReal spacing1, + BoutReal value1, BoutReal spacing2, BoutReal value2) { + return (pow(spacing0, 2) * spacing1 * value2 - pow(spacing0, 2) * spacing2 * value1 + - spacing0 * pow(spacing1, 2) * value2 + spacing0 * pow(spacing2, 2) * value1 + + pow(spacing1, 2) * spacing2 * value0 - spacing1 * pow(spacing2, 2) * value0) + / ((spacing0 - spacing1) * (spacing0 - spacing2) * (spacing1 - spacing2)); +} +inline BoutReal neumann_o3(BoutReal spacing0, BoutReal value0, BoutReal spacing1, + BoutReal value1, BoutReal spacing2, BoutReal value2) { + return (2 * spacing0 * spacing1 * value2 - 2 * spacing0 * spacing2 * value1 + + pow(spacing1, 2) * spacing2 * value0 - pow(spacing1, 2) * value2 + - spacing1 * pow(spacing2, 2) * value0 + pow(spacing2, 2) * value1) + / ((spacing1 - spacing2) * (2 * spacing0 - spacing1 - spacing2)); +} +} // namespace parallel_stencil + +class BoundaryRegionPar : public BoundaryRegionBase { struct RealPoint { BoutReal s_x; @@ -26,13 +62,15 @@ class BoundaryRegionPar : public BoundaryRegionBase { struct 
Indices { // Indices of the boundary point - IndexPoint index; + Ind3D index; // Intersection with boundary in index space RealPoint intersection; // Distance to intersection BoutReal length; // Angle between field line and boundary - BoutReal angle; + // BoutReal angle; + // How many points we can go in the opposite direction + signed char valid; }; using IndicesVec = std::vector; @@ -46,28 +84,122 @@ class BoundaryRegionPar : public BoundaryRegionBase { public: BoundaryRegionPar(const std::string& name, int dir, Mesh* passmesh) : BoundaryRegionBase(name, passmesh), dir(dir) { + ASSERT0(std::abs(dir) == 1); BoundaryRegionBase::isParallel = true; } BoundaryRegionPar(const std::string& name, BndryLoc loc, int dir, Mesh* passmesh) : BoundaryRegionBase(name, loc, passmesh), dir(dir) { BoundaryRegionBase::isParallel = true; + ASSERT0(std::abs(dir) == 1); } /// Add a point to the boundary - void add_point(int jx, int jy, int jz, BoutReal x, BoutReal y, BoutReal z, - BoutReal length, BoutReal angle); + void add_point(Ind3D ind, BoutReal x, BoutReal y, BoutReal z, BoutReal length, + signed char valid) { + bndry_points.push_back({ind, {x, y, z}, length, valid}); + } + void add_point(int ix, int iy, int iz, BoutReal x, BoutReal y, BoutReal z, + BoutReal length, signed char valid) { + bndry_points.push_back({xyz2ind(ix, iy, iz, localmesh), {x, y, z}, length, valid}); + } + + // final, so they can be inlined + void first() final { bndry_position = begin(bndry_points); } + void next() final { ++bndry_position; } + bool isDone() final { return (bndry_position == end(bndry_points)); } - void first() override; - void next() override; - bool isDone() override; + // getter + Ind3D ind() const { return bndry_position->index; } + BoutReal s_x() const { return bndry_position->intersection.s_x; } + BoutReal s_y() const { return bndry_position->intersection.s_y; } + BoutReal s_z() const { return bndry_position->intersection.s_z; } + BoutReal length() const { return 
bndry_position->length; } + signed char valid() const { return bndry_position->valid; } - /// Index of the point in the boundary - int x, y, z; - BoutReal s_x, s_y, s_z; - BoutReal length; - BoutReal angle; + // setter + void setValid(signed char val) { bndry_position->valid = val; } + + bool contains(const BoundaryRegionPar& bndry) const { + return std::binary_search( + begin(bndry_points), end(bndry_points), *bndry.bndry_position, + [](const Indices& i1, const Indices& i2) { return i1.index < i2.index; }); + } + + // extrapolate a given point to the boundary + BoutReal extrapolate_o1(const Field3D& f) const { return f[ind()]; } + BoutReal extrapolate_o2(const Field3D& f) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return extrapolate_o1(f); + } + return f[ind()] * (1 + length()) - f.ynext(-dir)[ind().yp(-dir)] * length(); + } + + // dirichlet boundary code + void dirichlet_o1(Field3D& f, BoutReal value) const { + f.ynext(dir)[ind().yp(dir)] = value; + } + + void dirichlet_o2(Field3D& f, BoutReal value) const { + if (length() < small_value) { + return dirichlet_o1(f, value); + } + ynext(f) = parallel_stencil::dirichlet_o2(1, f[ind()], 1 - length(), value); + // ynext(f) = f[ind()] * (1 + 1/length()) + value / length(); + } + + void dirichlet_o3(Field3D& f, BoutReal value) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return dirichlet_o2(f, value); + } + if (length() < small_value) { + ynext(f) = parallel_stencil::dirichlet_o2(2, yprev(f), 1 - length(), value); + } else { + ynext(f) = + parallel_stencil::dirichlet_o3(2, yprev(f), 1, f[ind()], 1 - length(), value); + } + } + + // NB: value needs to be scaled by dy + // neumann_o1 is actually o2 if we would use an appropriate one-sided stencil. + // But in general we do not, and thus for normal C2 stencils, this is 1st order. 
+ void neumann_o1(Field3D& f, BoutReal value) const { ynext(f) = f[ind()] + value; } + + // NB: value needs to be scaled by dy + void neumann_o2(Field3D& f, BoutReal value) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return neumann_o1(f, value); + } + ynext(f) = yprev(f) + 2 * value; + } + + // NB: value needs to be scaled by dy + void neumann_o3(Field3D& f, BoutReal value) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return neumann_o1(f, value); + } + ynext(f) = + parallel_stencil::neumann_o3(1 - length(), value, 1, f[ind()], 2, yprev(f)); + } const int dir; + +private: + constexpr static BoutReal small_value = 1e-2; + + // BoutReal get(const Field3D& f, int off) + const BoutReal& ynext(const Field3D& f) const { return f.ynext(dir)[ind().yp(dir)]; } + BoutReal& ynext(Field3D& f) const { return f.ynext(dir)[ind().yp(dir)]; } + const BoutReal& yprev(const Field3D& f) const { return f.ynext(-dir)[ind().yp(-dir)]; } + BoutReal& yprev(Field3D& f) const { return f.ynext(-dir)[ind().yp(-dir)]; } + static Ind3D xyz2ind(int x, int y, int z, Mesh* mesh) { + const int ny = mesh->LocalNy; + const int nz = mesh->LocalNz; + return Ind3D{(x * ny + y) * nz + z, ny, nz}; + } }; #endif // BOUT_PAR_BNDRY_H diff --git a/include/bout/petsclib.hxx b/include/bout/petsclib.hxx index 35334ce773..2008671286 100644 --- a/include/bout/petsclib.hxx +++ b/include/bout/petsclib.hxx @@ -59,7 +59,7 @@ class Options; // means we _must_ `#include` this header _before_ any PETSc header! 
#define PETSC_HAVE_BROKEN_RECURSIVE_MACRO -#include +#include // IWYU pragma: export #include #include "bout/boutexception.hxx" diff --git a/include/bout/region.hxx b/include/bout/region.hxx index 68810324e3..bb1cf82bf1 100644 --- a/include/bout/region.hxx +++ b/include/bout/region.hxx @@ -116,16 +116,16 @@ class BoutMask; /// } // -#define BOUT_FOR_SERIAL(index, region) \ - for (auto block = region.getBlocks().cbegin(), end = region.getBlocks().cend(); \ - block < end; ++block) \ +#define BOUT_FOR_SERIAL(index, region) \ + for (auto block = (region).getBlocks().cbegin(), end = (region).getBlocks().cend(); \ + block < end; ++block) \ for (auto index = block->first; index < block->second; ++index) #if BOUT_USE_OPENMP -#define BOUT_FOR_OMP(index, region, omp_pragmas) \ - BOUT_OMP_PERF(omp_pragmas) \ - for (auto block = region.getBlocks().cbegin(); block < region.getBlocks().cend(); \ - ++block) \ +#define BOUT_FOR_OMP(index, region, omp_pragmas) \ + BOUT_OMP_PERF(omp_pragmas) \ + for (auto block = (region).getBlocks().cbegin(); block < (region).getBlocks().cend(); \ + ++block) \ for (auto index = block->first; index < block->second; ++index) #else // No OpenMP, so fall back to slightly more efficient serial form @@ -133,10 +133,10 @@ class BoutMask; #endif #define BOUT_FOR(index, region) \ - BOUT_FOR_OMP(index, region, parallel for schedule(BOUT_OPENMP_SCHEDULE)) + BOUT_FOR_OMP(index, (region), parallel for schedule(BOUT_OPENMP_SCHEDULE)) #define BOUT_FOR_INNER(index, region) \ - BOUT_FOR_OMP(index, region, for schedule(BOUT_OPENMP_SCHEDULE) nowait) + BOUT_FOR_OMP(index, (region), for schedule(BOUT_OPENMP_SCHEDULE) nowait) // NOLINTEND(cppcoreguidelines-macro-usage,bugprone-macro-parentheses) enum class IND_TYPE { IND_3D = 0, IND_2D = 1, IND_PERP = 2 }; diff --git a/include/bout/single_index_ops.hxx b/include/bout/single_index_ops.hxx index 6a9089510b..60bd78bc36 100644 --- a/include/bout/single_index_ops.hxx +++ b/include/bout/single_index_ops.hxx @@ -16,7 +16,7 @@ 
using EXEC_POL = RAJA::cuda_exec; using EXEC_POL = RAJA::loop_exec; #endif // end BOUT_USE_CUDA ////-----------CUDA settings------------------------------------------------------end -#endif +#endif // end BOUT_HAS_RAJA // Ind3D: i.zp(): BOUT_HOST_DEVICE inline int i_zp(const int id, const int nz) { diff --git a/include/bout/solver.hxx b/include/bout/solver.hxx index 896ce62965..47fef7ce73 100644 --- a/include/bout/solver.hxx +++ b/include/bout/solver.hxx @@ -429,6 +429,8 @@ protected: bool has_constraints{false}; /// Has init been called yet? bool initialised{false}; + /// If calling user RHS for the first time + bool first_rhs_call{true}; /// Current simulation time BoutReal simtime{0.0}; diff --git a/include/bout/sundials_backports.hxx b/include/bout/sundials_backports.hxx index c4f4aa59ef..4ec334f4d4 100644 --- a/include/bout/sundials_backports.hxx +++ b/include/bout/sundials_backports.hxx @@ -1,81 +1,74 @@ -// Backports for SUNDIALS compatibility between versions 3-6 +// Backports for SUNDIALS compatibility between versions 4-7 // // These are common backports shared between the CVode, ARKode, and IDA solvers // // Copyright 2022 Peter Hill, BOUT++ Team -// SPDX-License-Identifier: LGPLv3 +// SPDX-License-Identifier: LGPL-3.0-or-later #ifndef BOUT_SUNDIALS_BACKPORTS_H #define BOUT_SUNDIALS_BACKPORTS_H +#include "bout/bout_types.hxx" + +#include + #include #include #include +#include #include - -#if SUNDIALS_VERSION_MAJOR >= 3 #include -#endif - -#if SUNDIALS_VERSION_MAJOR >= 4 -#include #include #include -#endif - -#include "bout/unused.hxx" -#if SUNDIALS_VERSION_MAJOR < 3 -using SUNLinearSolver = int*; -inline void SUNLinSolFree([[maybe_unused]] SUNLinearSolver solver) {} -using sunindextype = long int; +#if SUNDIALS_VERSION_MAJOR >= 6 +#include #endif -#if SUNDIALS_VERSION_MAJOR < 4 -using SUNNonlinearSolver = int*; -inline void SUNNonlinSolFree([[maybe_unused]] SUNNonlinearSolver solver) {} +#if SUNDIALS_VERSION_MAJOR < 6 +using sundials_real_type = 
realtype; +#else +using sundials_real_type = sunrealtype; #endif -#if SUNDIALS_VERSION_MAJOR < 6 -namespace sundials { -struct Context { - Context(void* comm [[maybe_unused]]) {} -}; -} // namespace sundials +static_assert(std::is_same_v, + "BOUT++ and SUNDIALS real types do not match"); -using SUNContext = sundials::Context; +#define SUNDIALS_CONTROLLER_SUPPORT \ + (SUNDIALS_VERSION_MAJOR > 6 \ + || SUNDIALS_VERSION_MAJOR == 6 && SUNDIALS_VERSION_MINOR >= 7) +#define SUNDIALS_TABLE_BY_NAME_SUPPORT \ + (SUNDIALS_VERSION_MAJOR > 6 \ + || SUNDIALS_VERSION_MAJOR == 6 && SUNDIALS_VERSION_MINOR >= 4) +#if SUNDIALS_VERSION_MAJOR < 6 constexpr auto SUN_PREC_RIGHT = PREC_RIGHT; constexpr auto SUN_PREC_LEFT = PREC_LEFT; constexpr auto SUN_PREC_NONE = PREC_NONE; -inline N_Vector N_VNew_Parallel(MPI_Comm comm, sunindextype local_length, - sunindextype global_length, - [[maybe_unused]] SUNContext sunctx) { - return N_VNew_Parallel(comm, local_length, global_length); -} +namespace sundials { +using Context = std::nullptr_t; +} // namespace sundials +#endif -#if SUNDIALS_VERSION_MAJOR >= 3 -inline SUNLinearSolver SUNLinSol_SPGMR(N_Vector y, int pretype, int maxl, - [[maybe_unused]] SUNContext sunctx) { -#if SUNDIALS_VERSION_MAJOR == 3 - return SUNSPGMR(y, pretype, maxl); +inline sundials::Context createSUNContext([[maybe_unused]] MPI_Comm& comm) { +#if SUNDIALS_VERSION_MAJOR < 6 + return nullptr; +#elif SUNDIALS_VERSION_MAJOR < 7 + return sundials::Context(static_cast(&comm)); #else - return SUNLinSol_SPGMR(y, pretype, maxl); + return sundials::Context(comm); #endif } -#if SUNDIALS_VERSION_MAJOR >= 4 -inline SUNNonlinearSolver SUNNonlinSol_FixedPoint(N_Vector y, int m, - [[maybe_unused]] SUNContext sunctx) { - return SUNNonlinSol_FixedPoint(y, m); -} -inline SUNNonlinearSolver SUNNonlinSol_Newton(N_Vector y, - [[maybe_unused]] SUNContext sunctx) { - return SUNNonlinSol_Newton(y); +template +inline decltype(auto) callWithSUNContext(Func f, [[maybe_unused]] sundials::Context& ctx, 
+ Args&&... args) { +#if SUNDIALS_VERSION_MAJOR < 6 + return f(std::forward(args)...); +#else + return f(std::forward(args)..., ctx); +#endif } -#endif // SUNDIALS_VERSION_MAJOR >= 4 -#endif // SUNDIALS_VERSION_MAJOR >= 3 -#endif // SUNDIALS_VERSION_MAJOR < 6 #endif // BOUT_SUNDIALS_BACKPORTS_H diff --git a/include/bout/utils.hxx b/include/bout/utils.hxx index 3172f1cf7a..b45152fbcc 100644 --- a/include/bout/utils.hxx +++ b/include/bout/utils.hxx @@ -362,6 +362,14 @@ public: return data[i.ind]; } + T& operator[](Ind3D i) { + // ny and nz are private :-( + // ASSERT2(i.nz == n3); + // ASSERT2(i.ny == n2); + ASSERT2(0 <= i.ind && i.ind < n1 * n2 * n3); + return data[i.ind]; + } + Tensor& operator=(const T& val) { for (auto& i : data) { i = val; diff --git a/manual/sphinx/conf.py b/manual/sphinx/conf.py index 29c0985841..d27e8ab1fd 100755 --- a/manual/sphinx/conf.py +++ b/manual/sphinx/conf.py @@ -88,7 +88,7 @@ def __getattr__(cls, name): + " -DBOUT_UPDATE_GIT_SUBMODULE=OFF" + " -DBOUT_TESTS=OFF" + " -DBOUT_ALLOW_INSOURCE_BUILD=ON" - + f" -DPython_ROOT_DIR={pydir}" + + f" -DPython3_ROOT_DIR={pydir}" + f" -Dmpark_variant_DIR={pwd}/externalpackages/mpark.variant/" + f" -Dfmt_DIR={pwd}/externalpackages/fmt/" ) diff --git a/manual/sphinx/user_docs/adios2.rst b/manual/sphinx/user_docs/adios2.rst index 8a6228cd3a..d8e0135c0d 100644 --- a/manual/sphinx/user_docs/adios2.rst +++ b/manual/sphinx/user_docs/adios2.rst @@ -11,14 +11,14 @@ Installation The easiest way to configure BOUT++ with ADIOS2 is to tell CMake to download and build it with this flag:: - -DBOUT_DOWNLOAD_ADIOS=ON + -DBOUT_DOWNLOAD_ADIOS2=ON The ``master`` branch will be downloaded from `Github `_, configured and built with BOUT++. 
-Alternatively, if ADIOS is already installed then the following flags can be used:: +Alternatively, if ADIOS2 is already installed then the following flags can be used:: - -DBOUT_USE_ADIOS=ON -DADIOS2_ROOT=/path/to/adios2 + -DBOUT_USE_ADIOS2=ON -DADIOS2_ROOT=/path/to/adios2 Output files ------------ diff --git a/manual/sphinx/user_docs/advanced_install.rst b/manual/sphinx/user_docs/advanced_install.rst index e25be12b4b..048a26a6e3 100644 --- a/manual/sphinx/user_docs/advanced_install.rst +++ b/manual/sphinx/user_docs/advanced_install.rst @@ -145,13 +145,12 @@ where ```` is the path to the build directory MPCDF HPC Systems ~~~~~~~~~~~~~~~~~ +After cloning BOUT-dev and checking out the branch you want (e.g. db-outer), run: .. code-block:: bash - module purge # or at least onload intel and impi and mkl - module load gcc/10 cmake/3.18 openmpi/4 - # ensure python3 is >= python3.6 - skip if you have a newer python3 loaded - mkdir -p $HOME/bin ; test -e $HOME/bin/python3 || ln -s $(which python3.6) $HOME/bin/python3 - BUILD=/ptmp/$USER/bout-deps bin/bout-build-deps.sh + module purge # or at least onload intel + module load gcc/13 anaconda/3/2021.11 impi/2021.9 hdf5-serial/1.12.2 mkl/2022.0 netcdf-serial/4.8.1 fftw-mpi/3.3.10 + BUILD=/ptmp/$USER/bout-deps NO_HDF5=1 NO_NETCDF=1 NO_FFTW=1 bin/bout-build-deps.sh and follow the instructions for configuring BOUT++. To enable openMP for a production run use: @@ -159,11 +158,11 @@ for a production run use: .. code-block:: bash module load bout-dep - cmake .. -DBOUT_USE_NETCDF=ON -DnetCDF_ROOT=$BOUT_DEP -DnetCDFCxx_ROOT=$BOUT_DEP \ + cmake .. -DBOUT_USE_NETCDF=ON -DnetCDFCxx_ROOT=$BOUT_DEP \ -DBOUT_USE_PETSC=ON -DPETSC_DIR=$BOUT_DEP \ - -DBOUT_USE_FFTW=ON -DFFTW_ROOT=$BOUT_DEP \ + -DBOUT_USE_FFTW=ON \ -DBOUT_USE_SUNDIALS=ON -DSUNDIALS_ROOT=$BOUT_DEP \ - -DBOUT_ENABLE_OPENMP=ON \ + -DBOUT_ENABLE_OPENMP=OFF \ -DCMAKE_BUILD_TYPE=Release @@ -306,9 +305,10 @@ solver. 
Currently, BOUT++ also supports the SUNDIALS solvers CVODE, IDA and ARKODE which are available from https://computation.llnl.gov/casc/sundials/main.html. -.. note:: BOUT++ currently supports SUNDIALS > 2.6, up to 5.4.0 as of - September 2020. It is advisable to use the highest possible - version +.. note:: BOUT++ currently supports SUNDIALS > 2.6, up to 6.7.0 as of + January 2024. It is advisable to use the highest possible + version. Support for SUNDIALS versions < 4 will be removed + in the next release. The full installation guide is found in the downloaded ``.tar.gz``, but we will provide a step-by-step guide to install it and make it diff --git a/manual/sphinx/user_docs/installing.rst b/manual/sphinx/user_docs/installing.rst index eb155909bf..10f5d9b9f1 100644 --- a/manual/sphinx/user_docs/installing.rst +++ b/manual/sphinx/user_docs/installing.rst @@ -373,7 +373,7 @@ For SUNDIALS, use ``-DBOUT_DOWNLOAD_SUNDIALS=ON``. If using ``ccmake`` this opti may not appear initially. This automatically sets ``BOUT_USE_SUNDIALS=ON``, and configures SUNDIALS to use MPI. -For ADIOS2, use ``-DBOUT_DOWNLOAD_ADIOS=ON``. This will download and +For ADIOS2, use ``-DBOUT_DOWNLOAD_ADIOS2=ON``. This will download and configure `ADIOS2 `_, enabling BOUT++ to read and write this high-performance parallel file format. 
diff --git a/manual/sphinx/user_docs/laplacian.rst b/manual/sphinx/user_docs/laplacian.rst index e422fa82bd..5365ba14a7 100644 --- a/manual/sphinx/user_docs/laplacian.rst +++ b/manual/sphinx/user_docs/laplacian.rst @@ -536,7 +536,6 @@ Fourth order approximation &+ c_{i-2,j} f_{i-2,j} + c_{i-2,j+1} f_{i-2,j+1} \\ &+ c_{i-2,j+2} f_{i-2,j+2} + c_{i-1,j-2} f_{i-1,j-2} \\ &+ c_{i-1,j-1} f_{i-1,j-1} + c_{i-1,j} f_{i-1,j} \\ - &+ c_{i-1,j-1} f_{i-1,j-1} + c_{i-1,j} f_{i-1,j} \\ &+ c_{i-1,j+1} f_{i-1,j+1} + c_{i-1,j+2} f_{i-1,j+2} \\ &+ c_{i,j-2} f_{i,j-2} + c_{i,j-1} f_{i,j-1} \\ &+ c_{i,j+1} f_{i,j+1} + c_{i,j+2} f_{i,j+2} \\ @@ -573,9 +572,9 @@ Fourth order approximation (9-point stencil) .. math:: \texttt{ddx\_c} = \frac{-\texttt{c2}_{x+2} + 8\texttt{c2}_{x+1} - - 8\texttt{c2}_{x-1} + \texttt{c2}_{x-1} }{ 12\texttt{c1}\text{d}x} \\ + 8\texttt{c2}_{x-1} + \texttt{c2}_{x-2} }{ 12\texttt{c1}\text{d}x} \\ \texttt{ddz\_c} = \frac{-\texttt{c2}_{z+2} + 8\texttt{c2}_{z+1} - - 8\texttt{c2}_{z-1} + \texttt{c2}_{z-1} }{ 12\texttt{c1}\text{d}z} + 8\texttt{c2}_{z-1} + \texttt{c2}_{z-2} }{ 12\texttt{c1}\text{d}z} This gives diff --git a/requirements.txt b/requirements.txt index 75358b10db..dcbe5cef5c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ -Jinja2>=2.11.3 -numpy>=1.14.1 -scipy>=1.0.0 -netcdf4~=1.6.0 -matplotlib>=2.0.0 +Jinja2~=3.1.0 +numpy~=2.0.0 +scipy>=1.11.0 +netcdf4~=1.7.1 +matplotlib>=3.7.0 Cython~=3.0.0 boututils~=0.2.1 boutdata~=0.2.1 diff --git a/src/bout++.cxx b/src/bout++.cxx index a83e278d9c..ff25b1163e 100644 --- a/src/bout++.cxx +++ b/src/bout++.cxx @@ -59,7 +59,7 @@ const char DEFAULT_DIR[] = "data"; #include "bout/bout.hxx" #undef BOUT_NO_USING_NAMESPACE_BOUTGLOBALS -#if BOUT_HAS_ADIOS +#if BOUT_HAS_ADIOS2 #include "bout/adios_object.hxx" #endif @@ -165,7 +165,7 @@ int BoutInitialise(int& argc, char**& argv) { savePIDtoFile(args.data_dir, MYPE); -#if BOUT_HAS_ADIOS +#if BOUT_HAS_ADIOS2 bout::ADIOSInit(BoutComm::get()); #endif @@ -572,7 
+572,7 @@ void printCompileTimeOptions() { constexpr auto netcdf_flavour = has_netcdf ? (has_legacy_netcdf ? " (Legacy)" : " (NetCDF4)") : ""; output_info.write(_("\tNetCDF support {}{}\n"), is_enabled(has_netcdf), netcdf_flavour); - output_info.write(_("\tADIOS support {}\n"), is_enabled(has_adios)); + output_info.write(_("\tADIOS2 support {}\n"), is_enabled(has_adios2)); output_info.write(_("\tPETSc support {}\n"), is_enabled(has_petsc)); output_info.write(_("\tPretty function name support {}\n"), is_enabled(has_pretty_function)); @@ -698,7 +698,7 @@ void addBuildFlagsToOptions(Options& options) { options["has_gettext"].force(bout::build::has_gettext); options["has_lapack"].force(bout::build::has_lapack); options["has_netcdf"].force(bout::build::has_netcdf); - options["has_adios"].force(bout::build::has_adios); + options["has_adios2"].force(bout::build::has_adios2); options["has_petsc"].force(bout::build::has_petsc); options["has_hypre"].force(bout::build::has_hypre); options["has_umpire"].force(bout::build::has_umpire); @@ -795,7 +795,7 @@ int BoutFinalise(bool write_settings) { // Call HYPER_Finalize if not already called bout::HypreLib::cleanup(); -#if BOUT_HAS_ADIOS +#if BOUT_HAS_ADIOS2 bout::ADIOSFinalize(); #endif diff --git a/src/field/field3d.cxx b/src/field/field3d.cxx index 011353f34a..4ed9641f44 100644 --- a/src/field/field3d.cxx +++ b/src/field/field3d.cxx @@ -32,6 +32,8 @@ #include +#include "bout/parallel_boundary_op.hxx" +#include "bout/parallel_boundary_region.hxx" #include #include #include @@ -504,7 +506,7 @@ void Field3D::applyParallelBoundary(const std::string& condition) { /// Loop over the mesh boundary regions for (const auto& reg : fieldmesh->getBoundariesPar()) { auto op = std::unique_ptr{ - dynamic_cast(bfact->create(condition, reg))}; + dynamic_cast(bfact->create(condition, reg.get()))}; op->apply(*this); } } @@ -524,7 +526,7 @@ void Field3D::applyParallelBoundary(const std::string& region, for (const auto& reg : 
fieldmesh->getBoundariesPar()) { if (reg->label == region) { auto op = std::unique_ptr{ - dynamic_cast(bfact->create(condition, reg))}; + dynamic_cast(bfact->create(condition, reg.get()))}; op->apply(*this); break; } @@ -548,9 +550,9 @@ void Field3D::applyParallelBoundary(const std::string& region, // BoundaryFactory can't create boundaries using Field3Ds, so get temporary // boundary of the right type auto tmp = std::unique_ptr{ - dynamic_cast(bfact->create(condition, reg))}; + dynamic_cast(bfact->create(condition, reg.get()))}; // then clone that with the actual argument - auto op = std::unique_ptr{tmp->clone(reg, f)}; + auto op = std::unique_ptr{tmp->clone(reg.get(), f)}; op->apply(*this); break; } diff --git a/src/field/field_data.cxx b/src/field/field_data.cxx index ee8bd97b30..529f595316 100644 --- a/src/field/field_data.cxx +++ b/src/field/field_data.cxx @@ -1,4 +1,6 @@ +#include "bout/parallel_boundary_op.hxx" +#include "bout/parallel_boundary_region.hxx" #include "bout/unused.hxx" #include #include @@ -151,10 +153,9 @@ void FieldData::setBoundary(const std::string& name) { } /// Get the mesh boundary regions - std::vector par_reg = mesh->getBoundariesPar(); /// Loop over the mesh parallel boundary regions for (const auto& reg : mesh->getBoundariesPar()) { - auto* op = dynamic_cast(bfact->createFromOptions(name, reg)); + auto* op = dynamic_cast(bfact->createFromOptions(name, reg.get())); if (op != nullptr) { bndry_op_par.push_back(op); } diff --git a/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx b/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx index cf16240c0c..5ce4e540b7 100644 --- a/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx +++ b/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx @@ -33,11 +33,13 @@ * */ -#include "cyclic_laplace.hxx" -#include "bout/build_config.hxx" +#include "bout/build_defines.hxx" #if not BOUT_USE_METRIC_3D +#include "cyclic_laplace.hxx" +#include "bout/assert.hxx" +#include "bout/bout_types.hxx" #include #include 
#include @@ -47,7 +49,7 @@ #include #include -#include "cyclic_laplace.hxx" +#include LaplaceCyclic::LaplaceCyclic(Options* opt, const CELL_LOC loc, Mesh* mesh_in, Solver* UNUSED(solver)) @@ -120,13 +122,13 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { // If the flags to assign that only one guard cell should be used is set int inbndry = localmesh->xstart, outbndry = localmesh->xstart; - if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -143,9 +145,9 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { for (int ix = xs; ix <= xe; ix++) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && (inner_boundary_flags & INVERT_SET) && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && (outer_boundary_flags & INVERT_SET) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0[ix] + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -169,8 +171,7 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // wave number index kwave, // kwave (inverse wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false, // Don't include guard cells in arrays false); // Z domain not periodic } @@ -218,9 +219,9 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { for (int ix 
= xs; ix <= xe; ix++) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && (inner_boundary_flags & INVERT_SET) && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && (outer_boundary_flags & INVERT_SET) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary rfft(x0[ix], localmesh->LocalNz, std::begin(k1d)); } else { @@ -241,8 +242,7 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // True for the component constant (DC) in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -275,7 +275,7 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { // ZFFT routine expects input of this length auto k1d = Array((localmesh->LocalNz) / 2 + 1); - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); BOUT_OMP_PERF(for nowait) for (int ix = xs; ix <= xe; ix++) { @@ -316,13 +316,13 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { // If the flags to assign that only one guard cell should be used is set int inbndry = localmesh->xstart, outbndry = localmesh->xstart; - if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -350,6 +350,9 @@ Field3D LaplaceCyclic::solve(const 
Field3D& rhs, const Field3D& x0) { const int nsys = nmode * ny; // Number of systems of equations to solve const int nxny = nx * ny; // Number of points in X-Y + // This is just to silence static analysis + ASSERT0(ny > 0); + auto a3D = Matrix(nsys, nx); auto b3D = Matrix(nsys, nx); auto c3D = Matrix(nsys, nx); @@ -374,10 +377,9 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0(ix, iy) + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -405,8 +407,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // wave number index kwave, // kwave (inverse wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false, // Don't include guard cells in arrays false); // Z domain not periodic } @@ -462,10 +463,9 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary rfft(x0(ix, iy), localmesh->LocalNz, std::begin(k1d)); } else { @@ -490,8 +490,7 @@ Field3D LaplaceCyclic::solve(const Field3D& 
rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // True for the component constant (DC) in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -502,9 +501,8 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { if (localmesh->periodicX) { // Subtract X average of kz=0 mode - BoutReal local[ny + 1]; + std::vector local(ny + 1, 0.0); for (int y = 0; y < ny; y++) { - local[y] = 0.0; for (int ix = xs; ix <= xe; ix++) { local[y] += xcmplx3D(y * nmode, ix - xs).real(); } @@ -512,8 +510,9 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { local[ny] = static_cast(xe - xs + 1); // Global reduce - BoutReal global[ny + 1]; - MPI_Allreduce(local, global, ny + 1, MPI_DOUBLE, MPI_SUM, localmesh->getXcomm()); + std::vector global(ny + 1, 0.0); + MPI_Allreduce(local.data(), global.data(), ny + 1, MPI_DOUBLE, MPI_SUM, + localmesh->getXcomm()); // Subtract average from kz=0 modes for (int y = 0; y < ny; y++) { BoutReal avg = global[y] / global[ny]; @@ -530,7 +529,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { auto k1d = Array((localmesh->LocalNz) / 2 + 1); // ZFFT routine expects input of this length - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y diff --git a/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx b/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx index c74e184be3..d789e5e408 100644 --- a/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx +++ b/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx @@ -99,7 +99,7 @@ LaplaceHypre3d::LaplaceHypre3d(Options* opt, const CELL_LOC loc, Mesh* mesh_in, // Set up boundary conditions in 
operator BOUT_FOR_SERIAL(i, indexer->getRegionInnerX()) { - if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann on inner X boundary operator3D(i, i) = -1. / coords->dx[i] / sqrt(coords->g_11[i]); operator3D(i, i.xp()) = 1. / coords->dx[i] / sqrt(coords->g_11[i]); @@ -111,7 +111,7 @@ LaplaceHypre3d::LaplaceHypre3d(Options* opt, const CELL_LOC loc, Mesh* mesh_in, } BOUT_FOR_SERIAL(i, indexer->getRegionOuterX()) { - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann on outer X boundary operator3D(i, i) = 1. / coords->dx[i] / sqrt(coords->g_11[i]); operator3D(i, i.xm()) = -1. / coords->dx[i] / sqrt(coords->g_11[i]); @@ -180,9 +180,9 @@ Field3D LaplaceHypre3d::solve(const Field3D& b_in, const Field3D& x0) { // Adjust vectors to represent boundary conditions and check that // boundary cells are finite BOUT_FOR_SERIAL(i, indexer->getRegionInnerX()) { - const BoutReal val = (inner_boundary_flags & INVERT_SET) ? x0[i] : 0.; + const BoutReal val = isInnerBoundaryFlagSet(INVERT_SET) ? x0[i] : 0.; ASSERT1(std::isfinite(val)); - if (!(inner_boundary_flags & INVERT_RHS)) { + if (!(isInnerBoundaryFlagSet(INVERT_RHS))) { b[i] = val; } else { ASSERT1(std::isfinite(b[i])); @@ -190,9 +190,9 @@ Field3D LaplaceHypre3d::solve(const Field3D& b_in, const Field3D& x0) { } BOUT_FOR_SERIAL(i, indexer->getRegionOuterX()) { - const BoutReal val = (outer_boundary_flags & INVERT_SET) ? x0[i] : 0.; + const BoutReal val = (isOuterBoundaryFlagSet(INVERT_SET)) ? 
x0[i] : 0.; ASSERT1(std::isfinite(val)); - if (!(outer_boundary_flags & INVERT_RHS)) { + if (!(isOuterBoundaryFlagSet(INVERT_RHS))) { b[i] = val; } else { ASSERT1(std::isfinite(b[i])); diff --git a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx index b09b67611b..f79463769a 100644 --- a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx +++ b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx @@ -293,10 +293,8 @@ FieldPerp LaplaceIPT::solve(const FieldPerp& b, const FieldPerp& x0) { */ auto bcmplx = Matrix(nmode, ncx); - const bool invert_inner_boundary = - isInnerBoundaryFlagSet(INVERT_SET) and localmesh->firstX(); - const bool invert_outer_boundary = - isOuterBoundaryFlagSet(INVERT_SET) and localmesh->lastX(); + const bool invert_inner_boundary = isInnerBoundaryFlagSetOnFirstX(INVERT_SET); + const bool invert_outer_boundary = isOuterBoundaryFlagSetOnLastX(INVERT_SET); BOUT_OMP_PERF(parallel for) for (int ix = 0; ix < ncx; ix++) { @@ -345,8 +343,7 @@ FieldPerp LaplaceIPT::solve(const FieldPerp& b, const FieldPerp& x0) { kz, // wave number (different from kz only if we are taking a part // of the z-domain [and not from 0 to 2*pi]) - kz * kwaveFactor, global_flags, inner_boundary_flags, - outer_boundary_flags, &A, &C, &D); + kz * kwaveFactor, &A, &C, &D); // Patch up internal boundaries if (not localmesh->lastX()) { diff --git a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx index 1c6bb7a02e..02e3eca06c 100644 --- a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx +++ b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx @@ -234,14 +234,6 @@ private: /// First and last interior points xstart, xend int xs, xe; - - bool isGlobalFlagSet(int flag) const { return 
(global_flags & flag) != 0; } - bool isInnerBoundaryFlagSet(int flag) const { - return (inner_boundary_flags & flag) != 0; - } - bool isOuterBoundaryFlagSet(int flag) const { - return (outer_boundary_flags & flag) != 0; - } }; #endif // BOUT_USE_METRIC_3D diff --git a/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx b/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx index 82273ee7ad..c5076cd499 100644 --- a/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx +++ b/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx @@ -84,19 +84,18 @@ LaplaceMultigrid::LaplaceMultigrid(Options* opt, const CELL_LOC loc, Mesh* mesh_ // Initialize, allocate memory, etc. comms_tagbase = 385; // Some random number - int implemented_global_flags = INVERT_START_NEW; - if (global_flags & ~implemented_global_flags) { + constexpr int implemented_global_flags = INVERT_START_NEW; + if (isGlobalFlagSet(~implemented_global_flags)) { throw BoutException("Attempted to set Laplacian inversion flag that is not " "implemented in LaplaceMultigrid."); } - int implemented_boundary_flags = - INVERT_AC_GRAD + INVERT_SET - + INVERT_DC_GRAD; // INVERT_DC_GRAD does not actually do anything, but harmless to set while comparing to Fourier solver with Neumann boundary conditions - if (inner_boundary_flags & ~implemented_boundary_flags) { + // INVERT_DC_GRAD does not actually do anything, but harmless to set while comparing to Fourier solver with Neumann boundary conditions + constexpr int implemented_boundary_flags = INVERT_AC_GRAD + INVERT_SET + INVERT_DC_GRAD; + if (isInnerBoundaryFlagSet(~implemented_boundary_flags)) { throw BoutException("Attempted to set Laplacian inner boundary inversion flag that " "is not implemented in LaplaceMultigrid."); } - if (outer_boundary_flags & ~implemented_boundary_flags) { + if (isOuterBoundaryFlagSet(~implemented_boundary_flags)) { throw BoutException("Attempted to set Laplacian outer boundary inversion flag that " "is not implemented in 
LaplaceMultigrid."); } @@ -242,7 +241,7 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { int lz2 = lzz + 2; int lxx = kMG->lnx[level]; - if (global_flags & INVERT_START_NEW) { + if (isGlobalFlagSet(INVERT_START_NEW)) { // set initial guess to zero BOUT_OMP_PERF(parallel default(shared)) BOUT_OMP_PERF(for collapse(2)) @@ -276,9 +275,9 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } if (localmesh->firstX()) { - if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify gradient to set at inner boundary BOUT_OMP_PERF(parallel default(shared)) BOUT_OMP_PERF(for) @@ -299,7 +298,7 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // Dirichlet boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify value to set at inner boundary BOUT_OMP_PERF(parallel default(shared)) BOUT_OMP_PERF(for) @@ -320,9 +319,9 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } if (localmesh->lastX()) { - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify gradient to set at outer boundary BOUT_OMP_PERF(parallel default(shared)) BOUT_OMP_PERF(for) @@ -344,7 +343,7 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // Dirichlet boundary condition - if (outer_boundary_flags & INVERT_SET) { + if (isOuterBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify value to set at outer boundary BOUT_OMP_PERF(parallel default(shared)) BOUT_OMP_PERF(for) @@ -477,9 +476,9 @@ 
FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } if (localmesh->firstX()) { - if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify gradient to set at inner boundary int i2 = -1 + localmesh->xstart; BOUT_OMP_PERF(parallel default(shared)) @@ -503,7 +502,7 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // Dirichlet boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify value to set at inner boundary int i2 = -1 + localmesh->xstart; BOUT_OMP_PERF(parallel default(shared)) @@ -525,9 +524,9 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } if (localmesh->lastX()) { - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify gradient to set at outer boundary int i2 = lxx + localmesh->xstart; BOUT_OMP_PERF(parallel default(shared)) @@ -551,7 +550,7 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // Dirichlet boundary condition - if (outer_boundary_flags & INVERT_SET) { + if (isOuterBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify value to set at outer boundary int i2 = lxx + localmesh->xstart; BOUT_OMP_PERF(parallel default(shared)) @@ -651,7 +650,7 @@ void LaplaceMultigrid::generateMatrixF(int level) { // Here put boundary conditions if (kMG->rProcI == 0) { - if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition BOUT_OMP_PERF(parallel default(shared)) BOUT_OMP_PERF(for) @@ -686,7 +685,7 @@ 
void LaplaceMultigrid::generateMatrixF(int level) { } } if (kMG->rProcI == kMG->xNP - 1) { - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition BOUT_OMP_PERF(parallel default(shared)) BOUT_OMP_PERF(for) diff --git a/src/invert/laplace/impls/naulin/naulin_laplace.cxx b/src/invert/laplace/impls/naulin/naulin_laplace.cxx index d82f874cbb..7a614c3498 100644 --- a/src/invert/laplace/impls/naulin/naulin_laplace.cxx +++ b/src/invert/laplace/impls/naulin/naulin_laplace.cxx @@ -164,6 +164,11 @@ LaplaceNaulin::LaplaceNaulin(Options* opt, const CELL_LOC loc, Mesh* mesh_in, // Get options OPTION(opt, rtol, 1.e-7); OPTION(opt, atol, 1.e-20); + rtol_accept = + (*opt)["rtol_accept"].doc("Accept this rtol after maxits").withDefault(rtol); + atol_accept = + (*opt)["atol_accept"].doc("Accept this atol after maxits").withDefault(atol); + OPTION(opt, maxits, 100); OPTION(opt, initial_underrelax_factor, 1.); ASSERT0(initial_underrelax_factor > 0. 
and initial_underrelax_factor <= 1.); @@ -174,9 +179,9 @@ LaplaceNaulin::LaplaceNaulin(Options* opt, const CELL_LOC loc, Mesh* mesh_in, // invert Delp2 and we will not converge ASSERT0(delp2type == "cyclic" || delp2type == "spt" || delp2type == "tri"); // Use same flags for FFT solver as for NaulinSolver - delp2solver->setGlobalFlags(global_flags); - delp2solver->setInnerBoundaryFlags(inner_boundary_flags); - delp2solver->setOuterBoundaryFlags(outer_boundary_flags); + delp2solver->setGlobalFlags(getGlobalFlags()); + delp2solver->setInnerBoundaryFlags(getInnerBoundaryFlags()); + delp2solver->setOuterBoundaryFlags(getOuterBoundaryFlags()); static int naulinsolver_count = 1; setPerformanceName(fmt::format("{}{}", "naulinsolver", ++naulinsolver_count)); @@ -258,7 +263,7 @@ Field3D LaplaceNaulin::solve(const Field3D& rhs, const Field3D& x0) { // Note take a copy of the 'b' argument, because we want to return a copy of it in the // result - if ((inner_boundary_flags & INVERT_SET) || (outer_boundary_flags & INVERT_SET)) { + if (isInnerBoundaryFlagSet(INVERT_SET) || isOuterBoundaryFlagSet(INVERT_SET)) { // This passes in the boundary conditions from x0's guard cells copy_x_boundaries(x_guess, x0, localmesh); } @@ -289,6 +294,10 @@ Field3D LaplaceNaulin::solve(const Field3D& rhs, const Field3D& x0) { ++count; if (count > maxits) { + // Perhaps accept a worse solution + if (error_rel < rtol_accept or error_abs < atol_accept) { + break; + } throw BoutException( "LaplaceNaulin error: Not converged within maxits={:d} iterations.", maxits); } @@ -313,6 +322,9 @@ Field3D LaplaceNaulin::solve(const Field3D& rhs, const Field3D& x0) { // effectively another iteration, so increment the counter ++count; if (count > maxits) { + if (error_rel < rtol_accept or error_abs < atol_accept) { + break; + } throw BoutException( "LaplaceNaulin error: Not converged within maxits={:d} iterations.", maxits); } diff --git a/src/invert/laplace/impls/naulin/naulin_laplace.hxx 
b/src/invert/laplace/impls/naulin/naulin_laplace.hxx index e464ef18e7..70bd2668ef 100644 --- a/src/invert/laplace/impls/naulin/naulin_laplace.hxx +++ b/src/invert/laplace/impls/naulin/naulin_laplace.hxx @@ -157,6 +157,8 @@ private: /// Solver tolerances BoutReal rtol, atol; + /// Accept these tolerances if number of iterations exceeds maxits + BoutReal rtol_accept, atol_accept; /// Maximum number of iterations int maxits; diff --git a/src/invert/laplace/impls/pcr/pcr.cxx b/src/invert/laplace/impls/pcr/pcr.cxx index 5c4f8da35b..48bbdbac4b 100644 --- a/src/invert/laplace/impls/pcr/pcr.cxx +++ b/src/invert/laplace/impls/pcr/pcr.cxx @@ -149,13 +149,13 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) { // If the flags to assign that only one guard cell should be used is set inbndry = localmesh->xstart; outbndry = localmesh->xstart; - if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -173,10 +173,9 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) { for (int ix = xs; ix <= xe; ix++) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0[ix] + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -199,8 +198,7 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) 
{ tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // wave number index kwave, // kwave (inverse wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } // BOUT_OMP_PERF(parallel) @@ -245,10 +243,9 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) { for (int ix = xs; ix <= xe; ix++) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary rfft(x0[ix], localmesh->LocalNz, std::begin(k1d)); } else { @@ -269,8 +266,7 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) { tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // True for the component constant (DC) in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } // BOUT_OMP_PERF(parallel) @@ -285,7 +281,7 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) { auto k1d = Array((localmesh->LocalNz) / 2 + 1); // ZFFT routine expects input of this length - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); BOUT_OMP_PERF(for nowait) for (int ix = xs; ix <= xe; ix++) { @@ -327,13 +323,13 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { // If the flags to assign that only one guard cell should be used is set inbndry = localmesh->xstart; outbndry = localmesh->xstart; - if 
(((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -387,10 +383,9 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0(ix, iy) + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -417,8 +412,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // wave number index kwave, // kwave (inverse wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } // BOUT_OMP_PERF(parallel) @@ -472,10 +466,9 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary rfft(x0(ix, iy), 
localmesh->LocalNz, std::begin(k1d)); } else { @@ -500,8 +493,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // True for the component constant (DC) in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } // BOUT_OMP_PERF(parallel) @@ -516,7 +508,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { auto k1d = Array((localmesh->LocalNz) / 2 + 1); // ZFFT routine expects input of this length - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y diff --git a/src/invert/laplace/impls/pcr/pcr.hxx b/src/invert/laplace/impls/pcr/pcr.hxx index 38b7c356d3..ec4637f56c 100644 --- a/src/invert/laplace/impls/pcr/pcr.hxx +++ b/src/invert/laplace/impls/pcr/pcr.hxx @@ -172,14 +172,6 @@ private: /// First and last interior points xstart, xend int xs, xe; - bool isGlobalFlagSet(int flag) const { return (global_flags & flag) != 0; } - bool isInnerBoundaryFlagSet(int flag) const { - return (inner_boundary_flags & flag) != 0; - } - bool isOuterBoundaryFlagSet(int flag) const { - return (outer_boundary_flags & flag) != 0; - } - bool dst{false}; }; diff --git a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx index 35a25779a7..61c8f58694 100644 --- a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx +++ b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx @@ -145,13 +145,13 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { // If the flags to assign that only one guard cell should be used is set int inbndry = localmesh->xstart; int outbndry = localmesh->xstart; - if 
(((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -169,10 +169,9 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { for (int ix = xs; ix <= xe; ix++) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0[ix] + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -195,8 +194,7 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // wave number index kwave, // kwave (inverse wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -241,10 +239,9 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { for (int ix = xs; ix <= xe; ix++) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use 
the values in x0 in the boundary rfft(x0[ix], localmesh->LocalNz, std::begin(k1d)); } else { @@ -265,8 +262,7 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // True for the component constant (DC) in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -281,7 +277,7 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { auto k1d = Array((localmesh->LocalNz) / 2 + 1); // ZFFT routine expects input of this length - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); BOUT_OMP_PERF(for nowait) for (int ix = xs; ix <= xe; ix++) { @@ -323,13 +319,13 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { // If the flags to assign that only one guard cell should be used is set int inbndry = localmesh->xstart; int outbndry = localmesh->xstart; - if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -383,10 +379,9 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && 
isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0(ix, iy) + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -413,8 +408,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // wave number index kwave, // kwave (inverse wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -468,10 +462,9 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary rfft(x0(ix, iy), localmesh->LocalNz, std::begin(k1d)); } else { @@ -497,8 +490,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // True for the component constant (DC) in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -513,7 +505,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { auto k1d = Array((localmesh->LocalNz) / 2 + 1); // ZFFT routine expects input of this length - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y diff --git 
a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx index 009a1def2b..e12a647789 100644 --- a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx +++ b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx @@ -175,14 +175,6 @@ private: /// First and last interior points xstart, xend int xs, xe; - bool isGlobalFlagSet(int flag) const { return (global_flags & flag) != 0; } - bool isInnerBoundaryFlagSet(int flag) const { - return (inner_boundary_flags & flag) != 0; - } - bool isOuterBoundaryFlagSet(int flag) const { - return (outer_boundary_flags & flag) != 0; - } - bool dst{false}; }; diff --git a/src/invert/laplace/impls/petsc/petsc_laplace.cxx b/src/invert/laplace/impls/petsc/petsc_laplace.cxx index d125b90694..f06f4c7de6 100644 --- a/src/invert/laplace/impls/petsc/petsc_laplace.cxx +++ b/src/invert/laplace/impls/petsc/petsc_laplace.cxx @@ -23,7 +23,8 @@ * along with BOUT++. If not, see . * **************************************************************************/ -#include "bout/build_config.hxx" + +#include "bout/build_defines.hxx" #if BOUT_HAS_PETSC @@ -32,6 +33,8 @@ #include #include #include +#include +#include #include #include @@ -49,14 +52,13 @@ #define KSP_PREONLY "preonly" static PetscErrorCode laplacePCapply(PC pc, Vec x, Vec y) { - int ierr; + PetscFunctionBegin; // NOLINT - // Get the context - LaplacePetsc* s; - ierr = PCShellGetContext(pc, reinterpret_cast(&s)); + LaplacePetsc* laplace = nullptr; + const int ierr = PCShellGetContext(pc, reinterpret_cast(&laplace)); // NOLINT CHKERRQ(ierr); - PetscFunctionReturn(s->precon(x, y)); + PetscFunctionReturn(laplace->precon(x, y)); // NOLINT } LaplacePetsc::LaplacePetsc(Options* opt, const CELL_LOC loc, Mesh* mesh_in, @@ -79,28 +81,9 @@ LaplacePetsc::LaplacePetsc(Options* opt, const CELL_LOC loc, Mesh* mesh_in, } #if CHECK > 0 - // These are the implemented flags - implemented_flags = INVERT_START_NEW; - implemented_boundary_flags = INVERT_AC_GRAD + 
INVERT_SET + INVERT_RHS; // Checking flags are set to something which is not implemented - // This is done binary (which is possible as each flag is a power of 2) - if (global_flags & ~implemented_flags) { - if (global_flags & INVERT_4TH_ORDER) { - output << "For PETSc based Laplacian inverter, use 'fourth_order=true' instead of " - "setting INVERT_4TH_ORDER flag" - << endl; - } - throw BoutException("Attempted to set Laplacian inversion flag that is not " - "implemented in petsc_laplace.cxx"); - } - if (inner_boundary_flags & ~implemented_boundary_flags) { - throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " - "implemented in petsc_laplace.cxx"); - } - if (outer_boundary_flags & ~implemented_boundary_flags) { - throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " - "implemented in petsc_laplace.cxx"); - } + checkFlags(); + if (localmesh->periodicX) { throw BoutException("LaplacePetsc does not work with periodicity in the x direction " "(localmesh->PeriodicX == true). 
Change boundary conditions or " @@ -360,25 +343,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { ASSERT1(x0.getLocation() == location); #if CHECK > 0 - // Checking flags are set to something which is not implemented (see - // constructor for details) - if (global_flags & !implemented_flags) { - if (global_flags & INVERT_4TH_ORDER) { - output << "For PETSc based Laplacian inverter, use 'fourth_order=true' instead of " - "setting INVERT_4TH_ORDER flag" - << endl; - } - throw BoutException("Attempted to set Laplacian inversion flag that is not " - "implemented in petsc_laplace.cxx"); - } - if (inner_boundary_flags & ~implemented_boundary_flags) { - throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " - "implemented in petsc_laplace.cxx"); - } - if (outer_boundary_flags & ~implemented_boundary_flags) { - throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " - "implemented in petsc_laplace.cxx"); - } + checkFlags(); #endif int y = b.getIndex(); // Get the Y index @@ -415,7 +380,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { for (int z = 0; z < localmesh->LocalNz; z++) { PetscScalar val; // Value of element to be set in the matrix // If Neumann Boundary Conditions are set. 
- if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Set values corresponding to nodes adjacent in x if (fourth_order) { // Fourth Order Accuracy on Boundary @@ -472,9 +437,9 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { // Set Components of RHS // If the inner boundary value should be set by b or x0 - if (inner_boundary_flags & INVERT_RHS) { + if (isInnerBoundaryFlagSet(INVERT_RHS)) { val = b[x][z]; - } else if (inner_boundary_flags & INVERT_SET) { + } else if (isInnerBoundaryFlagSet(INVERT_SET)) { val = x0[x][z]; } @@ -680,7 +645,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { Element(i, x, z, 0, 0, val, MatA); // If Neumann Boundary Conditions are set. - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Set values corresponding to nodes adjacent in x if (fourth_order) { // Fourth Order Accuracy on Boundary @@ -733,9 +698,9 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { // Set Components of RHS // If the inner boundary value should be set by b or x0 val = 0; - if (outer_boundary_flags & INVERT_RHS) { + if (isOuterBoundaryFlagSet(INVERT_RHS)) { val = b[x][z]; - } else if (outer_boundary_flags & INVERT_SET) { + } else if (isOuterBoundaryFlagSet(INVERT_SET)) { val = x0[x][z]; } @@ -812,7 +777,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { KSPSetTolerances(ksp, rtol, atol, dtol, maxits); // If the initial guess is not set to zero - if (!(global_flags & INVERT_START_NEW)) { + if (!isGlobalFlagSet(INVERT_START_NEW)) { KSPSetInitialGuessNonzero(ksp, static_cast(true)); } @@ -1194,4 +1159,24 @@ int LaplacePetsc::precon(Vec x, Vec y) { return 0; } +void LaplacePetsc::checkFlags() { + if (isGlobalFlagSet(~implemented_flags)) { + if (isGlobalFlagSet(INVERT_4TH_ORDER)) { + output_error.write( + "For PETSc based Laplacian inverter, use 'fourth_order=true' instead of " 
+ "setting INVERT_4TH_ORDER flag\n"); + } + throw BoutException("Attempted to set Laplacian inversion flag that is not " + "implemented in petsc_laplace.cxx"); + } + if (isInnerBoundaryFlagSet(~implemented_boundary_flags)) { + throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " + "implemented in petsc_laplace.cxx"); + } + if (isOuterBoundaryFlagSet(~implemented_boundary_flags)) { + throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " + "implemented in petsc_laplace.cxx"); + } +} + #endif // BOUT_HAS_PETSC_3_3 diff --git a/src/invert/laplace/impls/petsc/petsc_laplace.hxx b/src/invert/laplace/impls/petsc/petsc_laplace.hxx index 011f8971df..55482644be 100644 --- a/src/invert/laplace/impls/petsc/petsc_laplace.hxx +++ b/src/invert/laplace/impls/petsc/petsc_laplace.hxx @@ -254,10 +254,11 @@ private: void vecToField(Vec x, FieldPerp& f); // Copy a vector into a fieldperp void fieldToVec(const FieldPerp& f, Vec x); // Copy a fieldperp into a vector -#if CHECK > 0 - int implemented_flags; - int implemented_boundary_flags; -#endif + static constexpr int implemented_flags = INVERT_START_NEW; + static constexpr int implemented_boundary_flags = + INVERT_AC_GRAD | INVERT_SET | INVERT_RHS; + + void checkFlags(); }; #endif //BOUT_HAS_PETSC diff --git a/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx b/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx index d1e2207725..a7bfd209ee 100644 --- a/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx +++ b/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx @@ -84,12 +84,12 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes #if CHECK > 0 // Checking flags are set to something which is not implemented // This is done binary (which is possible as each flag is a power of 2) - if (flagSet(global_flags, INVERT_4TH_ORDER)) { + if (isGlobalFlagSet(INVERT_4TH_ORDER)) { output.write("For PETSc based Laplacian inverter, use 'fourth_order=true' 
instead of " "setting INVERT_4TH_ORDER flag\n"); } - if (flagSet(global_flags, ~implemented_flags)) { + if (isGlobalFlagSet(~implemented_flags)) { throw BoutException("Attempted to set global Laplacian inversion flag that is not " "implemented in petsc_laplace.cxx"); } @@ -102,8 +102,8 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes name); } }; - unimplementedBoundaryFlag(inner_boundary_flags, "inner"); - unimplementedBoundaryFlag(outer_boundary_flags, "outer"); + unimplementedBoundaryFlag(getInnerBoundaryFlags(), "inner"); + unimplementedBoundaryFlag(getOuterBoundaryFlags(), "outer"); unimplementedBoundaryFlag(lower_boundary_flags, "lower"); unimplementedBoundaryFlag(upper_boundary_flags, "upper"); @@ -119,7 +119,7 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes } // Set up boundary conditions in operator - const bool inner_X_neumann = flagSet(inner_boundary_flags, INVERT_AC_GRAD); + const bool inner_X_neumann = isInnerBoundaryFlagSet(INVERT_AC_GRAD); const auto inner_X_BC = inner_X_neumann ? -1. / coords->dx / sqrt(coords->g_11) : 0.5; const auto inner_X_BC_plus = inner_X_neumann ? -inner_X_BC : 0.5; @@ -128,7 +128,7 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes operator3D(i, i.xp()) = inner_X_BC_plus[i]; } - const bool outer_X_neumann = flagSet(outer_boundary_flags, INVERT_AC_GRAD); + const bool outer_X_neumann = isOuterBoundaryFlagSet(INVERT_AC_GRAD); const auto outer_X_BC = outer_X_neumann ? 1. / coords->dx / sqrt(coords->g_11) : 0.5; const auto outer_X_BC_minus = outer_X_neumann ? 
-outer_X_BC : 0.5; @@ -191,8 +191,8 @@ Field3D LaplacePetsc3dAmg::solve(const Field3D& b_in, const Field3D& x0) { // Adjust vectors to represent boundary conditions and check that // boundary cells are finite - setBC(rhs, b_in, indexer->getRegionInnerX(), inner_boundary_flags, x0); - setBC(rhs, b_in, indexer->getRegionOuterX(), outer_boundary_flags, x0); + setBC(rhs, b_in, indexer->getRegionInnerX(), getInnerBoundaryFlags(), x0); + setBC(rhs, b_in, indexer->getRegionOuterX(), getOuterBoundaryFlags(), x0); setBC(rhs, b_in, indexer->getRegionLowerY(), lower_boundary_flags, x0); setBC(rhs, b_in, indexer->getRegionUpperY(), upper_boundary_flags, x0); @@ -460,7 +460,7 @@ void LaplacePetsc3dAmg::updateMatrix3D() { KSPSetTolerances(ksp, rtol, atol, dtol, maxits); // If the initial guess is not set to zero - if ((global_flags & INVERT_START_NEW) == 0) { + if (!isGlobalFlagSet(INVERT_START_NEW)) { KSPSetInitialGuessNonzero(ksp, (PetscBool) true); } diff --git a/src/invert/laplace/impls/serial_band/serial_band.cxx b/src/invert/laplace/impls/serial_band/serial_band.cxx index eda76498fc..4e7bb4c63f 100644 --- a/src/invert/laplace/impls/serial_band/serial_band.cxx +++ b/src/invert/laplace/impls/serial_band/serial_band.cxx @@ -99,7 +99,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { int xbndry = localmesh->xstart; // Width of the x boundary // If the flags to assign that only one guard cell should be used is set - if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { xbndry = 1; } @@ -107,8 +107,8 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { for (int ix = 0; ix < localmesh->LocalNx; ix++) { // for fixed ix,jy set a complex vector rho(z) - if (((ix < xbndry) && (inner_boundary_flags & INVERT_SET)) - || ((ncx - ix < xbndry) && (outer_boundary_flags & INVERT_SET))) { + if (((ix < xbndry) && 
isInnerBoundaryFlagSet(INVERT_SET)) + || ((ncx - ix < xbndry) && (isOuterBoundaryFlagSet(INVERT_SET)))) { // Use the values in x0 in the boundary rfft(x0[ix], ncz, &bk(ix, 0)); } else { @@ -247,10 +247,10 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { for (int ix = 0; ix < xbndry; ix++) { // Set zero-value. Change to zero-gradient if needed - if (!(inner_boundary_flags & (INVERT_RHS | INVERT_SET))) { + if (!isInnerBoundaryFlagSet(INVERT_RHS | INVERT_SET)) { bk1d[ix] = 0.0; } - if (!(outer_boundary_flags & (INVERT_RHS | INVERT_SET))) { + if (!isOuterBoundaryFlagSet(INVERT_RHS | INVERT_SET)) { bk1d[ncx - ix] = 0.0; } @@ -265,8 +265,8 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { // DC // Inner boundary - if (inner_boundary_flags & (INVERT_DC_GRAD + INVERT_SET) - || inner_boundary_flags & (INVERT_DC_GRAD + INVERT_RHS)) { + if (isInnerBoundaryFlagSet(INVERT_DC_GRAD + INVERT_SET) + || isInnerBoundaryFlagSet(INVERT_DC_GRAD + INVERT_RHS)) { // Zero gradient at inner boundary. 2nd-order accurate // Boundary at midpoint for (int ix = 0; ix < xbndry; ix++) { @@ -277,7 +277,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { A(ix, 4) = 0.; } - } else if (inner_boundary_flags & INVERT_DC_GRAD) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRAD)) { // Zero gradient at inner boundary. 2nd-order accurate // Boundary at midpoint for (int ix = 0; ix < xbndry; ix++) { @@ -288,7 +288,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { A(ix, 4) = 0.; } - } else if (inner_boundary_flags & INVERT_DC_GRADPAR) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPAR)) { for (int ix = 0; ix < xbndry; ix++) { A(ix, 0) = 0.; A(ix, 1) = 0.; @@ -296,7 +296,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { A(ix, 3) = 4. / sqrt(coords->g_22(ix + 1, jy)); A(ix, 4) = -1. 
/ sqrt(coords->g_22(ix + 2, jy)); } - } else if (inner_boundary_flags & INVERT_DC_GRADPARINV) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPARINV)) { for (int ix = 0; ix < xbndry; ix++) { A(ix, 0) = 0.; A(ix, 1) = 0.; @@ -304,7 +304,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { A(ix, 3) = 4. * sqrt(coords->g_22(ix + 1, jy)); A(ix, 4) = -sqrt(coords->g_22(ix + 2, jy)); } - } else if (inner_boundary_flags & INVERT_DC_LAP) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_LAP)) { for (int ix = 0; ix < xbndry; ix++) { A(ix, 0) = 0.; A(ix, 1) = 0.; @@ -315,7 +315,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { } // Outer boundary - if (outer_boundary_flags & INVERT_DC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_DC_GRAD)) { // Zero gradient at outer boundary for (int ix = 0; ix < xbndry; ix++) { A(ncx - ix, 1) = -1.0; @@ -326,12 +326,12 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { // AC // Inner boundarySQ(kwave)*coef2 - if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Zero gradient at inner boundary for (int ix = 0; ix < xbndry; ix++) { A(ix, 3) = -1.0; } - } else if (inner_boundary_flags & INVERT_AC_LAP) { + } else if (isInnerBoundaryFlagSet(INVERT_AC_LAP)) { // Enforce zero laplacian for 2nd and 4th-order int ix = 1; @@ -369,12 +369,12 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { } // Outer boundary - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Zero gradient at outer boundary for (int ix = 0; ix < xbndry; ix++) { A(ncx - ix, 1) = -1.0; } - } else if (outer_boundary_flags & INVERT_AC_LAP) { + } else if (isOuterBoundaryFlagSet(INVERT_AC_LAP)) { // Enforce zero laplacian for 2nd and 4th-order // NOTE: Currently ignoring XZ term and coef4 assumed zero on boundary // FIX THIS IF IT WORKS @@ -417,7 +417,7 @@ FieldPerp 
LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { // Perform inversion cband_solve(A, localmesh->LocalNx, 2, 2, bk1d); - if ((global_flags & INVERT_KX_ZERO) && (iz == 0)) { + if (isGlobalFlagSet(INVERT_KX_ZERO) && (iz == 0)) { // Set the Kx = 0, n = 0 component to zero. For now just subtract // Should do in the inversion e.g. Sherman-Morrison formula @@ -440,7 +440,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { // Done inversion, transform back for (int ix = 0; ix <= ncx; ix++) { - if (global_flags & INVERT_ZERO_DC) { + if (isGlobalFlagSet(INVERT_ZERO_DC)) { xk(ix, 0) = 0.0; } diff --git a/src/invert/laplace/impls/serial_tri/serial_tri.cxx b/src/invert/laplace/impls/serial_tri/serial_tri.cxx index 909a47f856..f46a0a46e5 100644 --- a/src/invert/laplace/impls/serial_tri/serial_tri.cxx +++ b/src/invert/laplace/impls/serial_tri/serial_tri.cxx @@ -91,13 +91,13 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) { int inbndry = localmesh->xstart, outbndry = localmesh->xstart; // If the flags to assign that only one guard cell should be used is set - if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if (inner_boundary_flags & INVERT_BNDRY_ONE) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if (outer_boundary_flags & INVERT_BNDRY_ONE) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -140,8 +140,8 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) { * If the INVERT_SET flag is set (meaning that x0 will be used to set the * bounadry values), */ - if (((ix < inbndry) && (inner_boundary_flags & INVERT_SET)) - || ((ncx - 1 - ix < outbndry) && (outer_boundary_flags & INVERT_SET))) { + if (((ix < inbndry) && isInnerBoundaryFlagSet(INVERT_SET)) + || ((ncx - 1 - ix < outbndry) && 
(isOuterBoundaryFlagSet(INVERT_SET)))) { // Use the values in x0 in the boundary // x0 is the input @@ -185,8 +185,7 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) { kz, // wave number (different from kz only if we are taking a part // of the z-domain [and not from 0 to 2*pi]) - kz * kwaveFactor, global_flags, inner_boundary_flags, - outer_boundary_flags, &A, &C, &D); + kz * kwaveFactor, &A, &C, &D); ///////// PERFORM INVERSION ///////// if (!localmesh->periodicX) { @@ -208,7 +207,7 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) { } // If the global flag is set to INVERT_KX_ZERO - if ((global_flags & INVERT_KX_ZERO) && (kz == 0)) { + if (isGlobalFlagSet(INVERT_KX_ZERO) && (kz == 0)) { dcomplex offset(0.0); for (int ix = localmesh->xstart; ix <= localmesh->xend; ix++) { offset += xk1d[ix]; @@ -228,7 +227,7 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) { // Done inversion, transform back for (int ix = 0; ix < ncx; ix++) { - if (global_flags & INVERT_ZERO_DC) { + if (isGlobalFlagSet(INVERT_ZERO_DC)) { xk(ix, 0) = 0.0; } diff --git a/src/invert/laplace/impls/spt/spt.cxx b/src/invert/laplace/impls/spt/spt.cxx index 56ac496271..2e4c844c94 100644 --- a/src/invert/laplace/impls/spt/spt.cxx +++ b/src/invert/laplace/impls/spt/spt.cxx @@ -65,10 +65,9 @@ LaplaceSPT::LaplaceSPT(Options* opt, const CELL_LOC loc, Mesh* mesh_in, ye = localmesh->LocalNy - 1; // Contains upper boundary } - alldata = new SPT_data[ye - ys + 1]; - alldata -= ys; // Re-number indices to start at ys + alldata.reallocate(ye - ys + 1); for (int jy = ys; jy <= ye; jy++) { - alldata[jy].comm_tag = SPT_DATA + jy; // Give each one a different tag + alldata[jy - ys].comm_tag = SPT_DATA + jy; // Give each one a different tag } // Temporary array for taking FFTs @@ -76,11 +75,6 @@ LaplaceSPT::LaplaceSPT(Options* opt, const CELL_LOC loc, Mesh* mesh_in, dc1d.reallocate(ncz / 2 + 1); } -LaplaceSPT::~LaplaceSPT() { - alldata 
+= ys; // Return to index from 0 - delete[] alldata; -} - FieldPerp LaplaceSPT::solve(const FieldPerp& b) { return solve(b, b); } FieldPerp LaplaceSPT::solve(const FieldPerp& b, const FieldPerp& x0) { @@ -90,15 +84,15 @@ FieldPerp LaplaceSPT::solve(const FieldPerp& b, const FieldPerp& x0) { FieldPerp x{emptyFrom(b)}; - if ((inner_boundary_flags & INVERT_SET) || (outer_boundary_flags & INVERT_SET)) { + if (isInnerBoundaryFlagSet(INVERT_SET) || isOuterBoundaryFlagSet(INVERT_SET)) { FieldPerp bs = copy(b); int xbndry = localmesh->xstart; // If the flags to assign that only one guard cell should be used is set - if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { xbndry = 1; } - if ((inner_boundary_flags & INVERT_SET) && localmesh->firstX()) { + if (isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) { // Copy x0 inner boundary into bs for (int ix = 0; ix < xbndry; ix++) { for (int iz = 0; iz < localmesh->LocalNz; iz++) { @@ -106,7 +100,7 @@ FieldPerp LaplaceSPT::solve(const FieldPerp& b, const FieldPerp& x0) { } } } - if ((outer_boundary_flags & INVERT_SET) && localmesh->lastX()) { + if (isOuterBoundaryFlagSetOnLastX(INVERT_SET)) { // Copy x0 outer boundary into bs for (int ix = localmesh->LocalNx - 1; ix >= localmesh->LocalNx - xbndry; ix--) { for (int iz = 0; iz < localmesh->LocalNz; iz++) { @@ -141,29 +135,29 @@ Field3D LaplaceSPT::solve(const Field3D& b) { for (int jy = ys; jy <= ye; jy++) { // And start another one going - start(sliceXZ(b, jy), alldata[jy]); + start(sliceXZ(b, jy), alldata[jy - ys]); // Move each calculation along one processor for (int jy2 = ys; jy2 < jy; jy2++) { - next(alldata[jy2]); + next(alldata[jy2 - ys]); } } bool running = true; - do { + while (running) { // Move each calculation along until the last one is finished - for (int jy = ys; jy <= ye; jy++) { - running = next(alldata[jy]) == 0; + for (auto& data : alldata) { + running = next(data) == 0; } - 
} while (running); + } FieldPerp xperp(localmesh); xperp.setLocation(location); xperp.allocate(); // All calculations finished. Get result - for (int jy = ys; jy <= ye; jy++) { - finish(alldata[jy], xperp); + for (auto& data : alldata) { + finish(data, xperp); x = xperp; } @@ -173,17 +167,17 @@ Field3D LaplaceSPT::solve(const Field3D& b) { Field3D LaplaceSPT::solve(const Field3D& b, const Field3D& x0) { ASSERT1(localmesh == b.getMesh() && localmesh == x0.getMesh()); - if (((inner_boundary_flags & INVERT_SET) && localmesh->firstX()) - || ((outer_boundary_flags & INVERT_SET) && localmesh->lastX())) { + if ((isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) + || isOuterBoundaryFlagSetOnLastX(INVERT_SET)) { Field3D bs = copy(b); int xbndry = localmesh->xstart; // If the flags to assign that only one guard cell should be used is set - if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { xbndry = 1; } - if ((inner_boundary_flags & INVERT_SET) && localmesh->firstX()) { + if (isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) { // Copy x0 inner boundary into bs for (int ix = 0; ix < xbndry; ix++) { for (int iy = 0; iy < localmesh->LocalNy; iy++) { @@ -193,7 +187,7 @@ Field3D LaplaceSPT::solve(const Field3D& b, const Field3D& x0) { } } } - if ((outer_boundary_flags & INVERT_SET) && localmesh->lastX()) { + if (isOuterBoundaryFlagSetOnLastX(INVERT_SET)) { // Copy x0 outer boundary into bs for (int ix = localmesh->LocalNx - 1; ix >= localmesh->LocalNx - xbndry; ix--) { for (int iy = 0; iy < localmesh->LocalNy; iy++) { @@ -323,8 +317,7 @@ int LaplaceSPT::start(const FieldPerp& b, SPT_data& data) { /// Set matrix elements for (int kz = 0; kz <= maxmode; kz++) { tridagMatrix(&data.avec(kz, 0), &data.bvec(kz, 0), &data.cvec(kz, 0), &data.bk(kz, 0), - data.jy, kz, kz * kwaveFactor, global_flags, inner_boundary_flags, - outer_boundary_flags, &Acoef, &Ccoef, &Dcoef); + data.jy, kz, kz * kwaveFactor, 
&Acoef, &Ccoef, &Dcoef); } data.proc = 0; //< Starts at processor 0 @@ -516,7 +509,7 @@ void LaplaceSPT::finish(SPT_data& data, FieldPerp& x) { dc1d[kz] = 0.0; } - if (global_flags & INVERT_ZERO_DC) { + if (isGlobalFlagSet(INVERT_ZERO_DC)) { dc1d[0] = 0.0; } diff --git a/src/invert/laplace/impls/spt/spt.hxx b/src/invert/laplace/impls/spt/spt.hxx index c6aa8fd404..a9d5b2583f 100644 --- a/src/invert/laplace/impls/spt/spt.hxx +++ b/src/invert/laplace/impls/spt/spt.hxx @@ -69,7 +69,6 @@ class LaplaceSPT : public Laplacian { public: LaplaceSPT(Options* opt = nullptr, const CELL_LOC = CELL_CENTRE, Mesh* mesh_in = nullptr, Solver* solver = nullptr); - ~LaplaceSPT(); using Laplacian::setCoefA; void setCoefA(const Field2D& val) override { @@ -106,17 +105,15 @@ public: Field3D solve(const Field3D& b, const Field3D& x0) override; private: - enum { SPT_DATA = 1123 }; ///< 'magic' number for SPT MPI messages + constexpr static int SPT_DATA = 1123; ///< 'magic' number for SPT MPI messages Field2D Acoef, Ccoef, Dcoef; /// Data structure for SPT algorithm struct SPT_data { - SPT_data() : comm_tag(SPT_DATA) {} void allocate(int mm, int nx); // Allocates memory - ~SPT_data(){}; // Free memory - int jy; ///< Y index + int jy = 0; ///< Y index Matrix bk; ///< b vector in Fourier space Matrix xk; @@ -125,19 +122,19 @@ private: Matrix avec, bvec, cvec; ///< Diagonal bands of matrix - int proc; // Which processor has this reached? - int dir; // Which direction is it going? + int proc = 0; // Which processor has this reached? + int dir = 1; // Which direction is it going? 
- comm_handle recv_handle; // Handle for receives + comm_handle recv_handle = nullptr; // Handle for receives - int comm_tag; // Tag for communication + int comm_tag = SPT_DATA; // Tag for communication Array buffer; }; int ys, ye; // Range of Y indices SPT_data slicedata; // Used to solve for a single FieldPerp - SPT_data* alldata; // Used to solve a Field3D + Array alldata; // Used to solve a Field3D Array dc1d; ///< 1D in Z for taking FFTs diff --git a/src/invert/laplace/invert_laplace.cxx b/src/invert/laplace/invert_laplace.cxx index 505b04cc4f..4032499781 100644 --- a/src/invert/laplace/invert_laplace.cxx +++ b/src/invert/laplace/invert_laplace.cxx @@ -424,20 +424,16 @@ void Laplacian::tridagCoefs(int jx, int jy, BoutReal kwave, dcomplex& a, dcomple #if BOUT_USE_METRIC_3D void Laplacian::tridagMatrix(dcomplex* /*avec*/, dcomplex* /*bvec*/, dcomplex* /*cvec*/, dcomplex* /*bk*/, int /*jy*/, int /*kz*/, BoutReal /*kwave*/, - int /*global_flags*/, int /*inner_boundary_flags*/, - int /*outer_boundary_flags*/, const Field2D* /*a*/, - const Field2D* /*c1coef*/, const Field2D* /*c2coef*/, - const Field2D* /*d*/, bool /*includeguards*/, - bool /*zperiodic*/) { + const Field2D* /*a*/, const Field2D* /*c1coef*/, + const Field2D* /*c2coef*/, const Field2D* /*d*/, + bool /*includeguards*/, bool /*zperiodic*/) { throw BoutException("Error: tridagMatrix does not yet work with 3D metric."); } #else void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dcomplex* bk, - int jy, int kz, BoutReal kwave, int global_flags, - int inner_boundary_flags, int outer_boundary_flags, - const Field2D* a, const Field2D* c1coef, - const Field2D* c2coef, const Field2D* d, bool includeguards, - bool zperiodic) { + int jy, int kz, BoutReal kwave, const Field2D* a, + const Field2D* c1coef, const Field2D* c2coef, + const Field2D* d, bool includeguards, bool zperiodic) { ASSERT1(a->getLocation() == location); ASSERT1(c1coef->getLocation() == location); 
ASSERT1(c2coef->getLocation() == location); @@ -469,13 +465,13 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco int inbndry = localmesh->xstart, outbndry = localmesh->xstart; // If the flags to assign that only one guard cell should be used is set - if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if (inner_boundary_flags & INVERT_BNDRY_ONE) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if (outer_boundary_flags & INVERT_BNDRY_ONE) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -497,7 +493,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // If no user specified value is set on inner boundary, set the first // element in b (in the equation AX=b) to 0 - if (!(inner_boundary_flags & (INVERT_RHS | INVERT_SET))) { + if (!isInnerBoundaryFlagSet(INVERT_RHS | INVERT_SET)) { for (int ix = 0; ix < inbndry; ix++) { bk[ix] = 0.; } @@ -506,34 +502,35 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // DC i.e. kz = 0 (the offset mode) if (kz == 0) { - if (inner_boundary_flags & INVERT_DC_GRAD - && (inner_boundary_flags & INVERT_SET || inner_boundary_flags & INVERT_RHS)) { + if (isInnerBoundaryFlagSet(INVERT_DC_GRAD) + && (isInnerBoundaryFlagSet(INVERT_SET) + || isInnerBoundaryFlagSet(INVERT_RHS))) { // Zero gradient at inner boundary for (int ix = 0; ix < inbndry; ix++) { avec[ix] = 0.; bvec[ix] = -1. / sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy); cvec[ix] = 1. 
/ sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy); } - } else if (inner_boundary_flags & INVERT_DC_GRAD) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRAD)) { // Zero gradient at inner boundary for (int ix = 0; ix < inbndry; ix++) { avec[ix] = 0.; bvec[ix] = -1.; cvec[ix] = 1.; } - } else if (inner_boundary_flags & INVERT_DC_GRADPAR) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPAR)) { for (int ix = 0; ix < inbndry; ix++) { avec[ix] = 0.0; bvec[ix] = 1.0 / sqrt(coords->g_22(ix, jy)); cvec[ix] = -1.0 / sqrt(coords->g_22(ix + 1, jy)); } - } else if (inner_boundary_flags & INVERT_DC_GRADPARINV) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPARINV)) { for (int ix = 0; ix < inbndry; ix++) { avec[ix] = 0.0; bvec[ix] = sqrt(coords->g_22(ix, jy)); cvec[ix] = -sqrt(coords->g_22(ix + 1, jy)); } - } else if (inner_boundary_flags & INVERT_DC_LAP) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_LAP)) { // Decaying boundary conditions BoutReal k = 0.0; if (a != nullptr) { @@ -548,7 +545,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco bvec[ix] = 1.; cvec[ix] = -exp(-k * coords->dx(ix, jy) / sqrt(coords->g11(ix, jy))); } - } else if (inner_boundary_flags & INVERT_IN_CYLINDER) { + } else if (isInnerBoundaryFlagSet(INVERT_IN_CYLINDER)) { // Condition for inner radial boundary for cylindrical coordinates /* Explanation: * The discrete fourier transform is defined as @@ -602,8 +599,9 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // AC i.e. 
kz =/= 0 (all other modes than the offset mode) else { - if (inner_boundary_flags & INVERT_AC_GRAD - && (inner_boundary_flags & INVERT_SET || inner_boundary_flags & INVERT_RHS)) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD) + && (isInnerBoundaryFlagSet(INVERT_SET) + || isInnerBoundaryFlagSet(INVERT_RHS))) { // Zero gradient at inner boundary for (int ix = 0; ix < inbndry; ix++) { avec[ix] = dcomplex(0., 0.); @@ -611,14 +609,14 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco dcomplex(-1., 0.) / sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy); cvec[ix] = dcomplex(1., 0.) / sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy); } - } else if (inner_boundary_flags & INVERT_AC_GRAD) { + } else if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Zero gradient at inner boundary for (int ix = 0; ix < inbndry; ix++) { avec[ix] = dcomplex(0., 0.); bvec[ix] = dcomplex(-1., 0.); cvec[ix] = dcomplex(1., 0.); } - } else if (inner_boundary_flags & INVERT_AC_LAP) { + } else if (isInnerBoundaryFlagSet(INVERT_AC_LAP)) { // Use decaying zero-Laplacian solution in the boundary for (int ix = 0; ix < inbndry; ix++) { avec[ix] = 0.0; @@ -626,9 +624,9 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco cvec[ix] = -exp(-1.0 * sqrt(coords->g33(ix, jy) / coords->g11(ix, jy)) * kwave * coords->dx(ix, jy)); } - } else if (inner_boundary_flags & INVERT_IN_CYLINDER) { + } else if (isInnerBoundaryFlagSet(INVERT_IN_CYLINDER)) { // Condition for inner radial boundary for cylindrical coordinates - // Explanation under "if (inner_boundary_flags & INVERT_IN_CYLINDER)" + // Explanation under "if (isInnerBoundaryFlagSet(INVERT_IN_CYLINDER))" for (int ix = 0; ix < inbndry; ix++) { avec[ix] = 0.; bvec[ix] = 1.; @@ -655,7 +653,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // If no user specified value is set on outer boundary, set the last // element in b (in the equation AX=b) to 0 - if 
(!(outer_boundary_flags & (INVERT_RHS | INVERT_SET))) { + if (!isOuterBoundaryFlagSet(INVERT_RHS | INVERT_SET)) { for (int ix = 0; ix < outbndry; ix++) { bk[ncx - ix] = 0.; } @@ -664,36 +662,37 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // DC i.e. kz = 0 (the offset mode) if (kz == 0) { - if (outer_boundary_flags & INVERT_DC_GRAD - && (outer_boundary_flags & INVERT_SET || outer_boundary_flags & INVERT_RHS)) { + if (isOuterBoundaryFlagSet(INVERT_DC_GRAD) + && (isOuterBoundaryFlagSet(INVERT_SET) + || isOuterBoundaryFlagSet(INVERT_RHS))) { // Zero gradient at outer boundary for (int ix = 0; ix < outbndry; ix++) { - avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(ncx - ix, jy)) - / coords->dx(ncx - ix, jy); - bvec[ncx - ix] = dcomplex(1., 0.) / sqrt(coords->g_11(ncx - ix, jy)) - / coords->dx(ncx - ix, jy); + avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(xe - ix, jy)) + / coords->dx(xe - ix, jy); + bvec[ncx - ix] = dcomplex(1., 0.) / sqrt(coords->g_11(xe - ix, jy)) + / coords->dx(xe - ix, jy); cvec[ncx - ix] = dcomplex(0., 0.); } - } else if (outer_boundary_flags & INVERT_DC_GRAD) { + } else if (isOuterBoundaryFlagSet(INVERT_DC_GRAD)) { // Zero gradient at outer boundary for (int ix = 0; ix < outbndry; ix++) { avec[ncx - ix] = dcomplex(1., 0.); bvec[ncx - ix] = dcomplex(-1., 0.); cvec[ncx - ix] = dcomplex(0., 0.); } - } else if (inner_boundary_flags & INVERT_DC_GRADPAR) { + } else if (isOuterBoundaryFlagSet(INVERT_DC_GRADPAR)) { for (int ix = 0; ix < inbndry; ix++) { - avec[ncx - ix] = 1.0 / sqrt(coords->g_22(ncx - ix + 1, jy)); - bvec[ncx - ix] = -1.0 / sqrt(coords->g_22(ncx - ix, jy)); + avec[ncx - ix] = 1.0 / sqrt(coords->g_22(xe - ix - 1, jy)); + bvec[ncx - ix] = -1.0 / sqrt(coords->g_22(xe - ix, jy)); cvec[ncx - ix] = 0.0; } - } else if (inner_boundary_flags & INVERT_DC_GRADPARINV) { + } else if (isOuterBoundaryFlagSet(INVERT_DC_GRADPARINV)) { for (int ix = 0; ix < inbndry; ix++) { - avec[ncx - ix] = 
sqrt(coords->g_22(ncx - ix - 1, jy)); - bvec[ncx - ix] = -sqrt(coords->g_22(ncx - ix, jy)); + avec[ncx - ix] = sqrt(coords->g_22(xe - ix - 1, jy)); + bvec[ncx - ix] = -sqrt(coords->g_22(xe - ix, jy)); cvec[ncx - ix] = 0.0; } - } else if (inner_boundary_flags & INVERT_DC_LAP) { + } else if (isOuterBoundaryFlagSet(INVERT_DC_LAP)) { // Decaying boundary conditions BoutReal k = 0.0; if (a != nullptr) { @@ -707,7 +706,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco cvec[ncx - ix] = 0.; bvec[ncx - ix] = 1.; avec[ncx - ix] = - -exp(-k * coords->dx(ncx - ix, jy) / sqrt(coords->g11(ncx - ix, jy))); + -exp(-k * coords->dx(xe - ix, jy) / sqrt(coords->g11(xe - ix, jy))); } } else { // Order 2 dirichlet BC (boundary half between points) @@ -722,24 +721,25 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // AC i.e. kz =/= 0 (all other modes than the offset mode) else { - if (outer_boundary_flags & INVERT_AC_GRAD - && (outer_boundary_flags & INVERT_SET || outer_boundary_flags & INVERT_RHS)) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD) + && (isOuterBoundaryFlagSet(INVERT_SET) + || isOuterBoundaryFlagSet(INVERT_RHS))) { // Zero gradient at outer boundary for (int ix = 0; ix < outbndry; ix++) { - avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(ncx - ix, jy)) - / coords->dx(ncx - ix, jy); - bvec[ncx - ix] = dcomplex(1., 0.) / sqrt(coords->g_11(ncx - ix, jy)) - / coords->dx(ncx - ix, jy); + avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(xe - ix, jy)) + / coords->dx(xe - ix, jy); + bvec[ncx - ix] = dcomplex(1., 0.) 
/ sqrt(coords->g_11(xe - ix, jy)) + / coords->dx(xe - ix, jy); cvec[ncx - ix] = dcomplex(0., 0.); } - } else if (outer_boundary_flags & INVERT_AC_GRAD) { + } else if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Zero gradient at outer boundary for (int ix = 0; ix < outbndry; ix++) { avec[ncx - ix] = dcomplex(1., 0.); bvec[ncx - ix] = dcomplex(-1., 0.); cvec[ncx - ix] = dcomplex(0., 0.); } - } else if (outer_boundary_flags & INVERT_AC_LAP) { + } else if (isOuterBoundaryFlagSet(INVERT_AC_LAP)) { // Use decaying zero-Laplacian solution in the boundary for (int ix = 0; ix < outbndry; ix++) { avec[ncx - ix] = @@ -795,6 +795,13 @@ void Laplacian::LaplacianMonitor::outputVars(Options& output_options, laplacian->outputVars(output_options, time_dimension); } +bool Laplacian::isInnerBoundaryFlagSetOnFirstX(int flag) const { + return isInnerBoundaryFlagSet(flag) and localmesh->firstX(); +} +bool Laplacian::isOuterBoundaryFlagSetOnLastX(int flag) const { + return isOuterBoundaryFlagSet(flag) and localmesh->lastX(); +} + /********************************************************************************** * LEGACY INTERFACE * diff --git a/src/mesh/boundary_factory.cxx b/src/mesh/boundary_factory.cxx index 5f5978f132..00282566a9 100644 --- a/src/mesh/boundary_factory.cxx +++ b/src/mesh/boundary_factory.cxx @@ -1,3 +1,5 @@ +#include "bout/parallel_boundary_op.hxx" +#include "bout/parallel_boundary_region.hxx" #include #include #include @@ -41,10 +43,12 @@ BoundaryFactory::BoundaryFactory() { addMod(new BoundaryFromFieldAligned(), "fromFieldAligned"); // Parallel boundaries - add(new BoundaryOpPar_dirichlet(), "parallel_dirichlet"); - add(new BoundaryOpPar_dirichlet_O3(), "parallel_dirichlet_O3"); - add(new BoundaryOpPar_dirichlet_interp(), "parallel_dirichlet_interp"); - add(new BoundaryOpPar_neumann(), "parallel_neumann"); + add(new BoundaryOpPar_dirichlet_o1(), "parallel_dirichlet_o1"); + add(new BoundaryOpPar_dirichlet_o2(), "parallel_dirichlet_o2"); + add(new 
BoundaryOpPar_dirichlet_o3(), "parallel_dirichlet_o3"); + add(new BoundaryOpPar_neumann_o1(), "parallel_neumann_o1"); + add(new BoundaryOpPar_neumann_o2(), "parallel_neumann_o2"); + add(new BoundaryOpPar_neumann_o3(), "parallel_neumann_o3"); } BoundaryFactory::~BoundaryFactory() { diff --git a/src/mesh/coordinates.cxx b/src/mesh/coordinates.cxx index 01f0fe46ca..4e515449ca 100644 --- a/src/mesh/coordinates.cxx +++ b/src/mesh/coordinates.cxx @@ -1502,7 +1502,7 @@ Field3D Coordinates::DDY(const Field3D& f, CELL_LOC outloc, const std::string& m if (!f.hasParallelSlices() and !transform->canToFromFieldAligned()) { Field3D f_parallel = f; transform->calcParallelSlices(f_parallel); - f_parallel.applyParallelBoundary("parallel_neumann"); + f_parallel.applyParallelBoundary("parallel_neumann_o2"); return bout::derivatives::index::DDY(f_parallel, outloc, method, region); } #endif @@ -1908,7 +1908,7 @@ Coordinates::Grad2_par2_DDY_invSg(CELL_LOC outloc, const std::string& method) co // Communicate to get parallel slices localmesh->communicate(*invSgCache); - invSgCache->applyParallelBoundary("parallel_neumann"); + invSgCache->applyParallelBoundary("parallel_neumann_o2"); // cache auto ptr = std::make_unique(); diff --git a/src/mesh/fv_ops.cxx b/src/mesh/fv_ops.cxx index 0a5d5f9624..cd5b924e9e 100644 --- a/src/mesh/fv_ops.cxx +++ b/src/mesh/fv_ops.cxx @@ -22,7 +22,7 @@ Slices makeslices(bool use_slices, const T& field) { namespace FV { -// Div ( a Grad_perp(f) ) -- ∇⊥ ( a ⋅ ∇⊥ f) -- Vorticity +// Div ( a Grad_perp(f) ) -- ∇ ⋅ ( a ∇⊥ f) -- Vorticity Field3D Div_a_Grad_perp(const Field3D& a, const Field3D& f) { ASSERT2(a.getLocation() == f.getLocation()); diff --git a/src/mesh/impls/bout/boutmesh.cxx b/src/mesh/impls/bout/boutmesh.cxx index 956aba0f79..16061cd47e 100644 --- a/src/mesh/impls/bout/boutmesh.cxx +++ b/src/mesh/impls/bout/boutmesh.cxx @@ -35,6 +35,7 @@ #include "boutmesh.hxx" +#include #include #include #include @@ -44,6 +45,7 @@ #include #include #include +#include 
#include #include @@ -80,9 +82,6 @@ BoutMesh::~BoutMesh() { for (const auto& bndry : boundary) { delete bndry; } - for (const auto& bndry : par_boundary) { - delete bndry; - } if (comm_x != MPI_COMM_NULL) { MPI_Comm_free(&comm_x); @@ -3011,11 +3010,36 @@ RangeIterator BoutMesh::iterateBndryUpperY() const { std::vector BoutMesh::getBoundaries() { return boundary; } -std::vector BoutMesh::getBoundariesPar() { return par_boundary; } +std::vector> +BoutMesh::getBoundariesPar(BoundaryParType type) { + return par_boundary[static_cast(type)]; +} -void BoutMesh::addBoundaryPar(BoundaryRegionPar* bndry) { +void BoutMesh::addBoundaryPar(std::shared_ptr bndry, + BoundaryParType type) { output_info << "Adding new parallel boundary: " << bndry->label << endl; - par_boundary.push_back(bndry); + switch (type) { + case BoundaryParType::xin_fwd: + par_boundary[static_cast(BoundaryParType::xin)].push_back(bndry); + par_boundary[static_cast(BoundaryParType::fwd)].push_back(bndry); + break; + case BoundaryParType::xin_bwd: + par_boundary[static_cast(BoundaryParType::xin)].push_back(bndry); + par_boundary[static_cast(BoundaryParType::bwd)].push_back(bndry); + break; + case BoundaryParType::xout_fwd: + par_boundary[static_cast(BoundaryParType::xout)].push_back(bndry); + par_boundary[static_cast(BoundaryParType::fwd)].push_back(bndry); + break; + case BoundaryParType::xout_bwd: + par_boundary[static_cast(BoundaryParType::xout)].push_back(bndry); + par_boundary[static_cast(BoundaryParType::bwd)].push_back(bndry); + break; + default: + throw BoutException("Unexpected type of boundary {}", toString(type)); + } + par_boundary[static_cast(type)].push_back(bndry); + par_boundary[static_cast(BoundaryParType::all)].push_back(bndry); } Field3D BoutMesh::smoothSeparatrix(const Field3D& f) { diff --git a/src/mesh/impls/bout/boutmesh.hxx b/src/mesh/impls/bout/boutmesh.hxx index 59c6ecbfbd..cc674d401a 100644 --- a/src/mesh/impls/bout/boutmesh.hxx +++ b/src/mesh/impls/bout/boutmesh.hxx @@ -158,8 
+158,10 @@ public: // Boundary regions std::vector getBoundaries() override; - std::vector getBoundariesPar() override; - void addBoundaryPar(BoundaryRegionPar* bndry) override; + std::vector> + getBoundariesPar(BoundaryParType type) override; + void addBoundaryPar(std::shared_ptr bndry, + BoundaryParType type) override; std::set getPossibleBoundaries() const override; Field3D smoothSeparatrix(const Field3D& f) override; @@ -393,8 +395,10 @@ protected: void addBoundaryRegions(); private: - std::vector boundary; // Vector of boundary regions - std::vector par_boundary; // Vector of parallel boundary regions + std::vector boundary; // Vector of boundary regions + std::array>, + static_cast(BoundaryParType::SIZE)> + par_boundary; // Vector of parallel boundary regions ////////////////////////////////////////////////// // Communications diff --git a/src/mesh/parallel/fci.cxx b/src/mesh/parallel/fci.cxx index 23b2b91eab..cb8c19bbd7 100644 --- a/src/mesh/parallel/fci.cxx +++ b/src/mesh/parallel/fci.cxx @@ -47,9 +47,9 @@ #include -FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& dy, Options& options, - int offset_, BoundaryRegionPar* inner_boundary, - BoundaryRegionPar* outer_boundary, bool zperiodic) +FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& UNUSED(dy), Options& options, + int offset_, const std::shared_ptr& inner_boundary, + const std::shared_ptr& outer_boundary, bool zperiodic) : map_mesh(mesh), offset(offset_), region_no_boundary(map_mesh.getRegion("RGN_NOBNDRY")), corner_boundary_mask(map_mesh) { @@ -222,13 +222,16 @@ FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& dy, Options& options, const BoutReal dx = (dZ_dz * dR - dR_dz * dZ) / det; const BoutReal dz = (dR_dx * dZ - dZ_dx * dR) / det; - // Negative xt_prime means we've hit the inner boundary, otherwise - // the outer boundary - auto* boundary = (xt_prime[i] < map_mesh.xstart) ? 
inner_boundary : outer_boundary; + // Negative xt_prime means we've hit the inner boundary, otherwise the + // outer boundary. However, if any of the surrounding points are negative, + // that also means inner. So to differentiate between inner and outer we + // need at least 2 points in the domain. + ASSERT2(map_mesh.xend - map_mesh.xstart >= 2); + auto boundary = (xt_prime[i] < map_mesh.xstart) ? inner_boundary : outer_boundary; boundary->add_point(x, y, z, x + dx, y + 0.5 * offset, - z + dz, // Intersection point in local index space - 0.5 * dy[i], // Distance to intersection - PI // Right-angle intersection + z + dz, // Intersection point in local index space + 0.5, // Distance to intersection + 1 // Default to that there is a point in the other direction ); } region_no_boundary = region_no_boundary.mask(to_remove); diff --git a/src/mesh/parallel/fci.hxx b/src/mesh/parallel/fci.hxx index dd647d939d..3ec3321a6a 100644 --- a/src/mesh/parallel/fci.hxx +++ b/src/mesh/parallel/fci.hxx @@ -44,8 +44,8 @@ class FCIMap { public: FCIMap() = delete; FCIMap(Mesh& mesh, const Coordinates::FieldMetric& dy, Options& options, int offset, - BoundaryRegionPar* inner_boundary, BoundaryRegionPar* outer_boundary, - bool zperiodic); + const std::shared_ptr& inner_boundary, + const std::shared_ptr& outer_boundary, bool zperiodic); // The mesh this map was created on Mesh& map_mesh; @@ -79,19 +79,19 @@ public: FCITransform::checkInputGrid(); auto forward_boundary_xin = - new BoundaryRegionPar("FCI_forward", BNDRY_PAR_FWD_XIN, +1, &mesh); - auto backward_boundary_xin = - new BoundaryRegionPar("FCI_backward", BNDRY_PAR_BKWD_XIN, -1, &mesh); + std::make_shared("FCI_forward", BNDRY_PAR_FWD_XIN, +1, &mesh); + auto backward_boundary_xin = std::make_shared( + "FCI_backward", BNDRY_PAR_BKWD_XIN, -1, &mesh); auto forward_boundary_xout = - new BoundaryRegionPar("FCI_forward", BNDRY_PAR_FWD_XOUT, +1, &mesh); - auto backward_boundary_xout = - new BoundaryRegionPar("FCI_backward", 
BNDRY_PAR_BKWD_XOUT, -1, &mesh); + std::make_shared("FCI_forward", BNDRY_PAR_FWD_XOUT, +1, &mesh); + auto backward_boundary_xout = std::make_shared( + "FCI_backward", BNDRY_PAR_BKWD_XOUT, -1, &mesh); // Add the boundary region to the mesh's vector of parallel boundaries - mesh.addBoundaryPar(forward_boundary_xin); - mesh.addBoundaryPar(backward_boundary_xin); - mesh.addBoundaryPar(forward_boundary_xout); - mesh.addBoundaryPar(backward_boundary_xout); + mesh.addBoundaryPar(forward_boundary_xin, BoundaryParType::xin_fwd); + mesh.addBoundaryPar(backward_boundary_xin, BoundaryParType::xin_bwd); + mesh.addBoundaryPar(forward_boundary_xout, BoundaryParType::xout_fwd); + mesh.addBoundaryPar(backward_boundary_xout, BoundaryParType::xout_bwd); field_line_maps.reserve(mesh.ystart * 2); for (int offset = 1; offset < mesh.ystart + 1; ++offset) { @@ -100,6 +100,22 @@ public: field_line_maps.emplace_back(mesh, dy, options, -offset, backward_boundary_xin, backward_boundary_xout, zperiodic); } + ASSERT0(mesh.ystart == 1); + std::shared_ptr bndries[]{ + forward_boundary_xin, forward_boundary_xout, backward_boundary_xin, + backward_boundary_xout}; + for (auto& bndry : bndries) { + for (const auto& bndry2 : bndries) { + if (bndry->dir == bndry2->dir) { + continue; + } + for (bndry->first(); !bndry->isDone(); bndry->next()) { + if (bndry2->contains(*bndry)) { + bndry->setValid(0); + } + } + } + } } void calcParallelSlices(Field3D& f) override; diff --git a/src/mesh/parallel/shiftedmetric.cxx b/src/mesh/parallel/shiftedmetric.cxx index 84084d9cbb..382052047d 100644 --- a/src/mesh/parallel/shiftedmetric.cxx +++ b/src/mesh/parallel/shiftedmetric.cxx @@ -6,7 +6,9 @@ * */ +#include "bout/parallel_boundary_region.hxx" #include "bout/paralleltransform.hxx" +#include #include #include #include diff --git a/src/mesh/parallel/shiftedmetricinterp.cxx b/src/mesh/parallel/shiftedmetricinterp.cxx index 214f7ded76..7f3637e79c 100644 --- a/src/mesh/parallel/shiftedmetricinterp.cxx +++ 
b/src/mesh/parallel/shiftedmetricinterp.cxx @@ -29,7 +29,7 @@ #include "shiftedmetricinterp.hxx" #include "bout/constants.hxx" -#include "bout/mask.hxx" +#include "bout/parallel_boundary_region.hxx" ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, Field2D zShift_in, BoutReal zlength_in, @@ -114,11 +114,16 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, interp_from_aligned->calcWeights(zt_prime_from); + int yvalid = mesh.LocalNy - 2 * mesh.ystart; + // avoid overflow - no stencil need more than 5 points + if (yvalid > 20) { + yvalid = 20; + } // Create regions for parallel boundary conditions Field2D dy; mesh.get(dy, "dy", 1.); - auto forward_boundary_xin = - new BoundaryRegionPar("parallel_forward_xin", BNDRY_PAR_FWD_XIN, +1, &mesh); + auto forward_boundary_xin = std::make_shared( + "parallel_forward_xin", BNDRY_PAR_FWD_XIN, +1, &mesh); for (auto it = mesh.iterateBndryUpperY(); not it.isDone(); it.next()) { for (int z = mesh.zstart; z <= mesh.zend; z++) { forward_boundary_xin->add_point( @@ -128,14 +133,13 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z + 0.5 * (zShift(it.ind, mesh.yend + 1) - zShift(it.ind, mesh.yend)), 0.25 - * (dy(it.ind, mesh.yend) // dy/2 - + dy(it.ind, mesh.yend + 1)), - 0. // angle? 
- ); + * (1 // dy/2 + + dy(it.ind, mesh.yend + 1) / dy(it.ind, mesh.yend)), // length + yvalid); } } - auto backward_boundary_xin = - new BoundaryRegionPar("parallel_backward_xin", BNDRY_PAR_BKWD_XIN, -1, &mesh); + auto backward_boundary_xin = std::make_shared( + "parallel_backward_xin", BNDRY_PAR_BKWD_XIN, -1, &mesh); for (auto it = mesh.iterateBndryLowerY(); not it.isDone(); it.next()) { for (int z = mesh.zstart; z <= mesh.zend; z++) { backward_boundary_xin->add_point( @@ -145,15 +149,14 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z + 0.5 * (zShift(it.ind, mesh.ystart) - zShift(it.ind, mesh.ystart - 1)), 0.25 - * (dy(it.ind, mesh.ystart - 1) // dy/2 - + dy(it.ind, mesh.ystart)), - 0. // angle? - ); + * (1 // dy/2 + + dy(it.ind, mesh.ystart - 1) / dy(it.ind, mesh.ystart)), + yvalid); } } // Create regions for parallel boundary conditions - auto forward_boundary_xout = - new BoundaryRegionPar("parallel_forward_xout", BNDRY_PAR_FWD_XOUT, +1, &mesh); + auto forward_boundary_xout = std::make_shared( + "parallel_forward_xout", BNDRY_PAR_FWD_XOUT, +1, &mesh); for (auto it = mesh.iterateBndryUpperY(); not it.isDone(); it.next()) { for (int z = mesh.zstart; z <= mesh.zend; z++) { forward_boundary_xout->add_point( @@ -163,14 +166,13 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z + 0.5 * (zShift(it.ind, mesh.yend + 1) - zShift(it.ind, mesh.yend)), 0.25 - * (dy(it.ind, mesh.yend) // dy/2 - + dy(it.ind, mesh.yend + 1)), - 0. // angle? 
- ); + * (1 // dy/2 + dy(it.ind, mesh.yend + 1) / dy(it.ind, mesh.yend)), + yvalid); } } - auto backward_boundary_xout = - new BoundaryRegionPar("parallel_backward_xout", BNDRY_PAR_BKWD_XOUT, -1, &mesh); + auto backward_boundary_xout = std::make_shared( + "parallel_backward_xout", BNDRY_PAR_BKWD_XOUT, -1, &mesh); for (auto it = mesh.iterateBndryLowerY(); not it.isDone(); it.next()) { for (int z = mesh.zstart; z <= mesh.zend; z++) { backward_boundary_xout->add_point( @@ -180,18 +182,17 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z + 0.5 * (zShift(it.ind, mesh.ystart) - zShift(it.ind, mesh.ystart - 1)), 0.25 - * (dy(it.ind, mesh.ystart - 1) // dy/2 - + dy(it.ind, mesh.ystart)), - 0. // angle? - ); + * (dy(it.ind, mesh.ystart - 1) / dy(it.ind, mesh.ystart) // dy/2 + + 1), + yvalid); } } // Add the boundary region to the mesh's vector of parallel boundaries - mesh.addBoundaryPar(forward_boundary_xin); - mesh.addBoundaryPar(backward_boundary_xin); - mesh.addBoundaryPar(forward_boundary_xout); - mesh.addBoundaryPar(backward_boundary_xout); + mesh.addBoundaryPar(forward_boundary_xin, BoundaryParType::xin_fwd); + mesh.addBoundaryPar(backward_boundary_xin, BoundaryParType::xin_bwd); + mesh.addBoundaryPar(forward_boundary_xout, BoundaryParType::xout_fwd); + mesh.addBoundaryPar(backward_boundary_xout, BoundaryParType::xout_bwd); } void ShiftedMetricInterp::checkInputGrid() { diff --git a/src/mesh/parallel_boundary_op.cxx b/src/mesh/parallel_boundary_op.cxx index 8b2c294a4a..ebd9852791 100644 --- a/src/mesh/parallel_boundary_op.cxx +++ b/src/mesh/parallel_boundary_op.cxx @@ -6,18 +6,15 @@ #include "bout/output.hxx" BoutReal BoundaryOpPar::getValue(const BoundaryRegionPar& bndry, BoutReal t) { - - Mesh* mesh = bndry.localmesh; - BoutReal value; switch (value_type) { case ValueType::GEN: - return gen_values->generate( - bout::generator::Context(bndry.s_x, bndry.s_y, bndry.s_z, CELL_CENTRE, 
mesh, t)); + return gen_values->generate(bout::generator::Context( + bndry.s_x(), bndry.s_y(), bndry.s_z(), CELL_CENTRE, bndry.localmesh, t)); case ValueType::FIELD: // FIXME: Interpolate to s_x, s_y, s_z... - value = (*field_values)(bndry.x, bndry.y, bndry.z); + value = (*field_values)[bndry.ind()]; return value; case ValueType::REAL: return real_value; @@ -25,123 +22,3 @@ BoutReal BoundaryOpPar::getValue(const BoundaryRegionPar& bndry, BoutReal t) { throw BoutException("Invalid value_type encountered in BoundaryOpPar::getValue"); } } - -////////////////////////////////////////// -// Dirichlet boundary - -void BoundaryOpPar_dirichlet::apply(Field3D& f, BoutReal t) { - Field3D& f_next = f.ynext(bndry->dir); - - Coordinates& coord = *(f.getCoordinates()); - - // Loop over grid points If point is in boundary, then fill in - // f_next such that the field would be VALUE on the boundary - for (bndry->first(); !bndry->isDone(); bndry->next()) { - // temp variables for convenience - int x = bndry->x; - int y = bndry->y; - int z = bndry->z; - - // Generate the boundary value - BoutReal value = getValue(*bndry, t); - - // Scale the field and normalise to the desired value - BoutReal y_prime = bndry->length; - BoutReal f2 = (f(x, y, z) - value) * (coord.dy(x, y, z) - y_prime) / y_prime; - - f_next(x, y + bndry->dir, z) = value - f2; - } -} - -////////////////////////////////////////// -// Dirichlet boundary - Third order - -void BoundaryOpPar_dirichlet_O3::apply(Field3D& f, BoutReal t) { - - Field3D& f_next = f.ynext(bndry->dir); - Field3D& f_prev = f.ynext(-bndry->dir); - - Coordinates& coord = *(f.getCoordinates()); - - // Loop over grid points If point is in boundary, then fill in - // f_next such that the field would be VALUE on the boundary - for (bndry->first(); !bndry->isDone(); bndry->next()) { - // temp variables for convenience - int x = bndry->x; - int y = bndry->y; - int z = bndry->z; - - // Generate the boundary value - BoutReal fb = getValue(*bndry, t); - 
BoutReal f1 = f_prev(x, y - bndry->dir, z); - BoutReal f2 = f(x, y, z); - BoutReal l1 = coord.dy(x, y, z); - BoutReal l2 = bndry->length; - BoutReal l3 = coord.dy(x, y, z) - l2; - - BoutReal denom = (l1 * l1 * l2 + l1 * l2 * l2); - BoutReal term1 = (l2 * l2 * l3 + l2 * l3 * l3); - BoutReal term2 = l1 * (l1 + l2 + l3) * (l2 + l3); - BoutReal term3 = l3 * ((l1 + l2) * l3 + (l1 + l2) * (l1 + l2)); - - f_next(x, y + bndry->dir, z) = (term1 * f1 + term2 * fb - term3 * f2) / denom; - } -} - -////////////////////////////////////////// -// Dirichlet with interpolation - -void BoundaryOpPar_dirichlet_interp::apply(Field3D& f, BoutReal t) { - - Field3D& f_next = f.ynext(bndry->dir); - Field3D& f_prev = f.ynext(-bndry->dir); - - Coordinates& coord = *(f.getCoordinates()); - - // Loop over grid points If point is in boundary, then fill in - // f_next such that the field would be VALUE on the boundary - for (bndry->first(); !bndry->isDone(); bndry->next()) { - // temp variables for convenience - int x = bndry->x; - int y = bndry->y; - int z = bndry->z; - - // Generate the boundary value - BoutReal fs = getValue(*bndry, t); - - // Scale the field and normalise to the desired value - BoutReal dy = coord.dy(x, y, z); - BoutReal s = bndry->length * dy; - - f_next(x, y + bndry->dir, z) = - f_prev(x, y - bndry->dir, z) * (1. - (2. * s / (dy + s))) - + 2. * f(x, y, z) * ((s - dy) / s) + fs * (dy / s - (2. 
/ s + 1.)); - } -} - -////////////////////////////////////////// -// Neumann boundary - -void BoundaryOpPar_neumann::apply(Field3D& f, BoutReal t) { - TRACE("BoundaryOpPar_neumann::apply"); - - Field3D& f_next = f.ynext(bndry->dir); - f_next.allocate(); // Ensure unique before modifying - - Coordinates& coord = *(f.getCoordinates()); - - // If point is in boundary, then fill in f_next such that the derivative - // would be VALUE on the boundary - for (bndry->first(); !bndry->isDone(); bndry->next()) { - // temp variables for convience - int x = bndry->x; - int y = bndry->y; - int z = bndry->z; - - // Generate the boundary value - BoutReal value = getValue(*bndry, t); - BoutReal dy = coord.dy(x, y, z); - - f_next(x, y + bndry->dir, z) = f(x, y, z) + bndry->dir * value * dy; - } -} diff --git a/src/mesh/parallel_boundary_region.cxx b/src/mesh/parallel_boundary_region.cxx index 3f77d96737..e69de29bb2 100644 --- a/src/mesh/parallel_boundary_region.cxx +++ b/src/mesh/parallel_boundary_region.cxx @@ -1,37 +0,0 @@ -#include "bout/parallel_boundary_region.hxx" - -void BoundaryRegionPar::add_point(const int jx, const int jy, const int jz, - const BoutReal x, const BoutReal y, const BoutReal z, - const BoutReal length, const BoutReal angle) { - bndry_points.push_back({{jx, jy, jz}, {x, y, z}, length, angle}); -} - -void BoundaryRegionPar::first() { - bndry_position = begin(bndry_points); - if (!isDone()) { - x = bndry_position->index.jx; - y = bndry_position->index.jy; - z = bndry_position->index.jz; - s_x = bndry_position->intersection.s_x; - s_y = bndry_position->intersection.s_y; - s_z = bndry_position->intersection.s_z; - length = bndry_position->length; - angle = bndry_position->angle; - } -} - -void BoundaryRegionPar::next() { - ++bndry_position; - if (!isDone()) { - x = bndry_position->index.jx; - y = bndry_position->index.jy; - z = bndry_position->index.jz; - s_x = bndry_position->intersection.s_x; - s_y = bndry_position->intersection.s_y; - s_z = 
bndry_position->intersection.s_z; - length = bndry_position->length; - angle = bndry_position->angle; - } -} - -bool BoundaryRegionPar::isDone() { return (bndry_position == end(bndry_points)); } diff --git a/src/mesh/parallel_boundary_stencil.cxx.py b/src/mesh/parallel_boundary_stencil.cxx.py new file mode 100644 index 0000000000..d0988ee099 --- /dev/null +++ b/src/mesh/parallel_boundary_stencil.cxx.py @@ -0,0 +1,62 @@ +import os +from tempfile import NamedTemporaryFile as tmpf +from stencils_sympy import dirichlet, neumann, simp, Symbol, Matrix, ccode + + +def gen_code(order, matrix_type): + x = [Symbol("spacing%d" % i) for i in range(order)] + matrix = matrix_type(x) + A = Matrix(order, order, matrix) + + try: + iA = A.inv() + except: + import sys + + print(A, matrix, file=sys.stderr) + raise + return ccode(simp(sum([iA[0, i] * Symbol("value%d" % i) for i in range(order)]))) + + +def run(cmd): + print(cmd) + out = os.system(cmd) + assert out == 0 + + +if __name__ == "__main__": + with tmpf("w", dir=".", delete=False) as f: + f.write("namespace {\n") + f.write( + """ +inline BoutReal pow(BoutReal val, int exp) { + //constexpr int expval = exp; + //static_assert(expval == 2 or expval == 3, "This pow is only for exponent 2 or 3"); + if (exp == 2) { + return val * val; + } + ASSERT3(exp == 3); + return val * val * val; +} +""" + ) + + for order in range(1, 4): + for matrix in dirichlet, neumann: + if order == 1 and matrix == neumann: + continue + print(f"generating {matrix.name}_o{order}") + args = ", ".join( + [ + "BoutReal spacing%d, BoutReal value%d" % (i, i) + for i in range(order) + ] + ) + f.write( + f"inline BoutReal stencil_{matrix.name}_o{order}({args}) {{\n return " + ) + f.write(gen_code(order, matrix)) + f.write(";\n}\n") + f.write("}\n") + run("clang-format -i " + f.name) + run(f"mv {f.name} {__file__[:-3]}") diff --git a/src/mesh/stencils.md b/src/mesh/stencils.md new file mode 100644 index 0000000000..0c7d181481 --- /dev/null +++ b/src/mesh/stencils.md 
@@ -0,0 +1,29 @@ +Notes concerning the generation of stencils +================ + +We want to create a Taylor function +$f(x-x_0)=\sum_{i=0}^n \frac{1}{i!}f_i(x-x_0)^i$ where $n$ +is the order of the function, $x_0$ is the point in the boundary +where we want to calculate the function. $f_i$ are some coefficients +that we need to determine. To be precise, only $f_0$ needs to be +determined. +We know that the function takes certain values at some points. If the +value at some distance `spacing.f0` is a given value `val` then we +can build a linear system of equations using the above formula. +If the derivative is given instead, the above equation needs to be +differentiated once. + +stencils_sympy.py calculates the coefficients of the above matrix +which represents our system of equations. The derivative is simply +the factor of the next smaller term (or zero if there is no +smaller one). This is what is calculated by `taylor`, `dirichlet` +and `neumann`, the respective matrix coefficients. + +sympy does all the heavy lifting of analytically inverting the +matrix. + +With the analytic inversion we can put in the numerical offsets +`spacing.f?` in C++ and get a fast expression for the respective +coefficients. As mentioned before, we do not need the full inverse, +just the first row, as we only care about the value, not about its +derivative. 
diff --git a/src/mesh/stencils_sympy.py b/src/mesh/stencils_sympy.py new file mode 100644 index 0000000000..64677f1985 --- /dev/null +++ b/src/mesh/stencils_sympy.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 + +from sympy import Symbol, Eq +from sympy.matrices import Matrix +from sympy.printing import ccode +from sympy.simplify import combsimp as simp +from sympy.utilities.codegen import codegen + + +def pow(a, b): + if b == 0: + return "1" + if b == 1: + return a + else: + return "%s**%d" % (a, b) + + +def factorial(a): + if a == 0 or a == 1: + return 1 + else: + assert a > 0 + return a * factorial(a - 1) + + +def gen_code(order, matrix_type): + x = [Symbol("spacing.f%d" % i) for i in range(order)] + matrix = matrix_type(x) + A = Matrix(order, order, matrix) + + try: + iA = A.inv() + except: + import sys + + print(A, matrix, file=sys.stderr) + raise + ret = "" + for i in range(order): + ret += ccode(simp(iA[0, i]), assign_to="facs.f%d" % i) + ret += "\n" + return ret + + +def taylor(x, i, j): + if j >= 0: + return x[i] ** j / factorial(j) + else: + return 0 + + +class dirichlet: + name = "dirichlet" + + def __init__(self, x): + self.x = x + + def __call__(self, i, j): + return taylor(self.x, i, j) + + +class neumann: + name = "neumann" + + def __init__(self, x): + self.x = x + + def __call__(self, i, j): + if i == 0: + return taylor(self.x, i, j - 1) + else: + return taylor(self.x, i, j) + + +if __name__ == "__main__": + print(gen_code(3, dirichlet)) diff --git a/src/solver/impls/arkode/arkode.cxx b/src/solver/impls/arkode/arkode.cxx index aabe2ae050..440f8f54f1 100644 --- a/src/solver/impls/arkode/arkode.cxx +++ b/src/solver/impls/arkode/arkode.cxx @@ -4,9 +4,7 @@ * NOTE: ARKode is still in beta testing so use with cautious optimism * ************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu - * - * Contact: Nick Walkden, nick.walkden@ccfe.ac.uk + * Copyright 2010-2024 BOUT++ 
contributors * * This file is part of BOUT++. * @@ -31,6 +29,7 @@ #if BOUT_HAS_ARKODE +#include "bout/bout_enum_class.hxx" #include "bout/boutcomm.hxx" #include "bout/boutexception.hxx" #include "bout/field3d.hxx" @@ -41,17 +40,7 @@ #include "bout/unused.hxx" #include "bout/utils.hxx" -#if SUNDIALS_VERSION_MAJOR >= 4 #include -#else -#include -#if SUNDIALS_VERSION_MAJOR >= 3 -#include -#else -#include -#endif -#endif - #include #include #include @@ -61,110 +50,21 @@ class Field2D; -#define ZERO RCONST(0.) -#define ONE RCONST(1.0) +// NOLINTBEGIN(readability-identifier-length) +namespace { +int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data); +int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data); +int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data); -#ifndef ARKODEINT -#if SUNDIALS_VERSION_MAJOR < 3 -using ARKODEINT = bout::utils::function_traits::arg_t<0>; -#else -using ARKODEINT = sunindextype; -#endif -#endif +int arkode_bbd_rhs(sunindextype Nlocal, BoutReal t, N_Vector u, N_Vector du, + void* user_data); +int arkode_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, N_Vector zvec, + BoutReal gamma, BoutReal delta, int lr, void* user_data); -static int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data); -static int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data); -static int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data); - -static int arkode_bbd_rhs(ARKODEINT Nlocal, BoutReal t, N_Vector u, N_Vector du, - void* user_data); -static int arkode_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, N_Vector zvec, - BoutReal gamma, BoutReal delta, int lr, void* user_data); -#if SUNDIALS_VERSION_MAJOR < 3 -// Shim for earlier versions -inline static int arkode_pre_shim(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, - N_Vector zvec, BoutReal gamma, BoutReal delta, int lr, - void* user_data, N_Vector UNUSED(tmp)) { - 
return arkode_pre(t, yy, yp, rvec, zvec, gamma, delta, lr, user_data); -} -#else -// Alias for newer versions -constexpr auto& arkode_pre_shim = arkode_pre; -#endif - -static int arkode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector fy, - void* user_data, N_Vector tmp); -#if SUNDIALS_VERSION_MAJOR < 4 -// Shim for earlier versions -inline int ARKStepSetJacTimes(void* arkode_mem, std::nullptr_t, - ARKSpilsJacTimesVecFn jtimes) { -#if SUNDIALS_VERSION_MAJOR < 3 - return ARKSpilsSetJacTimesVecFn(arkode_mem, jtimes); -#else - return ARKSpilsSetJacTimes(arkode_mem, nullptr, jtimes); -#endif -} -#endif - -#if SUNDIALS_VERSION_MAJOR < 4 -void* ARKStepCreate(ARKRhsFn fe, ARKRhsFn fi, BoutReal t0, N_Vector y0) { - auto arkode_mem = ARKodeCreate(); - - if (arkode_mem == nullptr) { - throw BoutException("ARKodeCreate failed\n"); - } - if (ARKodeInit(arkode_mem, fe, fi, t0, y0) != ARK_SUCCESS) { - throw BoutException("ARKodeInit failed\n"); - } - return arkode_mem; -} - -#if SUNDIALS_VERSION_MAJOR == 3 -int ARKStepSetLinearSolver(void* arkode_mem, SUNLinearSolver LS, std::nullptr_t) { - return ARKSpilsSetLinearSolver(arkode_mem, LS); -} -#endif - -// Aliases for older versions -// In SUNDIALS 4, ARKode has become ARKStep, hence all the renames -constexpr auto& ARKStepEvolve = ARKode; -constexpr auto& ARKStepFree = ARKodeFree; -constexpr auto& ARKStepGetCurrentTime = ARKodeGetCurrentTime; -constexpr auto& ARKStepGetDky = ARKodeGetDky; -constexpr auto& ARKStepGetLastStep = ARKodeGetLastStep; -constexpr auto& ARKStepGetNumLinIters = ARKSpilsGetNumLinIters; -constexpr auto& ARKStepGetNumNonlinSolvIters = ARKodeGetNumNonlinSolvIters; -constexpr auto& ARKStepGetNumPrecEvals = ARKSpilsGetNumPrecEvals; -constexpr auto& ARKStepGetNumRhsEvals = ARKodeGetNumRhsEvals; -constexpr auto& ARKStepGetNumSteps = ARKodeGetNumSteps; -constexpr auto& ARKStepReInit = ARKodeReInit; -constexpr auto& ARKStepSStolerances = ARKodeSStolerances; -constexpr auto& ARKStepSVtolerances = 
ARKodeSVtolerances; -constexpr auto& ARKStepSetAdaptivityMethod = ARKodeSetAdaptivityMethod; -constexpr auto& ARKStepSetCFLFraction = ARKodeSetCFLFraction; -constexpr auto& ARKStepSetEpsLin = ARKSpilsSetEpsLin; -constexpr auto& ARKStepSetExplicit = ARKodeSetExplicit; -constexpr auto& ARKStepSetFixedPoint = ARKodeSetFixedPoint; -constexpr auto& ARKStepSetFixedStep = ARKodeSetFixedStep; -constexpr auto& ARKStepSetImEx = ARKodeSetImEx; -constexpr auto& ARKStepSetImplicit = ARKodeSetImplicit; -constexpr auto& ARKStepSetInitStep = ARKodeSetInitStep; -constexpr auto& ARKStepSetLinear = ARKodeSetLinear; -constexpr auto& ARKStepSetMaxNumSteps = ARKodeSetMaxNumSteps; -constexpr auto& ARKStepSetMaxStep = ARKodeSetMaxStep; -constexpr auto& ARKStepSetMinStep = ARKodeSetMinStep; -constexpr auto& ARKStepSetOptimalParams = ARKodeSetOptimalParams; -constexpr auto& ARKStepSetOrder = ARKodeSetOrder; -constexpr auto& ARKStepSetPreconditioner = ARKSpilsSetPreconditioner; -constexpr auto& ARKStepSetUserData = ARKodeSetUserData; -#endif - -#if SUNDIALS_VERSION_MAJOR < 6 -void* ARKStepCreate(ARKRhsFn fe, ARKRhsFn fi, BoutReal t0, N_Vector y0, - [[maybe_unused]] SUNContext context) { - return ARKStepCreate(fe, fi, t0, y0); -} -#endif +int arkode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector fy, + void* user_data, N_Vector tmp); +} // namespace +// NOLINTEND(readability-identifier-length) ArkodeSolver::ArkodeSolver(Options* opts) : Solver(opts), diagnose((*options)["diagnose"] @@ -173,11 +73,10 @@ ArkodeSolver::ArkodeSolver(Options* opts) mxsteps((*options)["mxstep"] .doc("Maximum number of steps to take between outputs") .withDefault(500)), - imex((*options)["imex"].doc("Use ImEx capability").withDefault(true)), - solve_explicit( - (*options)["explicit"].doc("Solve only explicit part").withDefault(true)), - solve_implicit( - (*options)["implicit"].doc("Solve only implicit part").withDefault(true)), + treatment((*options)["treatment"] + .doc("Use default capability (imex) 
or provide a specific treatment: " + "implicit or explicit") + .withDefault(Treatment::ImEx)), set_linear( (*options)["set_linear"] .doc("Use linear implicit solver (only evaluates jacobian inversion once)") @@ -187,14 +86,22 @@ ArkodeSolver::ArkodeSolver(Options* opts) "not recommended except for code comparison") .withDefault(false)), order((*options)["order"].doc("Order of internal step").withDefault(4)), +#if SUNDIALS_TABLE_BY_NAME_SUPPORT + implicit_table((*options)["implicit_table"] + .doc("Name of the implicit Butcher table") + .withDefault("")), + explicit_table((*options)["explicit_table"] + .doc("Name of the explicit Butcher table") + .withDefault("")), +#endif cfl_frac((*options)["cfl_frac"] .doc("Fraction of the estimated explicitly stable step to use") .withDefault(-1.0)), - adap_method((*options)["adap_method"] - .doc("Set timestep adaptivity function: 0 -> PID adaptivity " - "(default); 1 -> PI; 2 -> I; 3 -> explicit Gustafsson; 4 -> " - "implicit Gustafsson; 5 -> ImEx Gustafsson;") - .withDefault(0)), + adap_method( + (*options)["adap_method"] + .doc("Set timestep adaptivity function: pid, pi, i, explicit_gustafsson, " + "implicit_gustafsson, imex_gustafsson.") + .withDefault(AdapMethod::PID)), abstol((*options)["atol"].doc("Absolute tolerance").withDefault(1.0e-12)), reltol((*options)["rtol"].doc("Relative tolerance").withDefault(1.0e-5)), use_vector_abstol((*options)["use_vector_abstol"] @@ -226,7 +133,7 @@ ArkodeSolver::ArkodeSolver(Options* opts) .withDefault(false)), optimize( (*options)["optimize"].doc("Use ARKode optimal parameters").withDefault(false)), - suncontext(static_cast(&BoutComm::get())) { + suncontext(createSUNContext(BoutComm::get())) { has_constraints = false; // This solver doesn't have constraints // Add diagnostics to output @@ -243,10 +150,14 @@ ArkodeSolver::ArkodeSolver(Options* opts) } ArkodeSolver::~ArkodeSolver() { - N_VDestroy_Parallel(uvec); + N_VDestroy(uvec); ARKStepFree(&arkode_mem); SUNLinSolFree(sun_solver); 
SUNNonlinSolFree(nonlinear_solver); + +#if SUNDIALS_CONTROLLER_SUPPORT + SUNAdaptController_Destroy(controller); +#endif } /************************************************************************** @@ -274,50 +185,55 @@ int ArkodeSolver::init() { n2Dvars(), neq, local_N); // Allocate memory - if ((uvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) { + uvec = callWithSUNContext(N_VNew_Parallel, suncontext, BoutComm::get(), local_N, neq); + if (uvec == nullptr) { throw BoutException("SUNDIALS memory allocation failed\n"); } // Put the variables into uvec - save_vars(NV_DATA_P(uvec)); - - ASSERT1(solve_explicit or solve_implicit); - - const auto& explicit_rhs = [this]() { - if (imex) { - return arkode_rhs_explicit; - } else { - return solve_explicit ? arkode_rhs : nullptr; - } - }(); - const auto& implicit_rhs = [this]() { - if (imex) { - return arkode_rhs_implicit; - } else { - return solve_implicit ? arkode_rhs : nullptr; - } - }(); - - if ((arkode_mem = ARKStepCreate(explicit_rhs, implicit_rhs, simtime, uvec, suncontext)) - == nullptr) { + save_vars(N_VGetArrayPointer(uvec)); + + switch (treatment) { + case Treatment::ImEx: + arkode_mem = callWithSUNContext(ARKStepCreate, suncontext, arkode_rhs_explicit, + arkode_rhs_implicit, simtime, uvec); + break; + case Treatment::Explicit: + arkode_mem = + callWithSUNContext(ARKStepCreate, suncontext, arkode_rhs, nullptr, simtime, uvec); + break; + case Treatment::Implicit: + arkode_mem = + callWithSUNContext(ARKStepCreate, suncontext, nullptr, arkode_rhs, simtime, uvec); + break; + default: + throw BoutException("Invalid treatment: {}\n", toString(treatment)); + } + if (arkode_mem == nullptr) { throw BoutException("ARKStepCreate failed\n"); } - if (imex and solve_explicit and solve_implicit) { + switch (treatment) { + case Treatment::ImEx: output_info.write("\tUsing ARKode ImEx solver \n"); if (ARKStepSetImEx(arkode_mem) != ARK_SUCCESS) { throw BoutException("ARKStepSetImEx failed\n"); } - } else 
if (solve_explicit) { + break; + case Treatment::Explicit: output_info.write("\tUsing ARKStep Explicit solver \n"); if (ARKStepSetExplicit(arkode_mem) != ARK_SUCCESS) { throw BoutException("ARKStepSetExplicit failed\n"); } - } else { + break; + case Treatment::Implicit: output_info.write("\tUsing ARKStep Implicit solver \n"); if (ARKStepSetImplicit(arkode_mem) != ARK_SUCCESS) { throw BoutException("ARKStepSetImplicit failed\n"); } + break; + default: + throw BoutException("Invalid treatment: {}\n", toString(treatment)); } // For callbacks, need pointer to solver object @@ -325,11 +241,8 @@ int ArkodeSolver::init() { throw BoutException("ARKStepSetUserData failed\n"); } - if (set_linear) { - output.write("\tSetting ARKStep implicit solver to Linear\n"); - if (ARKStepSetLinear(arkode_mem, 1) != ARK_SUCCESS) { - throw BoutException("ARKStepSetLinear failed\n"); - } + if (ARKStepSetLinear(arkode_mem, set_linear) != ARK_SUCCESS) { + throw BoutException("ARKStepSetLinear failed\n"); } if (fixed_step) { @@ -344,13 +257,84 @@ int ArkodeSolver::init() { throw BoutException("ARKStepSetOrder failed\n"); } +#if SUNDIALS_TABLE_BY_NAME_SUPPORT + if (!implicit_table.empty() || !explicit_table.empty()) { + if (ARKStepSetTableName( + arkode_mem, + implicit_table.empty() ? "ARKODE_DIRK_NONE" : implicit_table.c_str(), + explicit_table.empty() ? 
"ARKODE_ERK_NONE" : explicit_table.c_str()) + != ARK_SUCCESS) { + throw BoutException("ARKStepSetTableName failed\n"); + } + } +#endif + if (ARKStepSetCFLFraction(arkode_mem, cfl_frac) != ARK_SUCCESS) { throw BoutException("ARKStepSetCFLFraction failed\n"); } - if (ARKStepSetAdaptivityMethod(arkode_mem, adap_method, 1, 1, nullptr) != ARK_SUCCESS) { +#if SUNDIALS_CONTROLLER_SUPPORT + switch (adap_method) { + case AdapMethod::PID: + controller = SUNAdaptController_PID(suncontext); + break; + case AdapMethod::PI: + controller = SUNAdaptController_PI(suncontext); + break; + case AdapMethod::I: + controller = SUNAdaptController_I(suncontext); + break; + case AdapMethod::Explicit_Gustafsson: + controller = SUNAdaptController_ExpGus(suncontext); + break; + case AdapMethod::Implicit_Gustafsson: + controller = SUNAdaptController_ImpGus(suncontext); + break; + case AdapMethod::ImEx_Gustafsson: + controller = SUNAdaptController_ImExGus(suncontext); + break; + default: + throw BoutException("Invalid adap_method\n"); + } + + if (ARKStepSetAdaptController(arkode_mem, controller) != ARK_SUCCESS) { + throw BoutException("ARKStepSetAdaptController failed\n"); + } + + if (ARKStepSetAdaptivityAdjustment(arkode_mem, 0) != ARK_SUCCESS) { + throw BoutException("ARKStepSetAdaptivityAdjustment failed\n"); + } +#else + int adap_method_int; + // Could cast to underlying integer, but this is more explicit + switch (adap_method) { + case AdapMethod::PID: + adap_method_int = 0; + break; + case AdapMethod::PI: + adap_method_int = 1; + break; + case AdapMethod::I: + adap_method_int = 2; + break; + case AdapMethod::Explicit_Gustafsson: + adap_method_int = 3; + break; + case AdapMethod::Implicit_Gustafsson: + adap_method_int = 4; + break; + case AdapMethod::ImEx_Gustafsson: + adap_method_int = 5; + break; + default: + throw BoutException("Invalid adap_method\n"); + } + + if (ARKStepSetAdaptivityMethod(arkode_mem, adap_method_int, 1, 1, nullptr) + != ARK_SUCCESS) { throw 
BoutException("ARKStepSetAdaptivityMethod failed\n"); } +#endif if (use_vector_abstol) { std::vector f2dtols; @@ -374,18 +358,18 @@ int ArkodeSolver::init() { return Options::root()[f3.name]["atol"].withDefault(abstol); }); - N_Vector abstolvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext); + N_Vector abstolvec = N_VClone(uvec); if (abstolvec == nullptr) { throw BoutException("SUNDIALS memory allocation (abstol vector) failed\n"); } - set_abstol_values(NV_DATA_P(abstolvec), f2dtols, f3dtols); + set_abstol_values(N_VGetArrayPointer(abstolvec), f2dtols, f3dtols); if (ARKStepSVtolerances(arkode_mem, reltol, abstolvec) != ARK_SUCCESS) { throw BoutException("ARKStepSVtolerances failed\n"); } - N_VDestroy_Parallel(abstolvec); + N_VDestroy(abstolvec); } else { if (ARKStepSStolerances(arkode_mem, reltol, abstol) != ARK_SUCCESS) { throw BoutException("ARKStepSStolerances failed\n"); @@ -414,130 +398,94 @@ int ArkodeSolver::init() { } } - // ARKStepSetPredictorMethod(arkode_mem,4); - -#if SUNDIALS_VERSION_MAJOR < 4 - if (fixed_point) { - output.write("\tUsing accelerated fixed point solver\n"); - if (ARKodeSetFixedPoint(arkode_mem, 3.0)) { - throw BoutException("ARKodeSetFixedPoint failed\n"); - } - } else { - output.write("\tUsing Newton iteration\n"); - if (ARKodeSetNewton(arkode_mem)) { - throw BoutException("ARKodeSetNewton failed\n"); - } - } -#else - if (fixed_point) { - output.write("\tUsing accelerated fixed point solver\n"); - if ((nonlinear_solver = SUNNonlinSol_FixedPoint(uvec, 3, suncontext)) == nullptr) { - throw BoutException("Creating SUNDIALS fixed point nonlinear solver failed\n"); - } - } else { - output.write("\tUsing Newton iteration\n"); - if ((nonlinear_solver = SUNNonlinSol_Newton(uvec, suncontext)) == nullptr) { - throw BoutException("Creating SUNDIALS Newton nonlinear solver failed\n"); - } - } - if (ARKStepSetNonlinearSolver(arkode_mem, nonlinear_solver) != ARK_SUCCESS) { - throw BoutException("ARKStepSetNonlinearSolver failed\n"); - 
} -#endif - - /// Set Preconditioner - if (use_precon) { - const int prectype = rightprec ? SUN_PREC_RIGHT : SUN_PREC_LEFT; - -#if SUNDIALS_VERSION_MAJOR >= 3 - if ((sun_solver = SUNLinSol_SPGMR(uvec, prectype, maxl, suncontext)) == nullptr) { - throw BoutException("Creating SUNDIALS linear solver failed\n"); - } - if (ARKStepSetLinearSolver(arkode_mem, sun_solver, nullptr) != ARK_SUCCESS) { - throw BoutException("ARKStepSetLinearSolver failed\n"); - } -#else - if (ARKSpgmr(arkode_mem, prectype, maxl) != ARKSPILS_SUCCESS) { - throw BoutException("ARKSpgmr failed\n"); - } -#endif - - if (!hasPreconditioner()) { - output.write("\tUsing BBD preconditioner\n"); - - /// Get options - // Compute band_width_default from actually added fields, to allow for multiple - // Mesh objects - // - // Previous implementation was equivalent to: - // int MXSUB = mesh->xend - mesh->xstart + 1; - // int band_width_default = n3Dvars()*(MXSUB+2); - const int band_width_default = std::accumulate( - begin(f3d), end(f3d), 0, [](int a, const VarStr& fvar) { - Mesh* localmesh = fvar.var->getMesh(); - return a + localmesh->xend - localmesh->xstart + 3; - }); - - const auto mudq = (*options)["mudq"] - .doc("Upper half-bandwidth to be used in the difference " - "quotient Jacobian approximation") - .withDefault(band_width_default); - const auto mldq = (*options)["mldq"] - .doc("Lower half-bandwidth to be used in the difference " - "quotient Jacobian approximation") - .withDefault(band_width_default); - const auto mukeep = (*options)["mukeep"] - .doc("Upper half-bandwidth of the retained banded " - "approximate Jacobian block") - .withDefault(n3Dvars() + n2Dvars()); - const auto mlkeep = (*options)["mlkeep"] - .doc("Lower half-bandwidth of the retained banded " - "approximate Jacobian block") - .withDefault(n3Dvars() + n2Dvars()); - - if (ARKBBDPrecInit(arkode_mem, local_N, mudq, mldq, mukeep, mlkeep, ZERO, - arkode_bbd_rhs, nullptr) - != ARK_SUCCESS) { - throw BoutException("ARKBBDPrecInit 
failed\n"); + if (treatment == Treatment::ImEx or treatment == Treatment::Implicit) { + if (fixed_point) { + output.write("\tUsing accelerated fixed point solver\n"); + nonlinear_solver = callWithSUNContext(SUNNonlinSol_FixedPoint, suncontext, uvec, 3); + if (nonlinear_solver == nullptr) { + throw BoutException("Creating SUNDIALS fixed point nonlinear solver failed\n"); + } + if (ARKStepSetNonlinearSolver(arkode_mem, nonlinear_solver) != ARK_SUCCESS) { + throw BoutException("ARKStepSetNonlinearSolver failed\n"); } - } else { - output.write("\tUsing user-supplied preconditioner\n"); + output.write("\tUsing Newton iteration\n"); - if (ARKStepSetPreconditioner(arkode_mem, nullptr, arkode_pre_shim) != ARK_SUCCESS) { - throw BoutException("ARKStepSetPreconditioner failed\n"); + const auto prectype = + use_precon ? (rightprec ? SUN_PREC_RIGHT : SUN_PREC_LEFT) : SUN_PREC_NONE; + sun_solver = callWithSUNContext(SUNLinSol_SPGMR, suncontext, uvec, prectype, maxl); + if (sun_solver == nullptr) { + throw BoutException("Creating SUNDIALS linear solver failed\n"); + } + if (ARKStepSetLinearSolver(arkode_mem, sun_solver, nullptr) != ARKLS_SUCCESS) { + throw BoutException("ARKStepSetLinearSolver failed\n"); } - } - } else { - // Not using preconditioning - - output.write("\tNo preconditioning\n"); -#if SUNDIALS_VERSION_MAJOR >= 3 - if ((sun_solver = SUNLinSol_SPGMR(uvec, SUN_PREC_NONE, maxl, suncontext)) - == nullptr) { - throw BoutException("Creating SUNDIALS linear solver failed\n"); - } - if (ARKStepSetLinearSolver(arkode_mem, sun_solver, nullptr) != ARK_SUCCESS) { - throw BoutException("ARKStepSetLinearSolver failed\n"); - } -#else - if (ARKSpgmr(arkode_mem, SUN_PREC_NONE, maxl) != ARKSPILS_SUCCESS) { - throw BoutException("ARKSpgmr failed\n"); + /// Set Preconditioner + if (use_precon) { + if (hasPreconditioner()) { + output.write("\tUsing user-supplied preconditioner\n"); + + if (ARKStepSetPreconditioner(arkode_mem, nullptr, arkode_pre) + != ARKLS_SUCCESS) { + throw 
BoutException("ARKStepSetPreconditioner failed\n"); + } + } else { + output.write("\tUsing BBD preconditioner\n"); + + /// Get options + // Compute band_width_default from actually added fields, to allow for multiple + // Mesh objects + // + // Previous implementation was equivalent to: + // int MXSUB = mesh->xend - mesh->xstart + 1; + // int band_width_default = n3Dvars()*(MXSUB+2); + const int band_width_default = std::accumulate( + begin(f3d), end(f3d), 0, [](int acc, const VarStr& fvar) { + Mesh* localmesh = fvar.var->getMesh(); + return acc + localmesh->xend - localmesh->xstart + 3; + }); + + const auto mudq = (*options)["mudq"] + .doc("Upper half-bandwidth to be used in the difference " + "quotient Jacobian approximation") + .withDefault(band_width_default); + const auto mldq = (*options)["mldq"] + .doc("Lower half-bandwidth to be used in the difference " + "quotient Jacobian approximation") + .withDefault(band_width_default); + const auto mukeep = (*options)["mukeep"] + .doc("Upper half-bandwidth of the retained banded " + "approximate Jacobian block") + .withDefault(n3Dvars() + n2Dvars()); + const auto mlkeep = (*options)["mlkeep"] + .doc("Lower half-bandwidth of the retained banded " + "approximate Jacobian block") + .withDefault(n3Dvars() + n2Dvars()); + + if (ARKBBDPrecInit(arkode_mem, local_N, mudq, mldq, mukeep, mlkeep, 0, + arkode_bbd_rhs, nullptr) + != ARKLS_SUCCESS) { + throw BoutException("ARKBBDPrecInit failed\n"); + } + } + } else { + // Not using preconditioning + output.write("\tNo preconditioning\n"); + } } -#endif - } - /// Set Jacobian-vector multiplication function + /// Set Jacobian-vector multiplication function - if (use_jacobian and hasJacobian()) { - output.write("\tUsing user-supplied Jacobian function\n"); + if (use_jacobian and hasJacobian()) { + output.write("\tUsing user-supplied Jacobian function\n"); - if (ARKStepSetJacTimes(arkode_mem, nullptr, arkode_jac) != ARK_SUCCESS) { - throw BoutException("ARKStepSetJacTimesVecFn 
failed\n"); + if (ARKStepSetJacTimes(arkode_mem, nullptr, arkode_jac) != ARKLS_SUCCESS) { + throw BoutException("ARKStepSetJacTimes failed\n"); + } + } else { + output.write("\tUsing difference quotient approximation for Jacobian\n"); } - } else { - output.write("\tUsing difference quotient approximation for Jacobian\n"); } if (optimize) { @@ -580,24 +528,27 @@ int ArkodeSolver::run() { ARKStepGetNumRhsEvals(arkode_mem, &temp_long_int, &temp_long_int2); nfe_evals = int(temp_long_int); nfi_evals = int(temp_long_int2); - ARKStepGetNumNonlinSolvIters(arkode_mem, &temp_long_int); - nniters = int(temp_long_int); - ARKStepGetNumPrecEvals(arkode_mem, &temp_long_int); - npevals = int(temp_long_int); - ARKStepGetNumLinIters(arkode_mem, &temp_long_int); - nliters = int(temp_long_int); + if (treatment == Treatment::ImEx or treatment == Treatment::Implicit) { + ARKStepGetNumNonlinSolvIters(arkode_mem, &temp_long_int); + nniters = int(temp_long_int); + ARKStepGetNumPrecEvals(arkode_mem, &temp_long_int); + npevals = int(temp_long_int); + ARKStepGetNumLinIters(arkode_mem, &temp_long_int); + nliters = int(temp_long_int); + } if (diagnose) { output.write("\nARKODE: nsteps {:d}, nfe_evals {:d}, nfi_evals {:d}, nniters {:d}, " "npevals {:d}, nliters {:d}\n", nsteps, nfe_evals, nfi_evals, nniters, npevals, nliters); - - output.write(" -> Newton iterations per step: {:e}\n", - static_cast(nniters) / static_cast(nsteps)); - output.write(" -> Linear iterations per Newton iteration: {:e}\n", - static_cast(nliters) / static_cast(nniters)); - output.write(" -> Preconditioner evaluations per Newton: {:e}\n", - static_cast(npevals) / static_cast(nniters)); + if (treatment == Treatment::ImEx or treatment == Treatment::Implicit) { + output.write(" -> Newton iterations per step: {:e}\n", + static_cast(nniters) / static_cast(nsteps)); + output.write(" -> Linear iterations per Newton iteration: {:e}\n", + static_cast(nliters) / static_cast(nniters)); + output.write(" -> Preconditioner evaluations 
per Newton: {:e}\n", + static_cast(npevals) / static_cast(nniters)); + } } if (call_monitors(simtime, i, getNumberOutputSteps())) { @@ -645,7 +596,7 @@ BoutReal ArkodeSolver::run(BoutReal tout) { } // Copy variables - load_vars(NV_DATA_P(uvec)); + load_vars(N_VGetArrayPointer(uvec)); // Call rhs function to get extra variables at this time run_rhs(simtime); // run_diffusive(simtime); @@ -718,8 +669,8 @@ void ArkodeSolver::pre(BoutReal t, BoutReal gamma, BoutReal delta, BoutReal* uda if (!hasPreconditioner()) { // Identity (but should never happen) - const int N = NV_LOCLENGTH_P(uvec); - std::copy(rvec, rvec + N, zvec); + const auto length = N_VGetLocalLength_Parallel(uvec); + std::copy(rvec, rvec + length, zvec); return; } @@ -766,10 +717,12 @@ void ArkodeSolver::jac(BoutReal t, BoutReal* ydata, BoutReal* vdata, BoutReal* J * ARKODE explicit RHS functions **************************************************************************/ -static int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) { +// NOLINTBEGIN(readability-identifier-length) +namespace { +int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) { - BoutReal* udata = NV_DATA_P(u); - BoutReal* dudata = NV_DATA_P(du); + BoutReal* udata = N_VGetArrayPointer(u); + BoutReal* dudata = N_VGetArrayPointer(du); auto* s = static_cast(user_data); @@ -782,10 +735,10 @@ static int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_d return 0; } -static int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) { +int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) { - BoutReal* udata = NV_DATA_P(u); - BoutReal* dudata = NV_DATA_P(du); + BoutReal* udata = N_VGetArrayPointer(u); + BoutReal* dudata = N_VGetArrayPointer(du); auto* s = static_cast(user_data); @@ -798,10 +751,10 @@ static int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_d return 0; } -static int arkode_rhs(BoutReal t, N_Vector 
u, N_Vector du, void* user_data) { +int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { - BoutReal* udata = NV_DATA_P(u); - BoutReal* dudata = NV_DATA_P(du); + BoutReal* udata = N_VGetArrayPointer(u); + BoutReal* dudata = N_VGetArrayPointer(du); auto* s = static_cast(user_data); @@ -815,18 +768,17 @@ static int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { } /// RHS function for BBD preconditioner -static int arkode_bbd_rhs(ARKODEINT UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, - void* user_data) { +int arkode_bbd_rhs(sunindextype UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, + void* user_data) { return arkode_rhs_implicit(t, u, du, user_data); } /// Preconditioner function -static int arkode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec, - N_Vector zvec, BoutReal gamma, BoutReal delta, int UNUSED(lr), - void* user_data) { - BoutReal* udata = NV_DATA_P(yy); - BoutReal* rdata = NV_DATA_P(rvec); - BoutReal* zdata = NV_DATA_P(zvec); +int arkode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec, N_Vector zvec, + BoutReal gamma, BoutReal delta, int UNUSED(lr), void* user_data) { + BoutReal* udata = N_VGetArrayPointer(yy); + BoutReal* rdata = N_VGetArrayPointer(rvec); + BoutReal* zdata = N_VGetArrayPointer(zvec); auto* s = static_cast(user_data); @@ -837,11 +789,11 @@ static int arkode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rve } /// Jacobian-vector multiplication function -static int arkode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, - N_Vector UNUSED(fy), void* user_data, N_Vector UNUSED(tmp)) { - BoutReal* ydata = NV_DATA_P(y); ///< System state - BoutReal* vdata = NV_DATA_P(v); ///< Input vector - BoutReal* Jvdata = NV_DATA_P(Jv); ///< Jacobian*vector output +int arkode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector UNUSED(fy), + void* user_data, N_Vector UNUSED(tmp)) { + BoutReal* ydata = N_VGetArrayPointer(y); ///< System state + 
BoutReal* vdata = N_VGetArrayPointer(v); ///< Input vector + BoutReal* Jvdata = N_VGetArrayPointer(Jv); ///< Jacobian*vector output auto* s = static_cast(user_data); @@ -849,6 +801,8 @@ static int arkode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, return 0; } +} // namespace +// NOLINTEND(readability-identifier-length) /************************************************************************** * vector abstol functions diff --git a/src/solver/impls/arkode/arkode.hxx b/src/solver/impls/arkode/arkode.hxx index 302413d8aa..4050ed377f 100644 --- a/src/solver/impls/arkode/arkode.hxx +++ b/src/solver/impls/arkode/arkode.hxx @@ -5,9 +5,9 @@ * NOTE: Only one solver can currently be compiled in * ************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu + * Copyright 2010-2024 BOUT++ contributors * - * Contact: Ben Dudson, bd512@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. 
* @@ -41,12 +41,17 @@ RegisterUnavailableSolver #else +#include "bout/bout_enum_class.hxx" #include "bout/bout_types.hxx" #include "bout/sundials_backports.hxx" #include #include +#if SUNDIALS_CONTROLLER_SUPPORT +#include +#endif + #include class ArkodeSolver; @@ -56,6 +61,14 @@ namespace { RegisterSolver registersolverarkode("arkode"); } +// enum describing treatment of equations +// Note: Capitalized because `explicit` is a C++ reserved keyword +BOUT_ENUM_CLASS(Treatment, ImEx, Implicit, Explicit); + +// Adaptivity method +BOUT_ENUM_CLASS(AdapMethod, PID, PI, I, Explicit_Gustafsson, Implicit_Gustafsson, + ImEx_Gustafsson); + class ArkodeSolver : public Solver { public: explicit ArkodeSolver(Options* opts = nullptr); @@ -89,12 +102,8 @@ private: /// Maximum number of steps to take between outputs int mxsteps; - /// Use ImEx capability - bool imex; - /// Solve only explicit part - bool solve_explicit; - /// Solve only implicit part - bool solve_implicit; + /// Integrator treatment enum: IMEX, Implicit or Explicit + Treatment treatment; /// Use linear implicit solver (only evaluates jacobian inversion once) bool set_linear; /// Solve explicit portion in fixed timestep mode. 
NOTE: This is not recommended except @@ -102,16 +111,14 @@ private: bool fixed_step; /// Order of internal step int order; + /// Name of the implicit Butcher table + std::string implicit_table; + /// Name of the explicit Butcher table + std::string explicit_table; /// Fraction of the estimated explicitly stable step to use BoutReal cfl_frac; - /// Set timestep adaptivity function: - /// - 0: PID adaptivity (default) - /// - 1: PI - /// - 2: I - /// - 3: explicit Gustafsson - /// - 4: implicit Gustafsson - /// - 5: ImEx Gustafsson - int adap_method; + /// Timestep adaptivity function + AdapMethod adap_method; /// Absolute tolerance BoutReal abstol; /// Relative tolerance @@ -153,8 +160,12 @@ private: /// SPGMR solver structure SUNLinearSolver sun_solver{nullptr}; - /// Solver for functional iterations for Adams-Moulton + /// Solver for implicit stages SUNNonlinearSolver nonlinear_solver{nullptr}; +#if SUNDIALS_CONTROLLER_SUPPORT + /// Timestep controller + SUNAdaptController controller{nullptr}; +#endif /// Context for SUNDIALS memory allocations sundials::Context suncontext; }; diff --git a/src/solver/impls/cvode/cvode.cxx b/src/solver/impls/cvode/cvode.cxx index c17bed420c..7137ce3304 100644 --- a/src/solver/impls/cvode/cvode.cxx +++ b/src/solver/impls/cvode/cvode.cxx @@ -3,9 +3,9 @@ * * ************************************************************************** - * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu + * Copyright 2010-2024 BOUT++ contributors * - * Contact: Ben Dudson, bd512@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. * @@ -44,16 +44,9 @@ #include "fmt/core.h" #include - -#if SUNDIALS_VERSION_MAJOR >= 3 -#include -#include -#else -#include -#endif - #include #include +#include #include #include @@ -61,68 +54,23 @@ class Field2D; -#define ZERO RCONST(0.) 
-#define ONE RCONST(1.0) - -#ifndef CVODEINT -#if SUNDIALS_VERSION_MAJOR < 3 -using CVODEINT = bout::utils::function_traits::arg_t<0>; -#else -using CVODEINT = sunindextype; -#endif -#endif - BOUT_ENUM_CLASS(positivity_constraint, none, positive, non_negative, negative, non_positive); -static int cvode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data); -static int cvode_bbd_rhs(CVODEINT Nlocal, BoutReal t, N_Vector u, N_Vector du, - void* user_data); - -static int cvode_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, N_Vector zvec, - BoutReal gamma, BoutReal delta, int lr, void* user_data); +// NOLINTBEGIN(readability-identifier-length) +namespace { +int cvode_linear_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data); +int cvode_nonlinear_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data); +int cvode_bbd_rhs(sunindextype Nlocal, BoutReal t, N_Vector u, N_Vector du, + void* user_data); -#if SUNDIALS_VERSION_MAJOR < 3 -// Shim for earlier versions -inline static int cvode_pre_shim(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, - N_Vector zvec, BoutReal gamma, BoutReal delta, int lr, - void* user_data, N_Vector UNUSED(tmp)) { - return cvode_pre(t, yy, yp, rvec, zvec, gamma, delta, lr, user_data); -} -#else -// Alias for newer versions -constexpr auto& cvode_pre_shim = cvode_pre; -#endif - -static int cvode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector fy, - void* user_data, N_Vector tmp); - -#if SUNDIALS_VERSION_MAJOR < 3 -// Shim for earlier versions -inline int CVSpilsSetJacTimes(void* arkode_mem, std::nullptr_t, - CVSpilsJacTimesVecFn jtimes) { - return CVSpilsSetJacTimesVecFn(arkode_mem, jtimes); -} -#endif +int cvode_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, N_Vector zvec, + BoutReal gamma, BoutReal delta, int lr, void* user_data); -#if SUNDIALS_VERSION_MAJOR >= 4 -// Shim for newer versions -constexpr auto CV_FUNCTIONAL = 0; -constexpr auto CV_NEWTON = 0; -#endif - -#if SUNDIALS_VERSION_MAJOR >= 
3 -void* CVodeCreate(int lmm, [[maybe_unused]] int iter, - [[maybe_unused]] SUNContext context) { -#if SUNDIALS_VERSION_MAJOR == 3 - return CVodeCreate(lmm, iter); -#elif SUNDIALS_VERSION_MAJOR == 4 || SUNDIALS_VERSION_MAJOR == 5 - return CVodeCreate(lmm); -#else - return CVodeCreate(lmm, context); -#endif -} -#endif +int cvode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector fy, + void* user_data, N_Vector tmp); +} // namespace +// NOLINTEND(readability-identifier-length) CvodeSolver::CvodeSolver(Options* opts) : Solver(opts), diagnose((*options)["diagnose"] @@ -136,7 +84,7 @@ CvodeSolver::CvodeSolver(Options* opts) .doc("Use functional iteration instead of Newton") .withDefault(adams_moulton)), max_order((*options)["cvode_max_order"] - .doc("Maximum order of method to use. < 0 means no limit.") + .doc("Maximum order of method to use. <= 0 means default limit.") .withDefault(-1)), stablimdet((*options)["cvode_stability_limit_detection"].withDefault(false)), abstol((*options)["atol"].doc("Absolute tolerance").withDefault(1.0e-12)), @@ -148,19 +96,18 @@ CvodeSolver::CvodeSolver(Options* opts) .doc("Maximum number of internal steps between outputs.") .withDefault(500)), max_timestep( - (*options)["max_timestep"].doc("Maximum time step size").withDefault(-1.0)), + (*options)["max_timestep"].doc("Maximum time step size").withDefault(0.0)), min_timestep( - (*options)["min_timestep"].doc("Minimum time step size").withDefault(-1.0)), + (*options)["min_timestep"].doc("Minimum time step size").withDefault(0.0)), start_timestep((*options)["start_timestep"] - .doc("Starting time step. < 0 then chosen by CVODE.") - .withDefault(-1.0)), + .doc("Starting time step. = 0 then chosen by CVODE.") + .withDefault(0.0)), mxorder((*options)["mxorder"].doc("Maximum order").withDefault(-1)), max_nonlinear_iterations( (*options)["max_nonlinear_iterations"] .doc("Maximum number of nonlinear iterations allowed by CVODE before " - "reducing " - "timestep. 
CVODE default (used if this option is negative) is 3.") - .withDefault(-1)), + "reducing timestep.") + .withDefault(3)), apply_positivity_constraints( (*options)["apply_positivity_constraints"] .doc("Use CVODE function CVodeSetConstraints to constrain variables - the " @@ -184,7 +131,7 @@ CvodeSolver::CvodeSolver(Options* opts) .doc("Factor by which the Krylov linear solver’s convergence test constant " "is reduced from the nonlinear solver test constant.") .withDefault(0.05)), - suncontext(static_cast(&BoutComm::get())) { + suncontext(createSUNContext(BoutComm::get())) { has_constraints = false; // This solver doesn't have constraints canReset = true; @@ -210,7 +157,7 @@ CvodeSolver::CvodeSolver(Options* opts) CvodeSolver::~CvodeSolver() { if (cvode_initialised) { - N_VDestroy_Parallel(uvec); + N_VDestroy(uvec); CVodeFree(&cvode_mem); SUNLinSolFree(sun_solver); SUNNonlinSolFree(nonlinear_solver); @@ -242,12 +189,13 @@ int CvodeSolver::init() { n3Dvars(), n2Dvars(), neq, local_N); // Allocate memory - if ((uvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) { + uvec = callWithSUNContext(N_VNew_Parallel, suncontext, BoutComm::get(), local_N, neq); + if (uvec == nullptr) { throw BoutException("SUNDIALS memory allocation failed\n"); } // Put the variables into uvec - save_vars(NV_DATA_P(uvec)); + save_vars(N_VGetArrayPointer(uvec)); if (adams_moulton) { // By default use functional iteration for Adams-Moulton @@ -258,31 +206,43 @@ int CvodeSolver::init() { } const auto lmm = adams_moulton ? CV_ADAMS : CV_BDF; - const auto iter = func_iter ? 
CV_FUNCTIONAL : CV_NEWTON; - if ((cvode_mem = CVodeCreate(lmm, iter, suncontext)) == nullptr) { + cvode_mem = callWithSUNContext(CVodeCreate, suncontext, lmm); + if (cvode_mem == nullptr) { throw BoutException("CVodeCreate failed\n"); } // For callbacks, need pointer to solver object - if (CVodeSetUserData(cvode_mem, this) < 0) { + if (CVodeSetUserData(cvode_mem, this) != CV_SUCCESS) { throw BoutException("CVodeSetUserData failed\n"); } - if (CVodeInit(cvode_mem, cvode_rhs, simtime, uvec) < 0) { +#if SUNDIALS_VERSION_MAJOR >= 6 + // Set the default RHS to linear, then pass nonlinear rhs to NL solver + if (CVodeInit(cvode_mem, cvode_linear_rhs, simtime, uvec) != CV_SUCCESS) { + throw BoutException("CVodeInit failed\n"); + } +#else + if (CVodeInit(cvode_mem, cvode_nonlinear_rhs, simtime, uvec) != CV_SUCCESS) { throw BoutException("CVodeInit failed\n"); } +#endif + if (mxorder > 0) { + output_warn << "WARNING: Option 'mxorder' is deprecated. Please use " + "'cvode_max_order' instead\n"; + if (CVodeSetMaxOrd(cvode_mem, mxorder) != CV_SUCCESS) { + throw BoutException("CVodeSetMaxOrder failed\n"); + } + } if (max_order > 0) { - if (CVodeSetMaxOrd(cvode_mem, max_order) < 0) { + if (CVodeSetMaxOrd(cvode_mem, max_order) != CV_SUCCESS) { throw BoutException("CVodeSetMaxOrder failed\n"); } } - if (stablimdet) { - if (CVodeSetStabLimDet(cvode_mem, stablimdet) < 0) { - throw BoutException("CVodeSetStabLimDet failed\n"); - } + if (CVodeSetStabLimDet(cvode_mem, static_cast(stablimdet)) != CV_SUCCESS) { + throw BoutException("CVodeSetStabLimDet failed\n"); } if (use_vector_abstol) { @@ -307,94 +267,97 @@ int CvodeSolver::init() { return Options::root()[f3.name]["atol"].withDefault(abstol); }); - N_Vector abstolvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext); + N_Vector abstolvec = N_VClone(uvec); if (abstolvec == nullptr) { throw BoutException("SUNDIALS memory allocation (abstol vector) failed\n"); } - set_vector_option_values(NV_DATA_P(abstolvec), f2dtols, 
f3dtols); + set_vector_option_values(N_VGetArrayPointer(abstolvec), f2dtols, f3dtols); - if (CVodeSVtolerances(cvode_mem, reltol, abstolvec) < 0) { + if (CVodeSVtolerances(cvode_mem, reltol, abstolvec) != CV_SUCCESS) { throw BoutException("CVodeSVtolerances failed\n"); } - N_VDestroy_Parallel(abstolvec); + N_VDestroy(abstolvec); } else { - if (CVodeSStolerances(cvode_mem, reltol, abstol) < 0) { + if (CVodeSStolerances(cvode_mem, reltol, abstol) != CV_SUCCESS) { throw BoutException("CVodeSStolerances failed\n"); } } - CVodeSetMaxNumSteps(cvode_mem, mxsteps); - - if (max_timestep > 0.0) { - CVodeSetMaxStep(cvode_mem, max_timestep); + if (CVodeSetMaxNumSteps(cvode_mem, mxsteps) != CV_SUCCESS) { + throw BoutException("CVodeSetMaxNumSteps failed\n"); } - if (min_timestep > 0.0) { - CVodeSetMinStep(cvode_mem, min_timestep); + if (CVodeSetMaxStep(cvode_mem, max_timestep) != CV_SUCCESS) { + throw BoutException("CVodeSetMaxStep failed\n"); } - if (start_timestep > 0.0) { - CVodeSetInitStep(cvode_mem, start_timestep); + if (CVodeSetMinStep(cvode_mem, min_timestep) != CV_SUCCESS) { + throw BoutException("CVodeSetMinStep failed\n"); } - if (mxorder > 0) { - CVodeSetMaxOrd(cvode_mem, mxorder); + if (CVodeSetInitStep(cvode_mem, start_timestep) != CV_SUCCESS) { + throw BoutException("CVodeSetInitStep failed\n"); } - if (max_nonlinear_iterations > 0) { - CVodeSetMaxNonlinIters(cvode_mem, max_nonlinear_iterations); + if (CVodeSetMaxNonlinIters(cvode_mem, max_nonlinear_iterations) != CV_SUCCESS) { + throw BoutException("CVodeSetMaxNonlinIters failed\n"); } -#if not(SUNDIALS_VERSION_MAJOR >= 3 and SUNDIALS_VERSION_MINOR >= 2) - if (apply_positivity_constraints) { - throw BoutException("The apply_positivity_constraints option is only available with " - "SUNDIALS>=3.2.0"); - } -#else if (apply_positivity_constraints) { auto f2d_constraints = create_constraints(f2d); auto f3d_constraints = create_constraints(f3d); - N_Vector constraints_vec = N_VNew_Parallel(BoutComm::get(), local_N, 
neq, suncontext); + N_Vector constraints_vec = N_VClone(uvec); if (constraints_vec == nullptr) { throw BoutException("SUNDIALS memory allocation (positivity constraints vector) " "failed\n"); } - set_vector_option_values(NV_DATA_P(constraints_vec), f2d_constraints, + set_vector_option_values(N_VGetArrayPointer(constraints_vec), f2d_constraints, f3d_constraints); - if (CVodeSetConstraints(cvode_mem, constraints_vec) < 0) { + if (CVodeSetConstraints(cvode_mem, constraints_vec) != CV_SUCCESS) { throw BoutException("CVodeSetConstraints failed\n"); } - N_VDestroy_Parallel(constraints_vec); + N_VDestroy(constraints_vec); } -#endif /// Newton method can include Preconditioners and Jacobian function - if (!func_iter) { + if (func_iter) { + output_info.write("\tUsing Functional iteration\n"); + nonlinear_solver = callWithSUNContext(SUNNonlinSol_FixedPoint, suncontext, uvec, 0); + if (nonlinear_solver == nullptr) { + throw BoutException("SUNNonlinSol_FixedPoint failed\n"); + } + + if (CVodeSetNonlinearSolver(cvode_mem, nonlinear_solver) != 0) { + throw BoutException("CVodeSetNonlinearSolver failed\n"); + } + } else { output_info.write("\tUsing Newton iteration\n"); TRACE("Setting preconditioner"); - if (use_precon) { - const int prectype = rightprec ? SUN_PREC_RIGHT : SUN_PREC_LEFT; -#if SUNDIALS_VERSION_MAJOR >= 3 - if ((sun_solver = SUNLinSol_SPGMR(uvec, prectype, maxl, suncontext)) == nullptr) { - throw BoutException("Creating SUNDIALS linear solver failed\n"); - } - if (CVSpilsSetLinearSolver(cvode_mem, sun_solver) != CV_SUCCESS) { - throw BoutException("CVSpilsSetLinearSolver failed\n"); - } -#else - if (CVSpgmr(cvode_mem, prectype, maxl) != CVSPILS_SUCCESS) { - throw BoutException("CVSpgmr failed\n"); - } -#endif + const auto prectype = + use_precon ? (rightprec ? 
SUN_PREC_RIGHT : SUN_PREC_LEFT) : SUN_PREC_NONE; + sun_solver = callWithSUNContext(SUNLinSol_SPGMR, suncontext, uvec, prectype, maxl); + if (sun_solver == nullptr) { + throw BoutException("Creating SUNDIALS linear solver failed\n"); + } + if (CVodeSetLinearSolver(cvode_mem, sun_solver, nullptr) != CVLS_SUCCESS) { + throw BoutException("CVodeSetLinearSolver failed\n"); + } + + if (use_precon) { + if (hasPreconditioner()) { + output_info.write("\tUsing user-supplied preconditioner\n"); - if (!hasPreconditioner()) { + if (CVodeSetPreconditioner(cvode_mem, nullptr, cvode_pre) != CVLS_SUCCESS) { + throw BoutException("CVodeSetPreconditioner failed\n"); + } + } else { output_info.write("\tUsing BBD preconditioner\n"); /// Get options @@ -415,62 +378,41 @@ int CvodeSolver::init() { const auto mukeep = (*options)["mukeep"].withDefault(n3Dvars() + n2Dvars()); const auto mlkeep = (*options)["mlkeep"].withDefault(n3Dvars() + n2Dvars()); - if (CVBBDPrecInit(cvode_mem, local_N, mudq, mldq, mukeep, mlkeep, ZERO, - cvode_bbd_rhs, nullptr)) { + if (CVBBDPrecInit(cvode_mem, local_N, mudq, mldq, mukeep, mlkeep, 0.0, + cvode_bbd_rhs, nullptr) + != CVLS_SUCCESS) { throw BoutException("CVBBDPrecInit failed\n"); } - - } else { - output_info.write("\tUsing user-supplied preconditioner\n"); - - if (CVSpilsSetPreconditioner(cvode_mem, nullptr, cvode_pre_shim)) { - throw BoutException("CVSpilsSetPreconditioner failed\n"); - } } } else { output_info.write("\tNo preconditioning\n"); - -#if SUNDIALS_VERSION_MAJOR >= 3 - if ((sun_solver = SUNLinSol_SPGMR(uvec, SUN_PREC_NONE, maxl, suncontext)) - == nullptr) { - throw BoutException("Creating SUNDIALS linear solver failed\n"); - } - if (CVSpilsSetLinearSolver(cvode_mem, sun_solver) != CV_SUCCESS) { - throw BoutException("CVSpilsSetLinearSolver failed\n"); - } -#else - if (CVSpgmr(cvode_mem, SUN_PREC_NONE, maxl) != CVSPILS_SUCCESS) { - throw BoutException("CVSpgmr failed\n"); - } -#endif } /// Set Jacobian-vector multiplication function if 
(use_jacobian and hasJacobian()) { output_info.write("\tUsing user-supplied Jacobian function\n"); - if (CVSpilsSetJacTimes(cvode_mem, nullptr, cvode_jac) != CV_SUCCESS) { - throw BoutException("CVSpilsSetJacTimesVecFn failed\n"); + if (CVodeSetJacTimes(cvode_mem, nullptr, cvode_jac) != CVLS_SUCCESS) { + throw BoutException("CVodeSetJacTimes failed\n"); } } else { output_info.write("\tUsing difference quotient approximation for Jacobian\n"); } - } else { - output_info.write("\tUsing Functional iteration\n"); -#if SUNDIALS_VERSION_MAJOR >= 4 - if ((nonlinear_solver = SUNNonlinSol_FixedPoint(uvec, 0, suncontext)) == nullptr) { - throw BoutException("SUNNonlinSol_FixedPoint failed\n"); - } + } - if (CVodeSetNonlinearSolver(cvode_mem, nonlinear_solver)) { - throw BoutException("CVodeSetNonlinearSolver failed\n"); - } +#if SUNDIALS_VERSION_MAJOR >= 6 + // Set the RHS function to be used in the nonlinear solver + CVodeSetNlsRhsFn(cvode_mem, cvode_nonlinear_rhs); #endif - } // Set internal tolerance factors - CVodeSetNonlinConvCoef(cvode_mem, cvode_nonlinear_convergence_coef); - CVodeSetEpsLin(cvode_mem, cvode_linear_convergence_coef); + if (CVodeSetNonlinConvCoef(cvode_mem, cvode_nonlinear_convergence_coef) != CV_SUCCESS) { + throw BoutException("CVodeSetNonlinConvCoef failed\n"); + } + + if (CVodeSetEpsLin(cvode_mem, cvode_linear_convergence_coef) != CV_SUCCESS) { + throw BoutException("CVodeSetEpsLin failed\n"); + } cvode_initialised = true; @@ -544,9 +486,9 @@ int CvodeSolver::run() { nfevals = int(temp_long_int); CVodeGetNumNonlinSolvIters(cvode_mem, &temp_long_int); nniters = int(temp_long_int); - CVSpilsGetNumPrecSolves(cvode_mem, &temp_long_int); + CVodeGetNumPrecSolves(cvode_mem, &temp_long_int); npevals = int(temp_long_int); - CVSpilsGetNumLinIters(cvode_mem, &temp_long_int); + CVodeGetNumLinIters(cvode_mem, &temp_long_int); nliters = int(temp_long_int); // Last step size @@ -634,7 +576,7 @@ BoutReal CvodeSolver::run(BoutReal tout) { } // Copy variables - 
load_vars(NV_DATA_P(uvec)); + load_vars(N_VGetArrayPointer(uvec)); // Call rhs function to get extra variables at this time run_rhs(simtime); @@ -651,7 +593,7 @@ BoutReal CvodeSolver::run(BoutReal tout) { * RHS function du = F(t, u) **************************************************************************/ -void CvodeSolver::rhs(BoutReal t, BoutReal* udata, BoutReal* dudata) { +void CvodeSolver::rhs(BoutReal t, BoutReal* udata, BoutReal* dudata, bool linear) { TRACE("Running RHS: CvodeSolver::res({})", t); // Load state from udata @@ -662,7 +604,7 @@ void CvodeSolver::rhs(BoutReal t, BoutReal* udata, BoutReal* dudata) { CVodeGetLastStep(cvode_mem, &hcur); // Call RHS function - run_rhs(t); + run_rhs(t, linear); // Save derivatives to dudata save_derivs(dudata); @@ -678,11 +620,11 @@ void CvodeSolver::pre(BoutReal t, BoutReal gamma, BoutReal delta, BoutReal* udat BoutReal tstart = bout::globals::mpi->MPI_Wtime(); - int N = NV_LOCLENGTH_P(uvec); + const auto length = N_VGetLocalLength_Parallel(uvec); if (!hasPreconditioner()) { // Identity (but should never happen) - for (int i = 0; i < N; i++) { + for (int i = 0; i < length; i++) { zvec[i] = rvec[i]; } return; @@ -731,16 +673,34 @@ void CvodeSolver::jac(BoutReal t, BoutReal* ydata, BoutReal* vdata, BoutReal* Jv * CVODE RHS functions **************************************************************************/ -static int cvode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { +// NOLINTBEGIN(readability-identifier-length) +namespace { +int cvode_linear_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { + + BoutReal* udata = N_VGetArrayPointer(u); + BoutReal* dudata = N_VGetArrayPointer(du); + + auto* s = static_cast(user_data); + + // Calculate RHS function + try { + s->rhs(t, udata, dudata, true); + } catch (BoutRhsFail& error) { + return 1; + } + return 0; +} + +int cvode_nonlinear_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { - BoutReal* udata = NV_DATA_P(u); - BoutReal* dudata = 
NV_DATA_P(du); + BoutReal* udata = N_VGetArrayPointer(u); + BoutReal* dudata = N_VGetArrayPointer(du); auto* s = static_cast(user_data); // Calculate RHS function try { - s->rhs(t, udata, dudata); + s->rhs(t, udata, dudata, false); } catch (BoutRhsFail& error) { return 1; } @@ -748,18 +708,17 @@ static int cvode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { } /// RHS function for BBD preconditioner -static int cvode_bbd_rhs(CVODEINT UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, - void* user_data) { - return cvode_rhs(t, u, du, user_data); +int cvode_bbd_rhs(sunindextype UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, + void* user_data) { + return cvode_linear_rhs(t, u, du, user_data); } /// Preconditioner function -static int cvode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec, - N_Vector zvec, BoutReal gamma, BoutReal delta, int UNUSED(lr), - void* user_data) { - BoutReal* udata = NV_DATA_P(yy); - BoutReal* rdata = NV_DATA_P(rvec); - BoutReal* zdata = NV_DATA_P(zvec); +int cvode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec, N_Vector zvec, + BoutReal gamma, BoutReal delta, int UNUSED(lr), void* user_data) { + BoutReal* udata = N_VGetArrayPointer(yy); + BoutReal* rdata = N_VGetArrayPointer(rvec); + BoutReal* zdata = N_VGetArrayPointer(zvec); auto* s = static_cast(user_data); @@ -770,11 +729,11 @@ static int cvode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec } /// Jacobian-vector multiplication function -static int cvode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector UNUSED(fy), - void* user_data, N_Vector UNUSED(tmp)) { - BoutReal* ydata = NV_DATA_P(y); ///< System state - BoutReal* vdata = NV_DATA_P(v); ///< Input vector - BoutReal* Jvdata = NV_DATA_P(Jv); ///< Jacobian*vector output +int cvode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector UNUSED(fy), + void* user_data, N_Vector UNUSED(tmp)) { + BoutReal* ydata = N_VGetArrayPointer(y); ///< System state 
+ BoutReal* vdata = N_VGetArrayPointer(v); ///< Input vector + BoutReal* Jvdata = N_VGetArrayPointer(Jv); ///< Jacobian*vector output auto* s = static_cast(user_data); @@ -782,6 +741,8 @@ static int cvode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector U return 0; } +} // namespace +// NOLINTEND(readability-identifier-length) /************************************************************************** * CVODE vector option functions @@ -829,9 +790,9 @@ void CvodeSolver::loop_vector_option_values_op(Ind2D UNUSED(i2d), BoutReal* opti void CvodeSolver::resetInternalFields() { TRACE("CvodeSolver::resetInternalFields"); - save_vars(NV_DATA_P(uvec)); + save_vars(N_VGetArrayPointer(uvec)); - if (CVodeReInit(cvode_mem, simtime, uvec) < 0) { + if (CVodeReInit(cvode_mem, simtime, uvec) != CV_SUCCESS) { throw BoutException("CVodeReInit failed\n"); } } diff --git a/src/solver/impls/cvode/cvode.hxx b/src/solver/impls/cvode/cvode.hxx index 89c3a613a8..d44fcf2335 100644 --- a/src/solver/impls/cvode/cvode.hxx +++ b/src/solver/impls/cvode/cvode.hxx @@ -68,8 +68,8 @@ public: void resetInternalFields() override; - // These functions used internally (but need to be public) - void rhs(BoutReal t, BoutReal* udata, BoutReal* dudata); + // These functions are used internally (but need to be public) + void rhs(BoutReal t, BoutReal* udata, BoutReal* dudata, bool linear); void pre(BoutReal t, BoutReal gamma, BoutReal delta, BoutReal* udata, BoutReal* rvec, BoutReal* zvec); void jac(BoutReal t, BoutReal* ydata, BoutReal* vdata, BoutReal* Jvdata); @@ -138,7 +138,7 @@ private: int nonlin_fails{0}; int stab_lims{0}; - bool cvode_initialised = false; + bool cvode_initialised{false}; void set_vector_option_values(BoutReal* option_data, std::vector& f2dtols, std::vector& f3dtols); diff --git a/src/solver/impls/ida/ida.cxx b/src/solver/impls/ida/ida.cxx index 189a103bbe..cfc978f755 100644 --- a/src/solver/impls/ida/ida.cxx +++ b/src/solver/impls/ida/ida.cxx @@ -40,53 +40,23 @@ #include 
"bout/unused.hxx" #include - -#if SUNDIALS_VERSION_MAJOR >= 3 -#include -#include -#else -#include -#endif - #include #include #include +#include #include -#define ZERO RCONST(0.) -#define ONE RCONST(1.0) - -#ifndef IDAINT -#if SUNDIALS_VERSION_MAJOR < 3 -using IDAINT = bout::utils::function_traits::arg_t<0>; -#else -using IDAINT = sunindextype; -#endif -#endif - -static int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data); -static int ida_bbd_res(IDAINT Nlocal, BoutReal t, N_Vector u, N_Vector du, N_Vector rr, - void* user_data); +// NOLINTBEGIN(readability-identifier-length) +namespace { +int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data); +int ida_bbd_res(sunindextype Nlocal, BoutReal t, N_Vector u, N_Vector du, N_Vector rr, + void* user_data); -static int ida_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rr, N_Vector rvec, - N_Vector zvec, BoutReal cj, BoutReal delta, void* user_data); - -#if SUNDIALS_VERSION_MAJOR < 3 -// Shim for earlier versions -inline static int ida_pre_shim(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rr, - N_Vector rvec, N_Vector zvec, BoutReal cj, BoutReal delta, - void* user_data, N_Vector UNUSED(tmp)) { - return ida_pre(t, yy, yp, rr, rvec, zvec, cj, delta, user_data); -} -#else -// Alias for newer versions -constexpr auto& ida_pre_shim = ida_pre; -#endif - -#if SUNDIALS_VERSION_MAJOR < 6 -void* IDACreate([[maybe_unused]] SUNContext) { return IDACreate(); } -#endif +int ida_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rr, N_Vector rvec, + N_Vector zvec, BoutReal cj, BoutReal delta, void* user_data); +} // namespace +// NOLINTEND(readability-identifier-length) IdaSolver::IdaSolver(Options* opts) : Solver(opts), @@ -101,15 +71,15 @@ IdaSolver::IdaSolver(Options* opts) correct_start((*options)["correct_start"] .doc("Correct the initial values") .withDefault(true)), - suncontext(static_cast(&BoutComm::get())) { + suncontext(createSUNContext(BoutComm::get())) { 
has_constraints = true; // This solver has constraints } IdaSolver::~IdaSolver() { if (initialised) { - N_VDestroy_Parallel(uvec); - N_VDestroy_Parallel(duvec); - N_VDestroy_Parallel(id); + N_VDestroy(uvec); + N_VDestroy(duvec); + N_VDestroy(id); IDAFree(&idamem); SUNLinSolFree(sun_solver); } @@ -144,69 +114,75 @@ int IdaSolver::init() { neq, local_N); // Allocate memory - if ((uvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) { + uvec = callWithSUNContext(N_VNew_Parallel, suncontext, BoutComm::get(), local_N, neq); + if (uvec == nullptr) { throw BoutException("SUNDIALS memory allocation failed\n"); } - if ((duvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) { + duvec = N_VClone(uvec); + if (duvec == nullptr) { throw BoutException("SUNDIALS memory allocation failed\n"); } - if ((id = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) { + id = N_VClone(uvec); + if (id == nullptr) { throw BoutException("SUNDIALS memory allocation failed\n"); } // Put the variables into uvec - save_vars(NV_DATA_P(uvec)); + save_vars(N_VGetArrayPointer(uvec)); // Get the starting time derivative run_rhs(simtime); // Put the time-derivatives into duvec - save_derivs(NV_DATA_P(duvec)); + save_derivs(N_VGetArrayPointer(duvec)); // Set the equation type in id(Differential or Algebraic. 
This is optional) - set_id(NV_DATA_P(id)); + set_id(N_VGetArrayPointer(id)); // Call IDACreate to initialise - if ((idamem = IDACreate(suncontext)) == nullptr) { + idamem = callWithSUNContext(IDACreate, suncontext); + if (idamem == nullptr) { throw BoutException("IDACreate failed\n"); } // For callbacks, need pointer to solver object - if (IDASetUserData(idamem, this) < 0) { + if (IDASetUserData(idamem, this) != IDA_SUCCESS) { throw BoutException("IDASetUserData failed\n"); } - if (IDASetId(idamem, id) < 0) { + if (IDASetId(idamem, id) != IDA_SUCCESS) { throw BoutException("IDASetID failed\n"); } - if (IDAInit(idamem, idares, simtime, uvec, duvec) < 0) { + if (IDAInit(idamem, idares, simtime, uvec, duvec) != IDA_SUCCESS) { throw BoutException("IDAInit failed\n"); } - if (IDASStolerances(idamem, reltol, abstol) < 0) { + if (IDASStolerances(idamem, reltol, abstol) != IDA_SUCCESS) { throw BoutException("IDASStolerances failed\n"); } - IDASetMaxNumSteps(idamem, mxsteps); + if (IDASetMaxNumSteps(idamem, mxsteps) != IDA_SUCCESS) { + throw BoutException("IDASetMaxNumSteps failed\n"); + } // Call IDASpgmr to specify the IDA linear solver IDASPGMR const auto maxl = (*options)["maxl"].withDefault(6 * n3d); -#if SUNDIALS_VERSION_MAJOR >= 3 - if ((sun_solver = SUNLinSol_SPGMR(uvec, SUN_PREC_NONE, maxl, suncontext)) == nullptr) { + sun_solver = callWithSUNContext(SUNLinSol_SPGMR, suncontext, uvec, SUN_PREC_NONE, maxl); + if (sun_solver == nullptr) { throw BoutException("Creating SUNDIALS linear solver failed\n"); } - if (IDASpilsSetLinearSolver(idamem, sun_solver) != IDA_SUCCESS) { - throw BoutException("IDASpilsSetLinearSolver failed\n"); + if (IDASetLinearSolver(idamem, sun_solver, nullptr) != IDALS_SUCCESS) { + throw BoutException("IDASetLinearSolver failed\n"); } -#else - if (IDASpgmr(idamem, maxl)) { - throw BoutException("IDASpgmr failed\n"); - } -#endif if (use_precon) { - if (!hasPreconditioner()) { + if (hasPreconditioner()) { + output.write("\tUsing user-supplied 
preconditioner\n"); + if (IDASetPreconditioner(idamem, nullptr, ida_pre) != IDALS_SUCCESS) { + throw BoutException("IDASetPreconditioner failed\n"); + } + } else { output.write("\tUsing BBD preconditioner\n"); /// Get options // Compute band_width_default from actually added fields, to allow for multiple Mesh @@ -225,21 +201,17 @@ int IdaSolver::init() { const auto mldq = (*options)["mldq"].withDefault(band_width_default); const auto mukeep = (*options)["mukeep"].withDefault(n3d); const auto mlkeep = (*options)["mlkeep"].withDefault(n3d); - if (IDABBDPrecInit(idamem, local_N, mudq, mldq, mukeep, mlkeep, ZERO, ida_bbd_res, - nullptr)) { + if (IDABBDPrecInit(idamem, local_N, mudq, mldq, mukeep, mlkeep, 0.0, ida_bbd_res, + nullptr) + != IDALS_SUCCESS) { throw BoutException("IDABBDPrecInit failed\n"); } - } else { - output.write("\tUsing user-supplied preconditioner\n"); - if (IDASpilsSetPreconditioner(idamem, nullptr, ida_pre_shim)) { - throw BoutException("IDASpilsSetPreconditioner failed\n"); - } } } // Call IDACalcIC (with default options) to correct the initial values if (correct_start) { - if (IDACalcIC(idamem, IDA_YA_YDP_INIT, 1e-6)) { + if (IDACalcIC(idamem, IDA_YA_YDP_INIT, 1e-6) != IDA_SUCCESS) { throw BoutException("IDACalcIC failed\n"); } } @@ -291,7 +263,7 @@ BoutReal IdaSolver::run(BoutReal tout) { const int flag = IDASolve(idamem, tout, &simtime, uvec, duvec, IDA_NORMAL); // Copy variables - load_vars(NV_DATA_P(uvec)); + load_vars(N_VGetArrayPointer(uvec)); // Call rhs function to get extra variables at this time run_rhs(simtime); @@ -322,9 +294,9 @@ void IdaSolver::res(BoutReal t, BoutReal* udata, BoutReal* dudata, BoutReal* rda save_derivs(rdata); // If a differential equation, subtract dudata - const int N = NV_LOCLENGTH_P(id); - const BoutReal* idd = NV_DATA_P(id); - for (int i = 0; i < N; i++) { + const auto length = N_VGetLocalLength_Parallel(id); + const BoutReal* idd = N_VGetArrayPointer(id); + for (int i = 0; i < length; i++) { if (idd[i] > 0.5) 
{ // 1 -> differential, 0 -> algebraic rdata[i] -= dudata[i]; } @@ -343,8 +315,8 @@ void IdaSolver::pre(BoutReal t, BoutReal cj, BoutReal delta, BoutReal* udata, if (!hasPreconditioner()) { // Identity (but should never happen) - const int N = NV_LOCLENGTH_P(id); - std::copy(rvec, rvec + N, zvec); + const auto length = N_VGetLocalLength_Parallel(id); + std::copy(rvec, rvec + length, zvec); return; } @@ -367,10 +339,12 @@ void IdaSolver::pre(BoutReal t, BoutReal cj, BoutReal delta, BoutReal* udata, * IDA res function **************************************************************************/ -static int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data) { - BoutReal* udata = NV_DATA_P(u); - BoutReal* dudata = NV_DATA_P(du); - BoutReal* rdata = NV_DATA_P(rr); +// NOLINTBEGIN(readability-identifier-length) +namespace { +int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data) { + BoutReal* udata = N_VGetArrayPointer(u); + BoutReal* dudata = N_VGetArrayPointer(du); + BoutReal* rdata = N_VGetArrayPointer(rr); auto* s = static_cast(user_data); @@ -381,18 +355,17 @@ static int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_d } /// Residual function for BBD preconditioner -static int ida_bbd_res(IDAINT UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, - N_Vector rr, void* user_data) { +int ida_bbd_res(sunindextype UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, + N_Vector rr, void* user_data) { return idares(t, u, du, rr, user_data); } // Preconditioner function -static int ida_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector UNUSED(rr), - N_Vector rvec, N_Vector zvec, BoutReal cj, BoutReal delta, - void* user_data) { - BoutReal* udata = NV_DATA_P(yy); - BoutReal* rdata = NV_DATA_P(rvec); - BoutReal* zdata = NV_DATA_P(zvec); +int ida_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector UNUSED(rr), + N_Vector rvec, N_Vector zvec, BoutReal cj, BoutReal delta, void* user_data) { + 
BoutReal* udata = N_VGetArrayPointer(yy); + BoutReal* rdata = N_VGetArrayPointer(rvec); + BoutReal* zdata = N_VGetArrayPointer(zvec); auto* s = static_cast(user_data); @@ -401,5 +374,7 @@ static int ida_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector UNUSED return 0; } +} // namespace +// NOLINTEND(readability-identifier-length) #endif diff --git a/src/solver/solver.cxx b/src/solver/solver.cxx index 1b7ec1fd74..8a75ff43a4 100644 --- a/src/solver/solver.cxx +++ b/src/solver/solver.cxx @@ -1364,6 +1364,12 @@ int Solver::run_rhs(BoutReal t, bool linear) { Timer timer("rhs"); + if (first_rhs_call) { + // Ensure that nonlinear terms are calculated on first call + linear = false; + first_rhs_call = false; + } + if (model->splitOperator()) { // Run both parts diff --git a/src/sys/adios_object.cxx b/src/sys/adios_object.cxx index c7d6dab9aa..477dae14ef 100644 --- a/src/sys/adios_object.cxx +++ b/src/sys/adios_object.cxx @@ -1,6 +1,6 @@ #include "bout/build_config.hxx" -#if BOUT_HAS_ADIOS +#if BOUT_HAS_ADIOS2 #include "bout/adios_object.hxx" #include "bout/boutexception.hxx" @@ -95,4 +95,4 @@ void ADIOSSetParameters(const std::string& input, const char delimKeyValue, } } // namespace bout -#endif //BOUT_HAS_ADIOS +#endif //BOUT_HAS_ADIOS2 diff --git a/src/sys/options.cxx b/src/sys/options.cxx index 49a81cfa88..e2f39542fd 100644 --- a/src/sys/options.cxx +++ b/src/sys/options.cxx @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -221,6 +222,36 @@ Options::fuzzyFind(const std::string& name, std::string::size_type distance) con return matches; } +Options::Options(const Options& other) { (*this) = other.copy(); } + +Options& Options::operator=(const Options& other) { + if (this == &other) { + return *this; + } + + // Note: Here can't do copy-and-swap because pointers to parents are stored + + value = other.value; + + // Assigning the attributes. 
+ // The simple assignment operator fails to compile with Apple Clang 12 + // attributes = other.attributes; + attributes.clear(); + attributes.insert(other.attributes.begin(), other.attributes.end()); + + full_name = other.full_name; + is_section = other.is_section; + children = other.children; + value_used = other.value_used; + + // Ensure that this is the parent of all children, + // otherwise will point to the original Options instance + for (auto& child : children) { + child.second.parent_instance = this; + } + return *this; +} + Options& Options::operator=(Options&& other) noexcept { if (this == &other) { return *this; @@ -938,7 +969,7 @@ bout::details::OptionsFormatterBase::parse(fmt::format_parse_context& ctx) { fmt::format_context::iterator bout::details::OptionsFormatterBase::format(const Options& options, - fmt::format_context& ctx) { + fmt::format_context& ctx) const { const auto conditionally_used = [](const Options& option) -> bool { if (not option.hasAttribute(conditionally_used_attribute)) { diff --git a/src/sys/options/options_adios.cxx b/src/sys/options/options_adios.cxx index b313d7bc79..88df92df04 100644 --- a/src/sys/options/options_adios.cxx +++ b/src/sys/options/options_adios.cxx @@ -1,6 +1,6 @@ #include "bout/build_config.hxx" -#if BOUT_HAS_ADIOS +#if BOUT_HAS_ADIOS2 #include "options_adios.hxx" #include "bout/adios_object.hxx" @@ -628,4 +628,4 @@ void OptionsADIOS::write(const Options& options, const std::string& time_dim) { } // namespace bout -#endif // BOUT_HAS_ADIOS +#endif // BOUT_HAS_ADIOS2 diff --git a/src/sys/options/options_adios.hxx b/src/sys/options/options_adios.hxx index eddb3976ff..a942e6fed9 100644 --- a/src/sys/options/options_adios.hxx +++ b/src/sys/options/options_adios.hxx @@ -8,7 +8,7 @@ #include "bout/options.hxx" #include "bout/options_io.hxx" -#if !BOUT_HAS_ADIOS +#if !BOUT_HAS_ADIOS2 namespace { bout::RegisterUnavailableOptionsIO @@ -79,5 +79,5 @@ RegisterOptionsIO registeroptionsadios("adios"); } // namespace bout 
-#endif // BOUT_HAS_ADIOS +#endif // BOUT_HAS_ADIOS2 #endif // OPTIONS_ADIOS_H diff --git a/tests/MMS/spatial/fci/runtest b/tests/MMS/spatial/fci/runtest index 712442a795..204a9cc271 100755 --- a/tests/MMS/spatial/fci/runtest +++ b/tests/MMS/spatial/fci/runtest @@ -27,7 +27,7 @@ nx = 3 # Not changed for these tests nlist = [8, 16, 32, 64, 128] # Number of parallel slices (in each direction) -nslices = [1, 2] +nslices = [1] directory = "data" diff --git a/tests/gitlab/ci-tests.sh b/tests/gitlab/ci-tests.sh new file mode 100755 index 0000000000..a237d85be9 --- /dev/null +++ b/tests/gitlab/ci-tests.sh @@ -0,0 +1,88 @@ +#!/bin/bash +set -e + +echo "===> Building BOUT-dev CUDA minimal" +cmake -S . -B build \ + -DCMAKE_C_COMPILER=gcc \ + -DCMAKE_CXX_COMPILER=g++ \ + -DBOUT_ENABLE_RAJA=on \ + -DBOUT_ENABLE_UMPIRE=on \ + -DBOUT_ENABLE_CUDA=on \ + -DCMAKE_CUDA_ARCHITECTURES=70 \ + -DCUDA_ARCH=compute_70,code=sm_70 \ + -DBOUT_ENABLE_WARNINGS=off \ + -DBOUT_USE_SYSTEM_FMT=on + +pushd build +make -j + +echo "===> Building and running blob2d-outerloop" +pushd examples/blob2d-outerloop +make -j +# Check the output using Sim Time and RHS evals. Must be careful splitting the +# regex string in multiple lines and escaping characters.
+if ./blob2d-outerloop | grep -Pzoq "(?s)Sim Time \| RHS evals \| Wall Time \| Calc Inv Comm I/O SOLVER\n.*\n"\ +"0\.000e\+00 2 .*"\ +"5\.000e\+01 53 .*"\ +"1\.000e\+02 17 .*"\ +"1\.500e\+02 27 .*"; then + echo "Sim Time and RHS evals match" +else + echo "Sim Time and RHS evals DO NOT match" + exit 1 +fi +popd + +echo "===> Building and running elm-pb-outerloop" +pushd examples/elm-pb-outerloop +make -j +if ./elm_pb_outerloop | grep -Pzoq "(?s)Sim Time \| RHS evals \| Wall Time \| Calc Inv Comm I/O SOLVER\n.*\n"\ +"0\.000e\+00 2 .*"\ +"1\.000e\+00 44 .*"\ +"2\.000e\+00 37 .*"\ +"3\.000e\+00 37 .*"\ +"4\.000e\+00 37 .*"\ +"5\.000e\+00 30 .*"\ +"6\.000e\+00 31 .*"\ +"7\.000e\+00 31 .*"\ +"8\.000e\+00 25 .*"\ +"9\.000e\+00 21 .*"\ +"1\.000e\+01 24 .*"\ +"1\.100e\+01 19 .*"\ +"1\.200e\+01 25 .*"\ +"1\.300e\+01 25 .*"\ +"1\.400e\+01 25 .*"\ +"1\.500e\+01 25 .*"\ +"1\.600e\+01 25 .*"\ +"1\.700e\+01 25 .*"\ +"1\.800e\+01 25 .*"\ +"1\.900e\+01 20 .*"\ +"2\.000e\+01 29 .*"\ +"2\.100e\+01 29 .*"\ +"2\.200e\+01 29 .*"\ +"2\.300e\+01 29 .*"\ +"2\.400e\+01 29 .*"\ +"2\.500e\+01 29 .*"\ +"2\.600e\+01 29 .*"\ +"2\.700e\+01 22 .*"\ +"2\.800e\+01 29 .*"\ +"2\.900e\+01 29 .*"\ +"3\.000e\+01 29 .*"\ +"3\.100e\+01 29 .*"\ +"3\.200e\+01 29 .*"\ +"3\.300e\+01 32 .*"\ +"3\.400e\+01 25 .*"\ +"3\.500e\+01 33 .*"\ +"3\.600e\+01 33 .*"\ +"3\.700e\+01 39 .*"\ +"3\.800e\+01 31 .*"\ +"3\.900e\+01 31 .*"\ +"4\.000e\+01 36 .*"; then + echo "Sim Time and RHS evals match" +else + echo "Sim Time and RHS evals DO NOT match" + exit 1 +fi +popd + +popd \ No newline at end of file diff --git a/tests/integrated/CMakeLists.txt b/tests/integrated/CMakeLists.txt index 7d3e8e81ce..ef173db7df 100644 --- a/tests/integrated/CMakeLists.txt +++ b/tests/integrated/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(test-datafilefacade) add_subdirectory(test-drift-instability) add_subdirectory(test-drift-instability-staggered) add_subdirectory(test-fieldgroupComm) +add_subdirectory(test-fci-boundary) 
add_subdirectory(test-griddata) add_subdirectory(test-griddata-yboundary-guards) add_subdirectory(test-gyro) diff --git a/tests/integrated/test-fci-boundary/CMakeLists.txt b/tests/integrated/test-fci-boundary/CMakeLists.txt new file mode 100644 index 0000000000..bf25cd7c57 --- /dev/null +++ b/tests/integrated/test-fci-boundary/CMakeLists.txt @@ -0,0 +1,22 @@ +bout_add_mms_test(test-fci-boundary + SOURCES get_par_bndry.cxx + USE_RUNTEST + USE_DATA_BOUT_INP + REQUIRES zoidberg_FOUND + PROCESSORS 1 + ) + +if (zoidberg_FOUND) + set(gridfile ${CMAKE_CURRENT_BINARY_DIR}/grid.fci.nc) + add_custom_command(OUTPUT ${gridfile} + COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${BOUT_PYTHONPATH}:$ENV{PYTHONPATH} ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/grid.py ${gridfile} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/../../../tools/pylib/boutconfig/__init__.py + DEPENDS grid.py + IMPLICIT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Creating test-fci-boundary grid file" + ) + add_custom_target(test-fci-boundary-grid DEPENDS ${gridfile}) + add_dependencies(test-fci-boundary + test-fci-boundary-grid) +endif() diff --git a/tests/integrated/test-fci-boundary/data/BOUT.inp b/tests/integrated/test-fci-boundary/data/BOUT.inp new file mode 100644 index 0000000000..b631f16295 --- /dev/null +++ b/tests/integrated/test-fci-boundary/data/BOUT.inp @@ -0,0 +1,20 @@ +grid = grid.fci.nc + +MXG = 1 +NXPE = 1 +MYG = 1 + +[mesh] +symmetricglobalx = true + +[mesh:ddy] +first = C2 +second = C2 + +[mesh:paralleltransform] +type = fci +y_periodic = true +z_periodic = true + +[mesh:paralleltransform:xzinterpolation] +type = lagrange4pt diff --git a/tests/integrated/test-fci-boundary/get_par_bndry.cxx b/tests/integrated/test-fci-boundary/get_par_bndry.cxx new file mode 100644 index 0000000000..ac0f5de2a6 --- /dev/null +++ b/tests/integrated/test-fci-boundary/get_par_bndry.cxx @@ -0,0 +1,34 @@ +#include "bout/bout.hxx" +#include "bout/derivs.hxx" +#include 
"bout/field_factory.hxx" +#include "bout/parallel_boundary_region.hxx" + +int main(int argc, char** argv) { + BoutInitialise(argc, argv); + + using bout::globals::mesh; + + std::vector fields; + fields.resize(static_cast(BoundaryParType::SIZE)); + Options dump; + for (int i = 0; i < fields.size(); i++) { + fields[i] = Field3D{0.0}; + mesh->communicate(fields[i]); + for (const auto& bndry_par : + mesh->getBoundariesPar(static_cast(i))) { + output.write("{:s} region\n", toString(static_cast(i))); + for (bndry_par->first(); !bndry_par->isDone(); bndry_par->next()) { + fields[i][bndry_par->ind()] += 1; + output.write("{:s} increment\n", toString(static_cast(i))); + } + } + output.write("{:s} done\n", toString(static_cast(i))); + + dump[fmt::format("field_{:s}", toString(static_cast(i)))] = + fields[i]; + } + + bout::writeDefaultOutputFile(dump); + + BoutFinalise(); +} diff --git a/tests/integrated/test-fci-boundary/grid.py b/tests/integrated/test-fci-boundary/grid.py new file mode 100644 index 0000000000..d544f0cdf7 --- /dev/null +++ b/tests/integrated/test-fci-boundary/grid.py @@ -0,0 +1,55 @@ +import zoidberg as zb +import numpy as np +import sys +import boutconfig as bc + + +def rotating_ellipse( + nx=68, + ny=16, + nz=128, + npoints=421, + xcentre=5.5, + I_coil=0.01, + curvilinear=True, + rectangular=False, + fname="rotating-ellipse.fci.nc", + a=0.4, + Btor=2.5, +): + yperiod = 2 * np.pi / 5.0 + field = zb.field.RotatingEllipse( + xcentre=xcentre, + I_coil=I_coil, + radius=2 * a, + yperiod=yperiod, + Btor=Btor, + ) + # Define the y locations + ycoords = np.linspace(0.0, yperiod, ny, endpoint=False) + + if rectangular: + print("Making rectangular poloidal grid") + poloidal_grid = zb.poloidal_grid.RectangularPoloidalGrid( + nx, nz, 1.0, 1.0, Rcentre=xcentre + ) + elif curvilinear: + print("Making curvilinear poloidal grid") + inner = zb.rzline.shaped_line( + R0=xcentre, a=a / 2.0, elong=0, triang=0.0, indent=0, n=npoints + ) + outer = zb.rzline.shaped_line( + 
R0=xcentre, a=a, elong=0, triang=0.0, indent=0, n=npoints + ) + + print("creating grid...") + poloidal_grid = zb.poloidal_grid.grid_elliptic(inner, outer, nx, nz) + + # Create the 3D grid by putting together 2D poloidal grids + grid = zb.grid.Grid(poloidal_grid, ycoords, yperiod, yperiodic=True) + maps = zb.make_maps(grid, field, quiet=True) + zb.write_maps(grid, field, maps, str(fname), metric2d=bc.isMetric2D()) + + +if __name__ == "__main__": + rotating_ellipse(fname=sys.argv[1]) diff --git a/tests/integrated/test-fci-boundary/runtest b/tests/integrated/test-fci-boundary/runtest new file mode 100755 index 0000000000..16cb4ee443 --- /dev/null +++ b/tests/integrated/test-fci-boundary/runtest @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# +# Python script to run and analyse MMS test +# + +# Cores: 2 +# only working with cmake +# requires: False +from boututils.run_wrapper import launch_safe +from boututils.datafile import DataFile +from boutdata.collect import collect as _collect + +import numpy as np + + +def collect(var): + return _collect( + var, + info=False, + path=directory, + xguards=False, + yguards=False, + ) + + +nprocs = [1] # , 2, 4] +mthread = 2 + +directory = "data" + +with DataFile("grid.fci.nc") as grid: + xfwd = grid.read("forward_xt_prime")[1:-1] + xbwd = grid.read("backward_xt_prime")[1:-1] + +nx = xfwd.shape[0] + +regions = { + "xin_fwd": xfwd < 1, + "xout_fwd": xfwd > nx, + "xin_bwd": xbwd < 1, + "xout_bwd": xbwd > nx, +} +regions = {k: v.astype(int) for k, v in regions.items()} + +# for x in "xout", "xin": +# regions[x] = np.logical_or(regions[f"{x}_fwd"], regions[f"{x}_bwd"]) +# for x in "fwd", "bwd": +# regions[x] = np.logical_or(regions[f"xin_{x}"], regions[f"xout_{x}"]) +# regions["all"] = np.logical_or(regions["xin"], regions["xout"]) +for x in "xout", "xin": + regions[x] = regions[f"{x}_fwd"] + regions[f"{x}_bwd"] +for x in "fwd", "bwd": + regions[x] = regions[f"xin_{x}"] + regions[f"xout_{x}"] +regions["all"] = regions["xin"] + 
regions["xout"] + +for nproc in nprocs: + cmd = "./get_par_bndry" + + # Launch using MPI + _, out = launch_safe(cmd, nproc=nproc, mthread=mthread, pipe=True) + + for k, v in regions.items(): + # Collect data + data = collect(f"field_{k}") + assert np.allclose(data, v), ( + k + " does not match", + np.sum(data), + np.sum(v), + np.max(data), + ) diff --git a/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp b/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp index 9a6ac24fa1..46d3cb55ba 100644 --- a/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp +++ b/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp @@ -1,7 +1,7 @@ [f] #function = 0. function = mixmode(x, 1.)*mixmode(y, 2.)*mixmode(z, 3.) -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [rhs] function = mixmode(x, 4.)*mixmode(y, 5.)*mixmode(z, 6.) @@ -17,7 +17,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.) [C2] #function = 0. function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.) -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [A] function = 0. diff --git a/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp b/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp index eb78644f0f..be0c697d80 100644 --- a/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp +++ b/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp @@ -16,7 +16,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.) [C2] #function = 0. function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.) -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [A] function = 0. 
diff --git a/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp b/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp index da1918dcc7..bc3c47eac7 100644 --- a/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp +++ b/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp @@ -17,7 +17,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.) [C2] #function = 0. function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.) -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [A] function = 0.0 diff --git a/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp b/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp index 6474b2604b..601531de84 100644 --- a/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp +++ b/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp @@ -17,7 +17,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.) [C2] #function = 0. function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.) 
-bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [A] function = 0.0 diff --git a/tests/integrated/test-options-adios/CMakeLists.txt b/tests/integrated/test-options-adios/CMakeLists.txt index 110773d6fd..cc61fabe57 100644 --- a/tests/integrated/test-options-adios/CMakeLists.txt +++ b/tests/integrated/test-options-adios/CMakeLists.txt @@ -2,5 +2,5 @@ bout_add_integrated_test(test-options-adios SOURCES test-options-adios.cxx USE_RUNTEST USE_DATA_BOUT_INP - REQUIRES BOUT_HAS_ADIOS + REQUIRES BOUT_HAS_ADIOS2 ) diff --git a/tests/integrated/test-options-adios/runtest b/tests/integrated/test-options-adios/runtest index 1621c686a3..03a83fc0ba 100755 --- a/tests/integrated/test-options-adios/runtest +++ b/tests/integrated/test-options-adios/runtest @@ -34,7 +34,7 @@ assert result["int"] == 42 assert math.isclose(result["real"], 3.1415) assert result["string"] == "hello" -print("Checking saved ADIOS test-out file -- Not implemented") +print("Checking saved ADIOS2 test-out file -- Not implemented") # Check the output NetCDF file # with DataFile("test-out.nc") as f: diff --git a/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx b/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx index bfd394194f..1e3cdde310 100644 --- a/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx +++ b/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx @@ -23,15 +23,90 @@ * **************************************************************************/ -#include -#include -// #include -#include -#include -#include +#include "bout/bout.hxx" // NOLINT +#include "bout/bout_types.hxx" +#include "bout/boutexception.hxx" +#include "bout/constants.hxx" +#include "bout/difops.hxx" +#include "bout/field2d.hxx" +#include "bout/field3d.hxx" +#include "bout/invert_laplace.hxx" +#include "bout/options.hxx" +#include "bout/options_io.hxx" +#include "bout/output.hxx" +#include "bout/traits.hxx" + +#include "fmt/core.h" +#include + #include +#include BoutReal 
max_error_at_ystart(const Field3D& error); +void apply_flat_boundary(Field3D& bcoef); + +template +void check_laplace(int test_num, std::string_view test_name, Laplacian& invert, + int inner_flags, int outer_flags, const T& acoef, const T& ccoef, + const T& dcoef, const U& bcoef, const Field3D& field, int ystart, + Options& dump) { + static_assert(bout::utils::is_Field_v, "check_laplace requires Field2D or Field3D"); + static_assert(bout::utils::is_Field_v, "check_laplace requires Field2D or Field3D"); + + invert.setInnerBoundaryFlags(inner_flags); + invert.setOuterBoundaryFlags(outer_flags); + invert.setCoefA(acoef); + invert.setCoefC(ccoef); + invert.setCoefD(dcoef); + + checkData(bcoef); + + Field3D sol; + Field3D error; + Field3D abs_error; + BoutReal max_error = -1; + + try { + sol = invert.solve(sliceXZ(bcoef, ystart)); + error = (field - sol) / field; + abs_error = field - sol; + max_error = max_error_at_ystart(abs(abs_error)); + } catch (BoutException& err) { + output.write("BoutException occured in invert->solve(b1): {}\n", err.what()); + } + + output.write("\nTest {}: {}\n", test_num, test_name); + output.write("Magnitude of maximum absolute error is {}\n", max_error); + + dump[fmt::format("a{}", test_num)] = acoef; + dump[fmt::format("b{}", test_num)] = bcoef; + dump[fmt::format("c{}", test_num)] = ccoef; + dump[fmt::format("d{}", test_num)] = dcoef; + dump[fmt::format("f{}", test_num)] = field; + dump[fmt::format("sol{}", test_num)] = sol; + dump[fmt::format("error{}", test_num)] = error; + dump[fmt::format("absolute_error{}", test_num)] = abs_error; + dump[fmt::format("max_error{}", test_num)] = max_error; +} + +template +Field3D forward_laplace(const Field3D& field, const T& acoef, const T& ccoef, + const T& dcoef) { + auto bcoef = + dcoef * Delp2(field) + Grad_perp(ccoef) * Grad_perp(field) / ccoef + acoef * field; + apply_flat_boundary(bcoef); + return bcoef; +} + +Field3D generate_f1(const Mesh& mesh); +Field3D generate_a1(const Mesh& mesh); 
+Field3D generate_c1(const Mesh& mesh); +Field3D generate_d1(const Mesh& mesh); + +Field3D generate_f5(const Mesh& mesh); +Field3D generate_a5(const Mesh& mesh); +Field3D generate_c5(const Mesh& mesh); +Field3D generate_d5(const Mesh& mesh); int main(int argc, char** argv) { @@ -42,829 +117,553 @@ int main(int argc, char** argv) { options = Options::getRoot()->getSection("petsc4th"); auto invert_4th = Laplacian::create(options); - // Solving equations of the form d*Delp2(f) + 1/c*Grad_perp(c).Grad_perp(f) + a*f = b for various f, a, c, d - Field3D f1, a1, b1, c1, d1, sol1; - BoutReal p, q; //Use to set parameters in constructing trial functions - Field3D error1, - absolute_error1; //Absolute value of relative error: abs( (f1-sol1)/f1 ) - BoutReal max_error1; //Output of test + Options dump; + // Solving equations of the form d*Delp2(f) + 1/c*Grad_perp(c).Grad_perp(f) + a*f = b for various f, a, c, d using bout::globals::mesh; // Only Neumann x-boundary conditions are implemented so far, so test functions should be Neumann in x and periodic in z. // Use Field3D's, but solver only works on FieldPerp slices, so only use 1 y-point - BoutReal nx = mesh->GlobalNx - 2 * mesh->xstart - 1; - BoutReal nz = mesh->GlobalNz; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////// // Test 1: Gaussian x-profiles, 2nd order Krylov - p = 0.39503274; - q = 0.20974396; - f1.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f1(jx, jy, jz) = - 0. + exp(-(100. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-100. * pow(-p, 2)) * x - + (-p * exp(-100. * pow(-p, 2)) - - (1 - p) * exp(-100. 
* pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos(2. * PI - * (z - q)))) //make the gradients zero at both x-boundaries - ; - ASSERT0(finite(f1(jx, jy, jz))); - } - } - } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f1(jx, jy, jz) = - 0. + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-60. * pow(-p, 2)) * x - + (-p * exp(-60. * pow(-p, 2)) - - (1 - p) * exp(-60. * pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos( - 2. * PI - * (z - q)))); //make the gradients zero at both x-boundaries - ASSERT0(finite(f1(jx, jy, jz))); - } - } - } - } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f1(jx, jy, jz) = - 0. + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-60. * pow(-p, 2)) * x - + (-p * exp(-60. * pow(-p, 2)) - - (1 - p) * exp(-60. * pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos( - 2. * PI - * (z - q)))); //make the gradients zero at both x-boundaries - ASSERT0(finite(f1(jx, jy, jz))); - } - } - } - } + Field3D f_1 = generate_f1(*mesh); + Field3D a_1 = generate_a1(*mesh); + Field3D c_1 = generate_c1(*mesh); + Field3D d_1 = generate_d1(*mesh); - f1.applyBoundary("neumann"); - - p = 0.512547; - q = 0.30908712; - d1.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d1(jx, jy, jz) = - 1. 
+ 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.); - } - } - } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d1(jx, jy, jz) = - 1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.); - // d1(jx, jy, jz) = d1(jx+1, jy, jz); - } - } - } - } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d1(jx, jy, jz) = - 1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.); - // d1(jx, jy, jz) = d1(jx-1, jy, jz); - } - } - } - } + mesh->communicate(f_1, a_1, c_1, d_1); - p = 0.18439023; - q = 0.401089473; - c1.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c1(jx, jy, jz) = - 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.); - } - } - } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c1(jx, jy, jz) = - 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. 
* PI * (z - q) * 2.); - // c1(jx, jy, jz) = c1(jx+1, jy, jz); - } - } - } - } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c1(jx, jy, jz) = - 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.); - // c1(jx, jy, jz) = c1(jx-1, jy, jz); - } - } + const Field3D b_1 = forward_laplace(f_1, a_1, c_1, d_1); + + int test_num = 0; + check_laplace(++test_num, "PETSc 2nd order", *invert, INVERT_AC_GRAD, INVERT_AC_GRAD, + a_1, c_1, d_1, b_1, f_1, mesh->ystart, dump); + + ///////////////////////////////////////////////// + // Test 2: Gaussian x-profiles, 4th order Krylov + + check_laplace(++test_num, "PETSc 4th order", *invert_4th, INVERT_AC_GRAD, + INVERT_AC_GRAD, a_1, c_1, d_1, b_1, f_1, mesh->ystart, dump); + + //////////////////////////////////////////////////////////////////////////////////////// + // Test 3+4: Gaussian x-profiles, z-independent coefficients and compare with SPT method + + const Field2D a_3 = DC(a_1); + const Field2D c_3 = DC(c_1); + const Field2D d_3 = DC(d_1); + const Field3D b_3 = forward_laplace(f_1, a_3, c_3, d_3); + + check_laplace(++test_num, "with coefficients constant in z, PETSc 2nd order", *invert, + INVERT_AC_GRAD, INVERT_AC_GRAD, a_3, c_3, d_3, b_3, f_1, mesh->ystart, + dump); + + Options* SPT_options = Options::getRoot()->getSection("SPT"); + auto invert_SPT = Laplacian::create(SPT_options); + + check_laplace(++test_num, "with coefficients constant in z, default solver", + *invert_SPT, INVERT_AC_GRAD, INVERT_AC_GRAD | INVERT_DC_GRAD, a_3, c_3, + d_3, b_3, f_1, mesh->ystart, dump); + + ////////////////////////////////////////////// + // Test 5: Cosine x-profiles, 2nd order Krylov + Field3D f_5 = generate_f5(*mesh); + Field3D a_5 = generate_a5(*mesh); + Field3D c_5 = generate_c5(*mesh); + 
Field3D d_5 = generate_d5(*mesh); + + mesh->communicate(f_5, a_5, c_5, d_5); + + const Field3D b_5 = forward_laplace(f_5, a_5, c_5, d_5); + + check_laplace(++test_num, "different profiles, PETSc 2nd order", *invert, + INVERT_AC_GRAD, INVERT_AC_GRAD, a_5, c_5, d_5, b_5, f_5, mesh->ystart, + dump); + + ////////////////////////////////////////////// + // Test 6: Cosine x-profiles, 4th order Krylov + + check_laplace(++test_num, "different profiles, PETSc 4th order", *invert_4th, + INVERT_AC_GRAD, INVERT_AC_GRAD, a_5, c_5, d_5, b_5, f_5, mesh->ystart, + dump); + + ////////////////////////////////////////////////////////////////////////////////////// + // Test 7+8: Cosine x-profiles, z-independent coefficients and compare with SPT method + + const Field2D a_7 = DC(a_5); + const Field2D c_7 = DC(c_5); + const Field2D d_7 = DC(d_5); + const Field3D b_7 = forward_laplace(f_5, a_7, c_7, d_7); + + check_laplace(++test_num, + "different profiles, with coefficients constant in z, PETSc 2nd order", + *invert, INVERT_AC_GRAD, INVERT_AC_GRAD, a_7, c_7, d_7, b_7, f_5, + mesh->ystart, dump); + + check_laplace(++test_num, + "different profiles, with coefficients constant in z, default solver", + *invert_SPT, INVERT_AC_GRAD, INVERT_AC_GRAD | INVERT_DC_GRAD, a_7, c_7, + d_7, b_7, f_5, mesh->ystart, dump); + + // Write and close the output file + bout::writeDefaultOutputFile(dump); + + MPI_Barrier(BoutComm::get()); // Wait for all processors to write data + } + + bout::checkForUnusedOptions(); + + BoutFinalise(); + return 0; +} + +BoutReal max_error_at_ystart(const Field3D& error) { + const auto* mesh = error.getMesh(); + BoutReal local_max_error = error(mesh->xstart, mesh->ystart, 0); + + for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { + for (int jz = 0; jz < mesh->LocalNz; jz++) { + if (local_max_error < error(jx, mesh->ystart, jz)) { + local_max_error = error(jx, mesh->ystart, jz); } } + } - p = 0.612547; - q = 0.30908712; - a1.allocate(); - for (int jx = mesh->xstart; jx <= 
mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a1(jx, jy, jz) = - -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.); + BoutReal max_error = BoutNaN; + + MPI_Allreduce(&local_max_error, &max_error, 1, MPI_DOUBLE, MPI_MAX, BoutComm::get()); + + return max_error; +} + +void apply_flat_boundary(Field3D& bcoef) { + const Mesh& mesh = *bcoef.getMesh(); + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + bcoef(jx, jy, jz) = bcoef(jx + 1, jy, jz); } } } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a1(jx, jy, jz) = - -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.); - // a1(jx, jy, jz) = a1(jx+1, jy, jz); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + bcoef(jx, jy, jz) = bcoef(jx - 1, jy, jz); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a1(jx, jy, jz) = - -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. 
* PI * (z - q) * 7.); - // a1(jx, jy, jz) = a1(jx-1, jy, jz); - } - } + } +} + +Field3D generate_f1(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + + constexpr BoutReal p = 0.39503274; // NOLINT + constexpr BoutReal q = 0.20974396; // NOLINT + + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = 0. + + exp(-(100. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-100. * pow(-p, 2)) * x + + (-p * exp(-100. * pow(-p, 2)) + - (1 - p) * exp(-100. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. * PI * (z - q)))); } } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; - checkData(f1); - checkData(a1); - checkData(c1); - checkData(d1); - - mesh->communicate(f1, a1, c1, d1); - - b1 = d1 * Delp2(f1) + Grad_perp(c1) * Grad_perp(f1) / c1 + a1 * f1; - - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b1(jx, jy, jz) = b1(jx + 1, jy, jz); - } + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = 0. + + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-60. * pow(-p, 2)) * x + + (-p * exp(-60. * pow(-p, 2)) + - (1 - p) * exp(-60. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. 
* PI * (z - q)))); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b1(jx, jy, jz) = b1(jx - 1, jy, jz); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = 0. + + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-60. * pow(-p, 2)) * x + + (-p * exp(-60. * pow(-p, 2)) + - (1 - p) * exp(-60. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. * PI * (z - q)))); } } } + } - invert->setInnerBoundaryFlags(INVERT_AC_GRAD); - invert->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert->setCoefA(a1); - invert->setCoefC(c1); - invert->setCoefD(d1); - - checkData(b1); - - try { - sol1 = invert->solve(sliceXZ(b1, mesh->ystart)); - error1 = (f1 - sol1) / f1; - absolute_error1 = f1 - sol1; - // max_error1 = max_error_at_ystart(abs(error1)); - max_error1 = max_error_at_ystart(abs(absolute_error1)); - } catch (BoutException& err) { - output << "BoutException occured in invert->solve(b1): " << err.what() << endl; - max_error1 = -1; - } + checkData(result); + result.applyBoundary("neumann"); + return result; +} - output << endl << "Test 1: PETSc 2nd order" << endl; - // output<<"Time to set up is "<setInnerBoundaryFlags(INVERT_AC_GRAD); - invert_4th->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert_4th->setGlobalFlags(INVERT_4TH_ORDER); - invert_4th->setCoefA(a1); - invert_4th->setCoefC(c1); - invert_4th->setCoefD(d1); - - try { - sol2 = invert_4th->solve(sliceXZ(b1, mesh->ystart)); - error2 = (f1 - sol2) / f1; - absolute_error2 = f1 - sol2; - // max_error2 = max_error_at_ystart(abs(error2)); - max_error2 
= max_error_at_ystart(abs(absolute_error2)); - } catch (BoutException& err) { - output << "BoutException occured in invert->solve(b1): " << err.what() << endl; - max_error2 = -1; + constexpr BoutReal p = 0.512547; // NOLINT + constexpr BoutReal q = 0.30908712; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.); + } } - - output << endl << "Test 2: PETSc 4th order" << endl; - // output<<"Time to set up is "<firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b3(jx, jy, jz) = b3(jx + 1, jy, jz); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b3(jx, jy, jz) = b3(jx - 1, jy, jz); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. 
* PI * (z - q) * 3.); } } } + } + checkData(result); + return result; +} - invert->setInnerBoundaryFlags(INVERT_AC_GRAD); - invert->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert->setCoefA(a3); - invert->setCoefC(c3); - invert->setCoefD(d3); - - try { - sol3 = invert->solve(sliceXZ(b3, mesh->ystart)); - error3 = (f1 - sol3) / f1; - absolute_error3 = f1 - sol3; - // max_error3 = max_error_at_ystart(abs(error3)); - max_error3 = max_error_at_ystart(abs(absolute_error3)); - } catch (BoutException& err) { - output << "BoutException occured in invert->solve(b3): " << err.what() << endl; - max_error3 = -1; - } +Field3D generate_c1(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; - output << endl << "Test 3: with coefficients constant in z, PETSc 2nd order" << endl; - // output<<"Time to set up is "<getSection("SPT"); - auto invert_SPT = Laplacian::create(SPT_options); - invert_SPT->setInnerBoundaryFlags(INVERT_AC_GRAD); - invert_SPT->setOuterBoundaryFlags(INVERT_AC_GRAD | INVERT_DC_GRAD); - invert_SPT->setCoefA(a3); - invert_SPT->setCoefC(c3); - invert_SPT->setCoefD(d3); - - sol4 = invert_SPT->solve(sliceXZ(b3, mesh->ystart)); - error4 = (f1 - sol4) / f1; - absolute_error4 = f1 - sol4; - // max_error4 = max_error_at_ystart(abs(error4)); - max_error4 = max_error_at_ystart(abs(absolute_error4)); - - output << endl << "Test 4: with coefficients constant in z, default solver" << endl; - // output<<"Time to set up is "<xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f5(jx, jy, jz) = - 0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-50. * pow(-p, 2)) * x - + (-p * exp(-50. * pow(-p, 2)) - - (1 - p) * exp(-50. * pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos(2. 
* PI - * (z - q)))) //make the gradients zero at both x-boundaries - ; - } + constexpr BoutReal p = 0.18439023; // NOLINT + constexpr BoutReal q = 0.401089473; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.); } } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f5(jx, jy, jz) = - 0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-50. * pow(-p, 2)) * x - + (-p * exp(-50. * pow(-p, 2)) - - (1 - p) * exp(-50. * pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos( - 2. * PI - * (z - q)))); //make the gradients zero at both x-boundaries - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f5(jx, jy, jz) = - 0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-50. * pow(-p, 2)) * x - + (-p * exp(-50. 
* pow(-p, 2)) - - (1 - p) * exp(-50. * pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos( - 2. * PI - * (z - q)))); //make the gradients zero at both x-boundaries - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.); } } } + } - p = 0.63298589; - q = 0.889237890; - d5.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d5(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.); - } + checkData(result); + return result; +} + +Field3D generate_a1(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + + constexpr BoutReal p = 0.612547; // NOLINT + constexpr BoutReal q = 0.30908712; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.); } } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d5(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. 
* PI * (z - q) * 3.); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d5(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.); } } } + } - p = 0.160983834; - q = 0.73050121087; - c5.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c5(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. 
* PI * (z - q) * 2.); - } + checkData(result); + return result; +} + +Field3D generate_f5(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + constexpr BoutReal p = 0.623901; // NOLINT + constexpr BoutReal q = 0.01209489; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = + 0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-50. * pow(-p, 2)) * x + + (-p * exp(-50. * pow(-p, 2)) - (1 - p) * exp(-50. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. * PI * (z - q)))); } } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c5(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = 0. + + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-50. * pow(-p, 2)) * x + + (-p * exp(-50. * pow(-p, 2)) + - (1 - p) * exp(-50. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. 
* PI * (z - q)))); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c5(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = 0. + + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-50. * pow(-p, 2)) * x + + (-p * exp(-50. * pow(-p, 2)) + - (1 - p) * exp(-50. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. * PI * (z - q)))); } } } + } + result.applyBoundary("neumann"); + checkData(result); + return result; +} - p = 0.5378950; - q = 0.2805870; - a5.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a5(jx, jy, jz) = -1. + p * cos(2. * PI * x * 2.) * sin(2. 
* PI * (z - q) * 7.); - } +Field3D generate_d5(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + constexpr BoutReal p = 0.63298589; // NOLINT + constexpr BoutReal q = 0.889237890; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.); } } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a5(jx, jy, jz) = - -1. + p * cos(2. * PI * x * 2.) * sin(2. * PI * (z - q) * 7.); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a5(jx, jy, jz) = - -1. + p * cos(2. * PI * x * 2.) * sin(2. 
* PI * (z - q) * 7.); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.); } } } + } + checkData(result); + return result; +} - f5.applyBoundary("neumann"); - mesh->communicate(f5, a5, c5, d5); +Field3D generate_c5(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + constexpr BoutReal p = 0.160983834; // NOLINT + constexpr BoutReal q = 0.73050121087; // NOLINT - b5 = d5 * Delp2(f5) + Grad_perp(c5) * Grad_perp(f5) / c5 + a5 * f5; - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b5(jx, jy, jz) = b5(jx + 1, jy, jz); - } - } + Field3D result; + + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.); } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b5(jx, jy, jz) = b5(jx - 1, jy, jz); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + p * cos(2. * PI * x * 5) * sin(2. 
* PI * (z - q) * 2.); } } } - - invert->setInnerBoundaryFlags(INVERT_AC_GRAD); - invert->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert->setCoefA(a5); - invert->setCoefC(c5); - invert->setCoefD(d5); - - try { - sol5 = invert->solve(sliceXZ(b5, mesh->ystart)); - error5 = (f5 - sol5) / f5; - absolute_error5 = f5 - sol5; - // max_error5 = max_error_at_ystart(abs(error5)); - max_error5 = max_error_at_ystart(abs(absolute_error5)); - } catch (BoutException& err) { - output << "BoutException occured in invert->solve(b5): " << err.what() << endl; - max_error5 = -1; - } - - output << endl << "Test 5: different profiles, PETSc 2nd order" << endl; - // output<<"Time to set up is "<setInnerBoundaryFlags(INVERT_AC_GRAD); - invert_4th->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert_4th->setGlobalFlags(INVERT_4TH_ORDER); - invert_4th->setCoefA(a5); - invert_4th->setCoefC(c5); - invert_4th->setCoefD(d5); - - try { - sol6 = invert_4th->solve(sliceXZ(b5, mesh->ystart)); - error6 = (f5 - sol6) / f5; - absolute_error6 = f5 - sol6; - // max_error6 = max_error_at_ystart(abs(error6)); - max_error6 = max_error_at_ystart(abs(absolute_error6)); - } catch (BoutException& err) { - output - << "BoutException occured in invert->solve(b6): Laplacian inversion failed to " - "converge (probably)" - << endl; - max_error6 = -1; + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + p * cos(2. * PI * x * 5) * sin(2. 
* PI * (z - q) * 2.); + } + } } + } + checkData(result); + return result; +} - output << endl << "Test 6: different profiles, PETSc 4th order" << endl; - // output<<"Time to set up is "<firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b7(jx, jy, jz) = b7(jx + 1, jy, jz); - } - } +Field3D generate_a5(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + constexpr BoutReal p = 0.5378950; // NOLINT + constexpr BoutReal q = 0.2805870; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + p * cos(2. * PI * x * 2.) * sin(2. * PI * (z - q) * 7.); } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b7(jx, jy, jz) = b7(jx - 1, jy, jz); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + p * cos(2. * PI * x * 2.) * sin(2. 
* PI * (z - q) * 7.); } } } - - invert->setInnerBoundaryFlags(INVERT_AC_GRAD); - invert->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert->setCoefA(a7); - invert->setCoefC(c7); - invert->setCoefD(d7); - - try { - sol7 = invert->solve(sliceXZ(b7, mesh->ystart)); - error7 = (f5 - sol7) / f5; - absolute_error7 = f5 - sol7; - // max_error7 = max_error_at_ystart(abs(error7)); - max_error7 = max_error_at_ystart(abs(absolute_error7)); - } catch (BoutException& err) { - output << "BoutException occured in invert->solve(b7): " << err.what() << endl; - max_error7 = -1; - } - - output - << endl - << "Test 7: different profiles, with coefficients constant in z, PETSc 2nd order" - << endl; - // output<<"Time to set up is "<setInnerBoundaryFlags(INVERT_AC_GRAD); - invert_SPT->setOuterBoundaryFlags(INVERT_AC_GRAD | INVERT_DC_GRAD); - invert_SPT->setCoefA(a7); - invert_SPT->setCoefC(c7); - invert_SPT->setCoefD(d7); - - sol8 = invert_SPT->solve(sliceXZ(b7, mesh->ystart)); - error8 = (f5 - sol8) / f5; - absolute_error8 = f5 - sol8; - // max_error8 = max_error_at_ystart(abs(error8)); - max_error8 = max_error_at_ystart(abs(absolute_error8)); - - output - << endl - << "Test 8: different profiles, with coefficients constant in z, default solver" - << endl; - // output<<"Time to set up is "<xstart, mesh->ystart, 0); - - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - if (local_max_error < error(jx, mesh->ystart, jz)) { - local_max_error = error(jx, mesh->ystart, jz); + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + p * cos(2. * PI * x * 2.) * sin(2. 
* PI * (z - q) * 7.); + } } } } - - BoutReal max_error; - - MPI_Allreduce(&local_max_error, &max_error, 1, MPI_DOUBLE, MPI_MAX, BoutComm::get()); - - return max_error; + checkData(result); + return result; } diff --git a/tests/unit/fake_parallel_mesh.hxx b/tests/unit/fake_parallel_mesh.hxx index c648bbab9c..805dcb2a0a 100644 --- a/tests/unit/fake_parallel_mesh.hxx +++ b/tests/unit/fake_parallel_mesh.hxx @@ -8,6 +8,8 @@ #include #include "../../src/mesh/impls/bout/boutmesh.hxx" +#include "bout/boundary_op.hxx" +#include "bout/boundary_region.hxx" #include "bout/boutcomm.hxx" #include "bout/coordinates.hxx" #include "bout/field2d.hxx" diff --git a/tests/unit/mesh/test_boundary_factory.cxx b/tests/unit/mesh/test_boundary_factory.cxx index 6637e73711..b552f7629e 100644 --- a/tests/unit/mesh/test_boundary_factory.cxx +++ b/tests/unit/mesh/test_boundary_factory.cxx @@ -1,6 +1,7 @@ #include "gtest/gtest.h" #include "bout/boundary_factory.hxx" +#include "bout/boundary_op.hxx" #include "bout/boundary_region.hxx" #include "test_extras.hxx" diff --git a/tests/unit/test_extras.hxx b/tests/unit/test_extras.hxx index 6f78e99fd3..700b977ac8 100644 --- a/tests/unit/test_extras.hxx +++ b/tests/unit/test_extras.hxx @@ -8,6 +8,7 @@ #include #include +#include "bout/boundary_region.hxx" #include "bout/boutcomm.hxx" #include "bout/coordinates.hxx" #include "bout/field3d.hxx" @@ -232,8 +233,9 @@ public: RangeIterator iterateBndryUpperInnerY() const override { return RangeIterator(); } void addBoundary(BoundaryRegion* region) override { boundaries.push_back(region); } std::vector getBoundaries() override { return boundaries; } - std::vector getBoundariesPar() override { - return std::vector(); + std::vector> + getBoundariesPar(BoundaryParType UNUSED(type)) override { + return std::vector>(); } BoutReal GlobalX(int jx) const override { return jx; } BoutReal GlobalY(int jy) const override { return jy; } diff --git a/tools/archiving/sdctools/sdclib/sdclib.c 
b/tools/archiving/sdctools/sdclib/sdclib.c index f7db255a47..7294cc0791 100644 --- a/tools/archiving/sdctools/sdclib/sdclib.c +++ b/tools/archiving/sdctools/sdclib/sdclib.c @@ -34,8 +34,6 @@ #include "sdclib.h" -//#define DEBUG - #define DEFAULT_IFRAME 10 #define DEFAULT_ORDER 4 diff --git a/tools/pylib/_boutpp_build/CMakeLists.txt b/tools/pylib/_boutpp_build/CMakeLists.txt index 6b88986a28..3be2a5d2aa 100644 --- a/tools/pylib/_boutpp_build/CMakeLists.txt +++ b/tools/pylib/_boutpp_build/CMakeLists.txt @@ -25,7 +25,7 @@ bout_python_maybe_error(${Cython_FOUND} Cython) find_package(Bash) bout_python_maybe_error(${Bash_FOUND} Bash) -execute_process(COMMAND ${Python_EXECUTABLE} -c "import jinja2" +execute_process(COMMAND ${Python3_EXECUTABLE} -c "import jinja2" RESULT_VARIABLE jinja2_FOUND) if (jinja2_FOUND EQUAL 0) # We have jinja2 - all good @@ -33,7 +33,7 @@ else() bout_python_maybe_error(OFF jinja2) endif() -execute_process(COMMAND ${Python_EXECUTABLE} -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX')[:-3])" +execute_process(COMMAND ${Python3_EXECUTABLE} -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX')[:-3])" RESULT_VARIABLE PYTHON_WORKING OUTPUT_VARIABLE PYTHON_EXT_SUFFIX OUTPUT_STRIP_TRAILING_WHITESPACE @@ -73,7 +73,7 @@ foreach(file IN LISTS files) #message(FATAL_ERROR "${gen} ${src}/${file}.jinja") add_custom_command(OUTPUT ${gen} COMMAND ${CMAKE_COMMAND} -E make_directory ${tar} - COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${tar}/..:\${PYTHONPATH} ${Python_EXECUTABLE} generate.py ${file}.jinja ${gen} + COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${tar}/..:\${PYTHONPATH} ${Python3_EXECUTABLE} generate.py ${file}.jinja ${gen} DEPENDS ${src}/${file}.jinja DEPENDS ${src}/helper.py DEPENDS ${src}/resolve_enum_inv.pyx.jinja @@ -93,8 +93,7 @@ endforeach() add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libboutpp.cpp COMMAND ${CMAKE_COMMAND} -E copy boutpp.pyx libboutpp.pyx - COMMAND ${Python_EXECUTABLE} -m cython libboutpp.pyx 
--cplus -3 -X binding=True -X embedsignature=True - COMMENT "Cythonizing python interface" + COMMAND ${Python3_EXECUTABLE} -m cython libboutpp.pyx --cplus -3 -X binding=True -X embedsignature=True WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${boutpp_depends} ) @@ -120,5 +119,6 @@ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/boutpp.py DESTINATION ${CMAKE_INSTALL_PYTHON_SITEARCH}/boutpp/ RENAME __init__.py ) + target_link_libraries(boutpp${PYTHON_EXT_SUFFIX} bout++) -target_include_directories(boutpp${PYTHON_EXT_SUFFIX} PRIVATE $ ${Numpy_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}) +target_include_directories(boutpp${PYTHON_EXT_SUFFIX} PRIVATE $ ${Numpy_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS}) diff --git a/tools/pylib/_boutpp_build/bout_options.pxd b/tools/pylib/_boutpp_build/bout_options.pxd index be17608cea..365e08bcc7 100644 --- a/tools/pylib/_boutpp_build/bout_options.pxd +++ b/tools/pylib/_boutpp_build/bout_options.pxd @@ -43,6 +43,7 @@ cdef extern from "bout/options.hxx": void get(string, double&, double) void get(string, bool&, bool) void cleanCache() + void setConditionallyUsed() cdef extern from "bout/optionsreader.hxx": diff --git a/tools/pylib/_boutpp_build/boutcpp.pxd.jinja b/tools/pylib/_boutpp_build/boutcpp.pxd.jinja index 12e210a5b5..8f838b864c 100644 --- a/tools/pylib/_boutpp_build/boutcpp.pxd.jinja +++ b/tools/pylib/_boutpp_build/boutcpp.pxd.jinja @@ -148,10 +148,10 @@ cdef extern from "bout/physicsmodel.hxx": ctypedef void (*Method)(void *param, void *user_data) cdef extern from "helper.h": cppclass PythonModel(PhysicsModel): - int rhs(double t) + int rhs(double t) except +raise_bout_py_error void pyinit() void free() - void solve() + void solve() except +raise_bout_py_error Solver * getSolver() void set_rhs_func(PythonModelCallback*) void set_init_func(PythonModelCallback*) diff --git a/tools/pylib/_boutpp_build/boutpp.pyx.jinja b/tools/pylib/_boutpp_build/boutpp.pyx.jinja index 3aeb1428eb..9aedbb291a 100644 --- 
a/tools/pylib/_boutpp_build/boutpp.pyx.jinja +++ b/tools/pylib/_boutpp_build/boutpp.pyx.jinja @@ -583,9 +583,9 @@ cdef class {{ field.field_type }}: {% endfor %} def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.isSelfOwned and self.cobj != NULL: del self.cobj self.cobj = NULL @@ -645,9 +645,9 @@ cdef class {{ vec }}: def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.isSelfOwned and self.cobj != NULL: del self.cobj self.cobj=NULL @@ -742,9 +742,9 @@ cdef class Mesh: return msh def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.cobj and self.isSelfOwned: del self.cobj self.cobj = NULL @@ -850,9 +850,9 @@ cdef class Coordinates: {% endfor %} def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.cobj and self.isSelfOwned: del self.cobj self.cobj = NULL @@ -931,9 +931,9 @@ cdef class FieldFactory: checkInit() cobj=< c.FieldFactory*>0 def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.cobj != NULL: del self.cobj self.cobj = NULL @@ -965,9 +965,9 @@ cdef class PythonModelCallback: self.cobj = new c.PythonModelCallback(callback, method) def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.cobj: del self.cobj self.cobj = NULL @@ -1037,12 +1037,12 @@ cdef class PhysicsModelBase(object): self.cmodel.set_init_func(self.callbackinit) def __dealloc__(self): - if hasattr(self, "__boutpp_dealloc"): - self.__boutpp_dealloc() + if hasattr(self, "_boutpp_dealloc"): + self._boutpp_dealloc() else: - PhysicsModelBase.__boutpp_dealloc(self) + 
PhysicsModelBase._boutpp_dealloc(self) - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.cmodel != 0: self.cmodel.free() del self.cmodel @@ -1123,8 +1123,8 @@ class PhysicsModel(PhysicsModelBase): def __dealloc__(self): super(PhysicsModel,self).__dealloc__() - def __boutpp_dealloc(self): - super(PhysicsModel,self).__boutpp_dealloc() + def _boutpp_dealloc(self): + super(PhysicsModel,self)._boutpp_dealloc() cdef extern from "bout/bout.hxx": int BoutInitialise(int&, char **&) except +raise_bout_py_error @@ -1204,13 +1204,14 @@ def finalise(): PythonModelCallback) for obj in objects: if isinstance(obj, ourClasses): - if hasattr(obj, "__boutpp_dealloc"): - obj.__boutpp_dealloc() + if hasattr(obj, "_boutpp_dealloc"): + obj._boutpp_dealloc() else: for ourClass in ourClasses: if isinstance(obj, ourClass): - ourClass.__boutpp_dealloc(obj) - break + if hasattr(ourClass, "_boutpp_dealloc"): + ourClass._boutpp_dealloc(obj) + break del objects # Actually finalise if wasInit: @@ -1715,10 +1716,19 @@ cdef class Options: opt.get(key, ret_str, default_) return ret_str.decode() + def setConditionallyUsed(self): + """Set the attribute "conditionally used" to be true for \p options + and all its children/sections, causing `Options::getUnused` to + assume those options have been used. This is useful to ignore + options when checking for typos etc. + """ + cdef c.Options* opt = self.cobj + opt.setConditionallyUsed() + def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.isSelfOwned and self.cobj != NULL: del self.cobj self.cobj = NULL