diff --git a/.ci_fedora.sh b/.ci_fedora.sh
index 452afb4b7e..b8805abb15 100755
--- a/.ci_fedora.sh
+++ b/.ci_fedora.sh
@@ -50,7 +50,7 @@ then
     cp -a /tmp/BOUT-dev /home/test/
     chown -R test /home/test
     chmod u+rwX /home/test -R
-    sudo -u test ${0/\/tmp/\/home\/test} $mpi
+    su - test -c "${0/\/tmp/\/home\/test} $mpi"
 ## If we are called as normal user, run test
 else
     . /etc/profile.d/modules.sh
diff --git a/.clang-tidy b/.clang-tidy
index 6ca5262f41..0117c20e42 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -2,7 +2,6 @@
 Checks:          'clang-diagnostic-*,clang-analyzer-*,performance-*,readability-*,bugprone-*,clang-analyzer-*,cppcoreguidelines-*,mpi-*,misc-*,-readability-magic-numbers,-cppcoreguidelines-avoid-magic-numbers,-misc-non-private-member-variables-in-classes,-clang-analyzer-optin.mpi*,-bugprone-exception-escape,-cppcoreguidelines-pro-bounds-pointer-arithmetic,-readability-function-cognitive-complexity,-misc-no-recursion,-bugprone-easily-swappable-parameters'
 WarningsAsErrors: ''
 HeaderFilterRegex: ''
-AnalyzeTemporaryDtors: false
 FormatStyle:     file
 CheckOptions:
 
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 42965e75e8..bdaeb3dc4f 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -39,7 +39,7 @@ jobs:
         is_cron:
           - ${{ github.event_name == 'cron' }}
         config:
-          - name: "CMake, PETSc unreleased, ADIOS"
+          - name: "CMake, PETSc unreleased, ADIOS2"
             os: ubuntu-20.04
             cmake_options: "-DBUILD_SHARED_LIBS=ON
                             -DBOUT_ENABLE_METRIC_3D=ON
diff --git a/.gitignore b/.gitignore
index 7ddf9526ab..934da1c0de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -85,3 +85,5 @@ coverage/
 /_version.txt
 /BOUT++-v*.tar.gz
 /BOUT++-v*.tar.xz
+/CMakeCache.txt
+/CMakeFiles/cmake.check_cache
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000000..7830073846
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,98 @@
+###############################################################################
+# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS
+# project contributors. See the COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (MIT)
+###############################################################################
+
+# DESCRIPTION:
+###############################################################################
+# General GitLab pipelines configurations for supercomputers and Linux clusters
+# at Lawrence Livermore National Laboratory (LLNL).
+# This entire pipeline is LLNL-specific
+#
+# Important note: This file is a template provided by llnl/radiuss-shared-ci.
+# It remains to set variable values, change the reference to the
+# radiuss-shared-ci repo, and opt in or out of optional features. The project
+# can then extend it with additional stages.
+#
+# In addition, each project should copy over and complete:
+# - .gitlab/custom-jobs-and-variables.yml
+# - .gitlab/subscribed-pipelines.yml
+#
+# The jobs should be specified in a file local to the project,
+# - .gitlab/jobs/${CI_MACHINE}.yml
+# or generated (see LLNL/Umpire for an example).
+###############################################################################
+
+# We define the following GitLab pipeline variables:
+variables:
+##### LC GITLAB CONFIGURATION
+# Use an LLNL service user to run CI. This prevents pipelines from running as
+# an actual user.
+  LLNL_SERVICE_USER: ""
+# Use the service user workspace. Solves permission issues, stores everything
+# at the same location whoever triggers a pipeline.
+  CUSTOM_CI_BUILDS_DIR: "/usr/workspace/BOUT-GPU/gitlab-runner"
+# Tells Gitlab to recursively update the submodules when cloning the project.
+#  GIT_SUBMODULE_STRATEGY: recursive
+
+##### PROJECT VARIABLES
+# We build the projects in the CI clone directory.
+# Used in script/gitlab/build_and_test.sh script.
+# TODO: add a clean-up mechanism.
+  BUILD_ROOT: ${CI_PROJECT_DIR}
+
+##### SHARED_CI CONFIGURATION
+# Required information about GitHub repository: github.com/boutproject/BOUT-dev
+  GITHUB_PROJECT_NAME: "BOUT-dev"
+  GITHUB_PROJECT_ORG: "boutproject"
+# Set the build-and-test command.
+# Nested variables are allowed and useful to customize the job command. We
+# prevent variable expansion so that you can define them at job level.
+  JOB_CMD:
+    value: "tests/gitlab/ci-tests.sh"
+    expand: false
+# Override the pattern describing branches that will skip the "draft PR filter
+# test".  Add protected branches here. See default value in
+# preliminary-ignore-draft-pr.yml.
+#  ALWAYS_RUN_PATTERN: ""
+
+# We organize the build-and-test stage with sub-pipelines. Each sub-pipeline
+# corresponds to a test batch on a given machine.
+
+# High level stages
+stages:
+  - prerequisites
+  - build-and-test
+
+# Template for jobs triggering a build-and-test sub-pipeline:
+.build-and-test:
+  stage: build-and-test
+  trigger:
+    include:
+      - local: '.gitlab/custom-jobs-and-variables.yml'
+      - project: 'radiuss/radiuss-shared-ci'
+        ref: 'v2024.07.0'
+        file: 'pipelines/${CI_MACHINE}.yml'
+      # Add your jobs
+      # you can use a local file
+      - local: '.gitlab/jobs/${CI_MACHINE}.yml'
+      # or a file generated in the previous steps
+      # - artifact: '${CI_MACHINE}-jobs.yml'
+      #   job: 'generate-job-file'
+      # (See Umpire CI setup for an example).
+    strategy: depend
+    forward:
+      pipeline_variables: true
+
+include:
+  # Sets ID tokens for every job using `default:`
+  - project: 'lc-templates/id_tokens'
+    file: 'id_tokens.yml'
+  # [Optional] checks preliminary to running the actual CI test
+  - project: 'radiuss/radiuss-shared-ci'
+    ref: 'v2024.07.0'
+    file: 'utilities/preliminary-ignore-draft-pr.yml'
+  # pipelines subscribed by the project
+  - local: '.gitlab/subscribed-pipelines.yml'
diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml
new file mode 100644
index 0000000000..5af33aee04
--- /dev/null
+++ b/.gitlab/custom-jobs-and-variables.yml
@@ -0,0 +1,62 @@
+###############################################################################
+# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS
+# project contributors. See the COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (MIT)
+###############################################################################
+
+# We define the following GitLab pipeline variables:
+variables:
+# In some pipelines we create only one allocation shared among jobs in
+# order to save time and resources. This allocation has to be uniquely
+# named so that we are sure to retrieve it and avoid collisions.
+  ALLOC_NAME: ${CI_PROJECT_NAME}_ci_${CI_PIPELINE_ID}
+
+# Ruby
+# Arguments for top level allocation
+  RUBY_SHARED_ALLOC: "--mpi=none --exclusive --reservation=ci --time=20 --nodes=1"
+# Arguments for job level allocation
+  RUBY_JOB_ALLOC: "--mpi=none --reservation=ci --nodes=1"
+# Add variables that should apply to all the jobs on a machine:
+#  RUBY_MY_VAR: "..."
+
+# Poodle
+# Arguments for top level allocation
+  POODLE_SHARED_ALLOC: "--exclusive --partition=pdebug --time=10 --nodes=1"
+# Arguments for job level allocation
+  POODLE_JOB_ALLOC: "--nodes=1"
+# Add variables that should apply to all the jobs on a machine:
+#  POODLE_MY_VAR: "..."
+
+# Corona
+# Arguments for top level allocation
+# OPTIONAL: "-o per-resource.count=2" allows 2 jobs to run on each node.
+  CORONA_SHARED_ALLOC: "--exclusive --time-limit=15m --nodes=1"
+# Arguments for job level allocation
+  CORONA_JOB_ALLOC: "--nodes=1 --begin-time=+5s"
+# Add variables that should apply to all the jobs on a machine:
+#  CORONA_MY_VAR: "..."
+
+# Tioga
+# Arguments for top level allocation
+# OPTIONAL: "-o per-resource.count=2" allows 2 jobs to run on each node.
+  TIOGA_SHARED_ALLOC: "--queue=pci --exclusive --time-limit=15m --nodes=1"
+# Arguments for job level allocation
+  TIOGA_JOB_ALLOC: "--nodes=1 --begin-time=+5s"
+# Add variables that should apply to all the jobs on a machine:
+#  TIOGA_MY_VAR: "..."
+
+# Lassen uses a different job scheduler (spectrum lsf) that does not allow
+# pre-allocation the same way slurm does. Arguments for job level allocation
+  LASSEN_JOB_ALLOC: "1 -W 30 -q pci"
+# Add variables that should apply to all the jobs on a machine:
+#  LASSEN_MY_VAR: "..."
+
+
+# Configuration shared by build and test jobs specific to this project.
+# Not all configuration can be shared. Here projects can fine tune the
+# CI behavior.
+# See Umpire for an example (export junit test reports).
+.custom_job:
+  variables:
+    JOB_TEMPLATE_CANNOT_BE_EMPTY: "True"
diff --git a/.gitlab/jobs/lassen.yml b/.gitlab/jobs/lassen.yml
new file mode 100644
index 0000000000..6363dd9336
--- /dev/null
+++ b/.gitlab/jobs/lassen.yml
@@ -0,0 +1,59 @@
+###############################################################################
+# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS
+# project contributors. See the COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (MIT)
+###############################################################################
+
+# We require projects to define their job command using a variable (JOB_CMD).
+# In customization/gitlab-ci.yml, we encourage defining this variable as
+# non-expandable, so that projects can use nested variables to configure the
+# job command. The caveat is that the reproducer here cannot capture the
+# definition of these variables in a generic fashion. By overriding the
+# following section, projects can specify the variables to define in the
+# reproducer to exactly reproduce the CI build.
+.lassen_reproducer_vars:
+  script:
+    - echo -e "Running on Lassen\n"
+
+# With GitLab CI, included files cannot be empty.
+# TODO: remove when you have at least one job defined.
+variables:
+  INCLUDED_FILE_CANNOT_BE_EMPTY: "True"
+
+###############
+# Explanations:
+###############
+# RADIUSS Shared CI provides a pipeline for each machine, where a template job
+# is provided. Each of your jobs must extend this template to be added to the
+# list of jobs running on the associated machine.
+#
+# The job template then expects you to define the "JOB_CMD" variable with the
+# one line command used to trigger the build and test of your project.
+#
+# We suggest that you set your command in such a way that you can then
+# customize it per job with variables. E.g.:
+# "./path/to/my_ci_script ${A_VARIABLE}"
+
+## Adding jobs defined by the project.
+## Note: placing the extends section first allows you to override part of the
+## shared implementation if needed (and if you know what you are doing).
+#<job-name (typically build target description)>:
+#  extends: .job_on_lassen
+#  variables:
+#    <A_VARIABLE>: "<with job specific value>"
+
+.base-job:
+  extends: .job_on_lassen
+  before_script:
+    # Update BOUT-configs in the shared directory.
+    - pushd /usr/workspace/BOUT-GPU/BOUT-configs
+    - git pull
+    - popd
+    # Create the environment.
+    - source /usr/workspace/BOUT-GPU/BOUT-configs/lassen/setup-env.sh
+  after_script:
+    - rm -rf ${CI_BUILDS_DIR} ${CI_PROJECT_DIR}
+
+build-test-cuda-minimal:
+  extends: .base-job
\ No newline at end of file
diff --git a/.gitlab/subscribed-pipelines.yml b/.gitlab/subscribed-pipelines.yml
new file mode 100644
index 0000000000..265a344ba8
--- /dev/null
+++ b/.gitlab/subscribed-pipelines.yml
@@ -0,0 +1,91 @@
+###############################################################################
+# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS
+# project contributors. See the COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (MIT)
+###############################################################################
+
+# The template job to test whether a machine is up.
+# Expects CI_MACHINE defined to machine name.
+.machine-check:
+  stage: prerequisites
+  tags: [shell, oslic]
+  variables:
+    GIT_STRATEGY: none
+  script:
+    - |
+      if [[ $(jq '.[env.CI_MACHINE].total_nodes_up' /usr/global/tools/lorenz/data/loginnodeStatus) == 0 ]]
+      then
+        echo -e "\e[31mNo node available on ${CI_MACHINE}\e[0m"
+        curl --url "https://api.github.com/repos/${GITHUB_PROJECT_ORG}/${GITHUB_PROJECT_NAME}/statuses/${CI_COMMIT_SHA}" \
+             --header 'Content-Type: application/json' \
+             --header "authorization: Bearer ${GITHUB_TOKEN}" \
+             --data "{ \"state\": \"failure\", \"target_url\": \"${CI_PIPELINE_URL}\", \"description\": \"GitLab ${CI_MACHINE} down\", \"context\": \"ci/gitlab/${CI_MACHINE}\" }"
+        exit 1
+      fi
+
+###
+# Trigger a build-and-test pipeline for a machine.
+# Comment out the jobs for machines you don't need.
+###
+
+# RUBY
+#ruby-up-check:
+#  variables:
+#    CI_MACHINE: "ruby"
+#  extends: [.machine-check]
+#
+#ruby-build-and-test:
+#  variables:
+#    CI_MACHINE: "ruby"
+#  needs: [ruby-up-check]
+#  extends: [.build-and-test]
+
+## POODLE
+#poodle-up-check:
+#  variables:
+#    CI_MACHINE: "poodle"
+#  extends: [.machine-check]
+#
+#poodle-build-and-test:
+#  variables:
+#    CI_MACHINE: "poodle"
+#  needs: [poodle-up-check]
+#  extends: [.build-and-test]
+#
+## CORONA
+#corona-up-check:
+#  variables:
+#    CI_MACHINE: "corona"
+#  extends: [.machine-check]
+#
+#corona-build-and-test:
+#  variables:
+#    CI_MACHINE: "corona"
+#  needs: [corona-up-check]
+#  extends: [.build-and-test]
+#
+## TIOGA
+#tioga-up-check:
+#  variables:
+#    CI_MACHINE: "tioga"
+#  extends: [.machine-check]
+#
+#tioga-build-and-test:
+#  variables:
+#    CI_MACHINE: "tioga"
+#  needs: [tioga-up-check]
+#  extends: [.build-and-test]
+
+# LASSEN
+lassen-up-check:
+  variables:
+    CI_MACHINE: "lassen"
+  extends: [.machine-check]
+
+lassen-build-and-test:
+  variables:
+    CI_MACHINE: "lassen"
+  needs: [lassen-up-check]
+  extends: [.build-and-test]
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c1c82ea4e3..f57a78a14a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -935,7 +935,7 @@ message("
    SUNDIALS support         : ${BOUT_HAS_SUNDIALS}
    HYPRE support            : ${BOUT_HAS_HYPRE}
    NetCDF support           : ${BOUT_HAS_NETCDF}
-   ADIOS support            : ${BOUT_HAS_ADIOS}
+   ADIOS2 support           : ${BOUT_HAS_ADIOS2}
    FFTW support             : ${BOUT_HAS_FFTW}
    LAPACK support           : ${BOUT_HAS_LAPACK}
    OpenMP support           : ${BOUT_USE_OPENMP}
diff --git a/bin/bout-build-deps.sh b/bin/bout-build-deps.sh
index 19e3b2a0d3..d96d500dc9 100755
--- a/bin/bout-build-deps.sh
+++ b/bin/bout-build-deps.sh
@@ -98,7 +98,7 @@ netcdf() {
 
 nccxx() {
     cd $BUILD
-    wget -c ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-cxx4-$NCCXXVER.tar.gz || :
+    wget -c https://downloads.unidata.ucar.edu/netcdf-cxx/$NCCXXVER/netcdf-cxx4-$NCCXXVER.tar.gz || :
     tar -xf netcdf-cxx4-$NCCXXVER.tar.gz
     cd netcdf-cxx4-$NCCXXVER
     CPPFLAGS="-I$PREFIX/include" LDFLAGS="-L$PREFIX/lib/" ./configure --prefix=$PREFIX $NCCXXFLAGS
@@ -286,17 +286,17 @@ set -x
 ## Setup folders and links
 setup
 ## Build and install hdf5
-hdf5
+test $NO_HDF5 || hdf5
 ## Build and install netcdf
-netcdf
+test $NO_NETCDF || netcdf
 ## Build and install C++ interface for netcdf
-nccxx
+test $NO_NCXX || nccxx
 ## Build and install FFTW
-fftw
+test $NO_FFTW || fftw
 ## Build and install Sundials
-sundials
+test $NO_SUNDIALS || sundials
 ## Build and install PETSc
-petsc
+test $NO_PETSC || petsc
 ## Download BOUT++ submodules
 submod
 # Install python packages
diff --git a/bin/bout-config.in b/bin/bout-config.in
index fa19779cfe..b5a62a42eb 100755
--- a/bin/bout-config.in
+++ b/bin/bout-config.in
@@ -29,7 +29,7 @@ idlpath="@IDLCONFIGPATH@"
 pythonpath="@PYTHONCONFIGPATH@"
 
 has_netcdf="@BOUT_HAS_NETCDF@"
-has_adios="@BOUT_HAS_ADIOS@"
+has_adios2="@BOUT_HAS_ADIOS2@"
 has_legacy_netcdf="@BOUT_HAS_LEGACY_NETCDF@"
 has_pnetcdf="@BOUT_HAS_PNETCDF@"
 has_pvode="@BOUT_HAS_PVODE@"
@@ -71,18 +71,18 @@ Available values for OPTION include:
   --idl          IDL path
   --python       Python path
 
-  --has-netcdf  NetCDF file support
-  --has-adios   ADIOS file support
+  --has-netcdf   NetCDF file support
+  --has-adios2   ADIOS2 file support
   --has-legacy-netcdf  Legacy NetCDF file support
-  --has-pnetcdf Parallel NetCDF file support
-  --has-pvode   PVODE solver support
-  --has-cvode   SUNDIALS CVODE solver support
-  --has-ida     SUNDIALS IDA solver support
-  --has-lapack  LAPACK support
-  --has-petsc   PETSc support
-  --has-hypre   Hypre support
-  --has-slepc   SLEPc support
-  --has-nls     Natural Language Support
+  --has-pnetcdf  Parallel NetCDF file support
+  --has-pvode    PVODE solver support
+  --has-cvode    SUNDIALS CVODE solver support
+  --has-ida      SUNDIALS IDA solver support
+  --has-lapack   LAPACK support
+  --has-petsc    PETSc support
+  --has-hypre    Hypre support
+  --has-slepc    SLEPc support
+  --has-nls      Natural Language Support
 
   --petsc-has-sundials 
 
diff --git a/bout++Config.cmake.in b/bout++Config.cmake.in
index 3d824e455f..5af0dc43ea 100644
--- a/bout++Config.cmake.in
+++ b/bout++Config.cmake.in
@@ -15,7 +15,7 @@ set(BOUT_USE_METRIC_3D @BOUT_USE_METRIC_3D@)
 
 set(BOUT_HAS_PVODE @BOUT_HAS_PVODE@)
 set(BOUT_HAS_NETCDF @BOUT_HAS_NETCDF@)
-set(BOUT_HAS_ADIOS @BOUT_HAS_ADIOS@)
+set(BOUT_HAS_ADIOS2 @BOUT_HAS_ADIOS2@)
 set(BOUT_HAS_FFTW @BOUT_HAS_FFTW@)
 set(BOUT_HAS_LAPACK @BOUT_HAS_LAPACK@)
 set(BOUT_HAS_PETSC @BOUT_HAS_PETSC@)
diff --git a/cmake/FindCython.cmake b/cmake/FindCython.cmake
index 76f43480d9..3b98cde89e 100644
--- a/cmake/FindCython.cmake
+++ b/cmake/FindCython.cmake
@@ -10,7 +10,7 @@
 #   CYTHON_FOUND - true if Cython was found
 #   CYTHON_VERSION - Cython version
 
-execute_process(COMMAND ${Python_EXECUTABLE} -c "import cython ; print(cython.__version__)"
+execute_process(COMMAND ${Python3_EXECUTABLE} -c "import cython ; print(cython.__version__)"
   RESULT_VARIABLE _cython_runs
   OUTPUT_VARIABLE CYTHON_VERSION
   OUTPUT_STRIP_TRAILING_WHITESPACE
diff --git a/cmake/FindNumpy.cmake b/cmake/FindNumpy.cmake
index 201bc19221..b6de6e3e35 100644
--- a/cmake/FindNumpy.cmake
+++ b/cmake/FindNumpy.cmake
@@ -12,32 +12,32 @@
 #   Numpy_INCLUDE_DIR
 
 
-find_package(Python 3.6 COMPONENTS Interpreter Development)
+find_package(Python3 3.6 COMPONENTS Interpreter Development)
 
-if (NOT Python_FOUND)
+if (NOT Python3_FOUND)
   message(STATUS "Could not find numpy as python was not found. Maybe the developement package is missing?")
-  set(Numpy_FOUND ${Python_FOUND})
+  set(Numpy_FOUND ${Python3_FOUND})
   return()
 endif()
 
 if (NOT Numpy_FOUND)
-  execute_process(COMMAND ${Python_EXECUTABLE} -c "import numpy ; print(numpy.__version__)"
+  execute_process(COMMAND ${Python3_EXECUTABLE} -c "import numpy ; print(numpy.__version__)"
     OUTPUT_STRIP_TRAILING_WHITESPACE
     OUTPUT_VARIABLE Numpy_VERSION
     )
-  execute_process(COMMAND ${Python_EXECUTABLE} -c "import numpy ; print(numpy.get_include())"
+  execute_process(COMMAND ${Python3_EXECUTABLE} -c "import numpy ; print(numpy.get_include())"
     OUTPUT_STRIP_TRAILING_WHITESPACE
     OUTPUT_VARIABLE _numpy_include_dirs
     )
 endif()
 
 if (Numpy_DEBUG)
-  message(STATUS "Looking for numpy headers in: ${_numpy_include_dirs} ${PYTHON_INCLUDE_DIR}")
+  message(STATUS "Looking for numpy headers in: ${_numpy_include_dirs} ${Python3_INCLUDE_DIRS}")
 endif()
 
 find_path(Numpy_INCLUDE_DIR
   numpy/arrayobject.h
-  PATHS "${_numpy_include_dirs}" "${PYTHON_INCLUDE_DIR}"
+  PATHS "${_numpy_include_dirs}" "${Python3_INCLUDE_DIRS}"
   PATH_SUFFIXES numpy/core/include
   )
 
diff --git a/cmake/FindPackageMultipass.cmake b/cmake/FindPackageMultipass.cmake
index 2452096b56..99bbace448 100644
--- a/cmake/FindPackageMultipass.cmake
+++ b/cmake/FindPackageMultipass.cmake
@@ -108,7 +108,7 @@ macro (MULTIPASS_C_SOURCE_RUNS includes libraries source runs)
 endmacro (MULTIPASS_C_SOURCE_RUNS)
 
 macro (MULTIPASS_SOURCE_COMPILES includes libraries source runs language)
-  include (Check${language}SourceRuns)
+  include (Check${language}SourceCompiles)
   # This is a ridiculous hack.  CHECK_${language}_SOURCE_* thinks that if the
   # *name* of the return variable doesn't change, then the test does
   # not need to be re-run.  We keep an internal count which we
diff --git a/cmake/FindSUNDIALS.cmake b/cmake/FindSUNDIALS.cmake
index 1ecb5db429..15b266d06a 100644
--- a/cmake/FindSUNDIALS.cmake
+++ b/cmake/FindSUNDIALS.cmake
@@ -104,16 +104,8 @@ endforeach()
 
 if (SUNDIALS_INCLUDE_DIR)
   file(READ "${SUNDIALS_INCLUDE_DIR}/sundials_config.h" SUNDIALS_CONFIG_FILE)
-  string(FIND "${SUNDIALS_CONFIG_FILE}" "SUNDIALS_PACKAGE_VERSION" index)
-  if("${index}" LESS 0)
-    # Version >3
-    set(SUNDIALS_VERSION_REGEX_PATTERN
-      ".*#define SUNDIALS_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*")
-  else()
-    # Version <3
-    set(SUNDIALS_VERSION_REGEX_PATTERN
-      ".*#define SUNDIALS_PACKAGE_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*")
-  endif()
+  set(SUNDIALS_VERSION_REGEX_PATTERN
+    ".*#define SUNDIALS_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*")
   string(REGEX MATCH ${SUNDIALS_VERSION_REGEX_PATTERN} _ "${SUNDIALS_CONFIG_FILE}")
   set(SUNDIALS_VERSION_MAJOR ${CMAKE_MATCH_1} CACHE STRING "")
   set(SUNDIALS_VERSION_MINOR ${CMAKE_MATCH_2} CACHE STRING "")
diff --git a/cmake/SetupBOUTThirdParty.cmake b/cmake/SetupBOUTThirdParty.cmake
index 53adbec92d..9c49fe6fdc 100644
--- a/cmake/SetupBOUTThirdParty.cmake
+++ b/cmake/SetupBOUTThirdParty.cmake
@@ -190,10 +190,10 @@ endif()
 message(STATUS "NetCDF support: ${BOUT_USE_NETCDF}")
 set(BOUT_HAS_NETCDF ${BOUT_USE_NETCDF})
 
-option(BOUT_USE_ADIOS "Enable support for ADIOS output" ON)
-option(BOUT_DOWNLOAD_ADIOS "Download and build ADIOS2" OFF)
-if (BOUT_USE_ADIOS)
-  if (BOUT_DOWNLOAD_ADIOS)
+option(BOUT_USE_ADIOS2 "Enable support for ADIOS output" ON)
+option(BOUT_DOWNLOAD_ADIOS2 "Download and build ADIOS2" OFF)
+if (BOUT_USE_ADIOS2)
+  if (BOUT_DOWNLOAD_ADIOS2)
     message(STATUS "Downloading and configuring ADIOS2")
     include(FetchContent)
     FetchContent_Declare(
@@ -220,12 +220,12 @@ if (BOUT_USE_ADIOS)
       find_package(MPI REQUIRED COMPONENTS C)
       target_link_libraries(bout++ PUBLIC adios2::cxx11_mpi MPI::MPI_C)
     else()
-      set(BOUT_USE_ADIOS OFF)
+      set(BOUT_USE_ADIOS2 OFF)
     endif()
   endif()
 endif()
-message(STATUS "ADIOS support: ${BOUT_USE_ADIOS}")
-set(BOUT_HAS_ADIOS ${BOUT_USE_ADIOS})
+message(STATUS "ADIOS2 support: ${BOUT_USE_ADIOS2}")
+set(BOUT_HAS_ADIOS2 ${BOUT_USE_ADIOS2})
 
 
 option(BOUT_USE_FFTW "Enable support for FFTW" ON)
@@ -281,8 +281,8 @@ if (BOUT_USE_SUNDIALS)
     include(FetchContent)
     FetchContent_Declare(
       sundials
-      GIT_REPOSITORY https://github.com/ZedThree/sundials
-      GIT_TAG        cmake-export-fixes
+      GIT_REPOSITORY https://github.com/LLNL/sundials
+      GIT_TAG        v7.0.0
       )
     # Note: These are settings for building SUNDIALS
     set(EXAMPLES_ENABLE_C OFF CACHE BOOL "" FORCE)
@@ -297,7 +297,11 @@ if (BOUT_USE_SUNDIALS)
     FetchContent_MakeAvailable(sundials)
     message(STATUS "SUNDIALS done configuring")
   else()
+    enable_language(C)
     find_package(SUNDIALS REQUIRED)
+    if (SUNDIALS_VERSION VERSION_LESS 4.0.0)
+      message(FATAL_ERROR "SUNDIALS_VERSION 4.0.0 or newer is required. Found version ${SUNDIALS_VERSION}.")
+    endif()
   endif()
   target_link_libraries(bout++ PUBLIC SUNDIALS::nvecparallel)
   target_link_libraries(bout++ PUBLIC SUNDIALS::cvode)
diff --git a/cmake_build_defines.hxx.in b/cmake_build_defines.hxx.in
index ed6e8685f6..4d63a01b7d 100644
--- a/cmake_build_defines.hxx.in
+++ b/cmake_build_defines.hxx.in
@@ -13,7 +13,7 @@
 #cmakedefine01 BOUT_HAS_IDA
 #cmakedefine01 BOUT_HAS_LAPACK
 #cmakedefine01 BOUT_HAS_NETCDF
-#cmakedefine01 BOUT_HAS_ADIOS
+#cmakedefine01 BOUT_HAS_ADIOS2
 #cmakedefine01 BOUT_HAS_PETSC
 #cmakedefine01 BOUT_HAS_PRETTY_FUNCTION
 #cmakedefine01 BOUT_HAS_PVODE
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 3849d34852..022b16e248 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -11,6 +11,7 @@ add_subdirectory(backtrace)
 add_subdirectory(blob2d)
 add_subdirectory(blob2d-outerloop)
 add_subdirectory(blob2d-laplacexz)
+add_subdirectory(boutpp)
 add_subdirectory(boundary-conditions/advection)
 add_subdirectory(conducting-wall-mode)
 add_subdirectory(conduction)
diff --git a/examples/blob2d/blob2d.cxx b/examples/blob2d/blob2d.cxx
index f41f857d46..7007bbeb77 100644
--- a/examples/blob2d/blob2d.cxx
+++ b/examples/blob2d/blob2d.cxx
@@ -25,7 +25,6 @@ class Blob2D : public PhysicsModel {
   BoutReal rho_s;   ///< Bohm gyro radius
   BoutReal Omega_i; ///< Ion cyclotron frequency
   BoutReal c_s;     ///< Bohm sound speed
-  BoutReal n0;      ///< Reference density
 
   // Constants to calculate the parameters
   BoutReal Te0; ///< Isothermal temperature [eV]
@@ -61,7 +60,6 @@ class Blob2D : public PhysicsModel {
     m_i = options["m_i"].withDefault(2 * 1.667e-27);
     m_e = options["m_e"].withDefault(9.11e-31);
 
-    n0 = options["n0"].doc("Background density in cubic m").withDefault(1e19);
     D_vort = options["D_vort"].doc("Viscous diffusion coefficient").withDefault(0.0);
     D_n = options["D_n"].doc("Density diffusion coefficient").withDefault(0.0);
 
diff --git a/examples/blob2d/delta_0.25/BOUT.inp b/examples/blob2d/delta_0.25/BOUT.inp
index 58d1e36741..841fcaf235 100644
--- a/examples/blob2d/delta_0.25/BOUT.inp
+++ b/examples/blob2d/delta_0.25/BOUT.inp
@@ -87,8 +87,6 @@ flags = 49152  # set_rhs i.e. identity matrix in boundaries
 
 Te0 = 5    # Electron Temperature (eV)
 
-n0 = 2e+18  # Background plasma density (m^-3)
-
 compressible = false  # Compressibility?
 
 boussinesq = true  # Boussinesq approximation (no perturbed n in vorticity)
diff --git a/examples/blob2d/delta_1/BOUT.inp b/examples/blob2d/delta_1/BOUT.inp
index 417911271d..39213ddd36 100644
--- a/examples/blob2d/delta_1/BOUT.inp
+++ b/examples/blob2d/delta_1/BOUT.inp
@@ -87,8 +87,6 @@ flags = 49152  # set_rhs i.e. identity matrix in boundaries
 
 Te0 = 5    # Electron Temperature (eV)
 
-n0 = 2e+18  # Background plasma density (m^-3)
-
 compressible = false  # Compressibility?
 
 boussinesq = true  # Boussinesq approximation (no perturbed n in vorticity)
diff --git a/examples/blob2d/delta_10/BOUT.inp b/examples/blob2d/delta_10/BOUT.inp
index 353c28c3b2..f4507b871b 100644
--- a/examples/blob2d/delta_10/BOUT.inp
+++ b/examples/blob2d/delta_10/BOUT.inp
@@ -87,8 +87,6 @@ flags = 49152  # set_rhs i.e. identity matrix in boundaries
 
 Te0 = 5    # Electron Temperature (eV)
 
-n0 = 2e+18  # Background plasma density (m^-3)
-
 compressible = false  # Compressibility?
 
 boussinesq = true  # Boussinesq approximation (no perturbed n in vorticity)
diff --git a/examples/boutpp/CMakeLists.txt b/examples/boutpp/CMakeLists.txt
new file mode 100644
index 0000000000..e46a7ae990
--- /dev/null
+++ b/examples/boutpp/CMakeLists.txt
@@ -0,0 +1,10 @@
+cmake_minimum_required(VERSION 3.13)
+
+if (NOT TARGET bout++::bout++)
+  find_package(bout++ REQUIRED)
+endif()
+
+bout_copy_file(runexample)
+bout_copy_file(blob2d.py)
+bout_copy_file(simulation.py)
+bout_copy_file(data/BOUT.inp)
diff --git a/examples/boutpp/blob2d.py b/examples/boutpp/blob2d.py
index d5f370a454..4dc8ea60ac 100755
--- a/examples/boutpp/blob2d.py
+++ b/examples/boutpp/blob2d.py
@@ -24,7 +24,7 @@ def init(self, restart):
 
         self.phiSolver = bc.Laplacian()
 
-        options = bc.Options("model")
+        options = bc.Options.root("model")
         # Temperature in eV
         Te0 = options.get("Te0", 30)
         e = options.get("e", 1.602e-19)
@@ -70,12 +70,20 @@ def init(self, restart):
 
         # /************ Create a solver for potential ********/
 
+        opts_boussinesq = bc.Options.root("phiBoussinesq")
+        opts_non_boussinesq = bc.Options.root("phiSolver")
+
         if self.boussinesq:
             # BOUT.inp section "phiBoussinesq"
-            self.phiSolver = bc.Laplacian(bc.Options("phiBoussinesq"))
+            opts_used = opts_boussinesq
+            opts_unused = opts_non_boussinesq
         else:
             # BOUT.inp section "phiSolver"
-            self.phiSolver = bc.Laplacian(bc.Options("phiSolver"))
+            opts_used = opts_non_boussinesq
+            opts_unused = opts_boussinesq
+
+        self.phiSolver = bc.Laplacian(opts_used)
+        opts_unused.setConditionallyUsed()
 
         # Starting guess for first solve (if iterative)
         self.phi = bc.create3D("0")
@@ -165,8 +173,8 @@ def ensure_blob():
 
 # settings used by the core code
 
-NOUT = 50      # number of time-steps
-TIMESTEP = 50  # time between outputs [1/wci]
+nout = 50      # number of time-steps
+timestep = 50  # time between outputs [1/wci]
 
 
 MXG = 2      # Number of X guard cells
@@ -198,8 +206,8 @@ def ensure_blob():
 
 [mesh:ddz]
 
-first = FFT
-second = FFT
+first = C2
+second = C2
 upwind = W3
 
 ###################################################
@@ -207,8 +215,8 @@ def ensure_blob():
 
 [solver]
 
-ATOL = 1.0e-10  # absolute tolerance
-RTOL = 1.0e-5   # relative tolerance
+atol = 1e-10  # absolute tolerance
+rtol = 1e-05   # relative tolerance
 mxstep = 10000  # Maximum internal steps per output
 
 ###################################################
@@ -221,22 +229,20 @@ def ensure_blob():
 
 fourth_order = true  # 4th order or 2nd order
 
-flags = 0  # inversion flags for phi
-             # 0  = Zero value
-             # 10 = Zero gradient AC inner & outer
-             # 15 = Zero gradient AC and DC
-             # 768 = Zero laplace inner & outer
+# 0  = Zero value
+# 10 = Zero gradient AC inner & outer
+# 15 = Zero gradient AC and DC
+# 768 = Zero laplace inner & outer
 
 [phiSolver:precon]  # Preconditioner (if pctype=user)
-filter     = 0.     # Must not filter solution
-flags      = 49152  # set_rhs i.e. identity matrix in boundaries
+filter = 0.0     # Must not filter solution
+flags = 49152  # set_rhs i.e. identity matrix in boundaries
 
 ###################################################
 # Electrostatic potential solver (Boussinesq)
 
 [phiBoussinesq]
 # By default type is tri (serial) or spt (parallel)
-flags = 0
 
 ##################################################
 # general settings for the model
@@ -245,14 +251,12 @@ def ensure_blob():
 
 Te0 = 5    # Electron Temperature (eV)
 
-n0 = 2e18  # Background plasma density (m^-3)
-
 compressible = false  # Compressibility?
 
 boussinesq = true  # Boussinesq approximation (no perturbed n in vorticity)
 
-D_vort = 1e-6  # Viscosity
-D_n = 1e-6    # Diffusion
+D_vort = 1e-06  # Viscosity
+D_n = 1e-06    # Diffusion
 
 R_c = 1.5  # Radius of curvature (m)
 
@@ -261,7 +265,7 @@ def ensure_blob():
 # These can be overridden for individual variables in
 # a section of that name.
 
-[All]
+[all]
 scale = 0.0 # default size of initial perturbations
 
 bndry_all = neumann # Zero-gradient on all boundaries
@@ -278,9 +282,8 @@ def ensure_blob():
 
 
 if __name__ == "__main__":
-    if "--create" in sys.argv:
-        sys.argv.remove("--create")
-        ensure_blob()
+    ensure_blob()
+
     bc.init("-d blob".split(" ") + sys.argv[1:])
 
     # Create an instance
diff --git a/examples/boutpp/data/BOUT.inp b/examples/boutpp/data/BOUT.inp
new file mode 100644
index 0000000000..d91707ec1b
--- /dev/null
+++ b/examples/boutpp/data/BOUT.inp
@@ -0,0 +1,9 @@
+nout=10
+timestep=10
+
+[mesh]
+nx=160
+ny=1
+nz=n/n
+
+MYG=0
diff --git a/examples/fci-wave-logn/boundary/BOUT.inp b/examples/fci-wave-logn/boundary/BOUT.inp
index 11e57ec47d..0632aa949b 100644
--- a/examples/fci-wave-logn/boundary/BOUT.inp
+++ b/examples/fci-wave-logn/boundary/BOUT.inp
@@ -20,7 +20,7 @@ expand_divergence = false
 background = 1e-06   # Background density
 
 [all]
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 bndry_all = neumann
 
 [n]
@@ -28,15 +28,15 @@ bndry_all = neumann
 zl = z / (2*pi)
 function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [logn]
 
 function = log(n:function)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [v]
 
diff --git a/examples/fci-wave-logn/div-integrate/BOUT.inp b/examples/fci-wave-logn/div-integrate/BOUT.inp
index a37bf3e2a5..66bdbce5f2 100644
--- a/examples/fci-wave-logn/div-integrate/BOUT.inp
+++ b/examples/fci-wave-logn/div-integrate/BOUT.inp
@@ -20,7 +20,7 @@ expand_divergence = false
 background = 1e-06   # Background density
 
 [all]
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 bndry_all = neumann
 
 [n]
@@ -28,15 +28,15 @@ bndry_all = neumann
 zl = z / (2*pi)
 function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [logn]
 
 function = log(n:function)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [v]
 
diff --git a/examples/fci-wave-logn/expanded/BOUT.inp b/examples/fci-wave-logn/expanded/BOUT.inp
index 3a2935c6e8..e084511d24 100644
--- a/examples/fci-wave-logn/expanded/BOUT.inp
+++ b/examples/fci-wave-logn/expanded/BOUT.inp
@@ -20,7 +20,7 @@ expand_divergence = true
 background = 1e-06   # Background density
 
 [all]
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 bndry_all = neumann
 
 [n]
@@ -28,15 +28,15 @@ bndry_all = neumann
 zl = z / (2*pi)
 function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [logn]
 
 function = log(n:function)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [v]
 
diff --git a/examples/fci-wave-logn/fci-wave.cxx b/examples/fci-wave-logn/fci-wave.cxx
index 731897ad4e..2ea9048421 100644
--- a/examples/fci-wave-logn/fci-wave.cxx
+++ b/examples/fci-wave-logn/fci-wave.cxx
@@ -62,7 +62,7 @@ class FCIwave : public PhysicsModel {
 
     // Neumann boundaries simplifies parallel derivatives
     Bxyz.applyBoundary("neumann");
-    Bxyz.applyParallelBoundary("parallel_neumann");
+    Bxyz.applyParallelBoundary("parallel_neumann_o2");
     SAVE_ONCE(Bxyz);
 
     Options::getRoot()->getSection("fciwave")->get("expand_divergence", expand_divergence,
diff --git a/examples/fci-wave/div-integrate/BOUT.inp b/examples/fci-wave/div-integrate/BOUT.inp
index eb41d5f228..68f2326f52 100644
--- a/examples/fci-wave/div-integrate/BOUT.inp
+++ b/examples/fci-wave/div-integrate/BOUT.inp
@@ -21,7 +21,7 @@ log_density = false  # Evolve log(n)?
 background = 1e-06   # Background density
 
 [all]
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 bndry_all = neumann
 
 [n]
@@ -29,15 +29,15 @@ bndry_all = neumann
 zl = z / (2*pi)
 function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [logn]
 
 function = log(n:function)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [v]
 
diff --git a/examples/fci-wave/div/BOUT.inp b/examples/fci-wave/div/BOUT.inp
index 70b60757eb..3f497df6c7 100644
--- a/examples/fci-wave/div/BOUT.inp
+++ b/examples/fci-wave/div/BOUT.inp
@@ -21,7 +21,7 @@ log_density = false  # Evolve log(n)?
 background = 1e-06   # Background density
 
 [all]
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 bndry_all = neumann
 
 [n]
@@ -29,15 +29,15 @@ bndry_all = neumann
 zl = z / (2*pi)
 function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [logn]
 
 function = log(n:function)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [v]
 
diff --git a/examples/fci-wave/fci-wave.cxx b/examples/fci-wave/fci-wave.cxx
index 226b52c808..2fd383ed3f 100644
--- a/examples/fci-wave/fci-wave.cxx
+++ b/examples/fci-wave/fci-wave.cxx
@@ -69,7 +69,7 @@ class FCIwave : public PhysicsModel {
 
     // Neumann boundaries simplifies parallel derivatives
     Bxyz.applyBoundary("neumann");
-    Bxyz.applyParallelBoundary("parallel_neumann");
+    Bxyz.applyParallelBoundary("parallel_neumann_o2");
     SAVE_ONCE(Bxyz);
 
     SOLVE_FOR(nv);
diff --git a/examples/fci-wave/logn/BOUT.inp b/examples/fci-wave/logn/BOUT.inp
index f97d8cc891..26f8a99d63 100644
--- a/examples/fci-wave/logn/BOUT.inp
+++ b/examples/fci-wave/logn/BOUT.inp
@@ -21,7 +21,7 @@ log_density = true  # Evolve log(n)?
 background = 1e-06   # Background density
 
 [all]
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 bndry_all = neumann
 
 [n]
@@ -29,15 +29,15 @@ bndry_all = neumann
 zl = z / (2*pi)
 function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [logn]
 
 function = log(n:function)
 
-bndry_par_yup = parallel_neumann
-bndry_par_ydown = parallel_neumann
+bndry_par_yup = parallel_neumann_o2
+bndry_par_ydown = parallel_neumann_o2
 
 [nv]
 
diff --git a/examples/laplace-petsc3d/data/BOUT.inp b/examples/laplace-petsc3d/data/BOUT.inp
index 86a52c69f2..7e81d992a2 100644
--- a/examples/laplace-petsc3d/data/BOUT.inp
+++ b/examples/laplace-petsc3d/data/BOUT.inp
@@ -6,7 +6,7 @@ mz = 128
 function = mixmode(x, 1.)*mixmode(y, 2.)*mixmode(z, 3.)
 bndry_xin = none
 bndry_xout = none
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 
 [rhs]
 function = mixmode(x, 4.)*mixmode(y, 5.)*mixmode(z, 6.)
@@ -22,7 +22,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.)
 [C2]
 #function = 0.
 function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.)
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 
 [A]
 function = 0.0
@@ -46,7 +46,7 @@ transform_from_field_aligned = false
 [initial]
 bndry_xin = neumann
 bndry_xout = neumann
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 
 [input1]
 function = mixmode(x, 1.)*mixmode(z, 2.)
diff --git a/include/bout/adios_object.hxx b/include/bout/adios_object.hxx
index 9d2f545b46..4750930373 100755
--- a/include/bout/adios_object.hxx
+++ b/include/bout/adios_object.hxx
@@ -14,7 +14,7 @@
 
 #include "bout/build_config.hxx"
 
-#if BOUT_HAS_ADIOS
+#if BOUT_HAS_ADIOS2
 
 #include <adios2.h>
 #include <memory>
@@ -79,5 +79,5 @@ void ADIOSSetParameters(const std::string& input, const char delimKeyValue,
 
 } // namespace bout
 
-#endif //BOUT_HAS_ADIOS
+#endif //BOUT_HAS_ADIOS2
 #endif //ADIOS_OBJECT_HXX
diff --git a/include/bout/boundary_factory.hxx b/include/bout/boundary_factory.hxx
index 9fc2d7f256..5f1f6e06a6 100644
--- a/include/bout/boundary_factory.hxx
+++ b/include/bout/boundary_factory.hxx
@@ -4,10 +4,13 @@ class BoundaryFactory;
 #ifndef BOUT_BNDRY_FACTORY_H
 #define BOUT_BNDRY_FACTORY_H
 
-#include "bout/boundary_op.hxx"
-#include "bout/boundary_region.hxx"
-#include "bout/parallel_boundary_op.hxx"
-#include "bout/parallel_boundary_region.hxx"
+class BoundaryOpBase;
+class BoundaryOpPar;
+class BoundaryOp;
+class BoundaryRegionBase;
+class BoundaryRegionPar;
+class BoundaryRegion;
+class BoundaryModifier;
 
 #include <map>
 #include <string>
diff --git a/include/bout/bout.hxx b/include/bout/bout.hxx
index d929a19c2f..09433bcc3b 100644
--- a/include/bout/bout.hxx
+++ b/include/bout/bout.hxx
@@ -34,6 +34,7 @@
 #ifndef BOUT_H
 #define BOUT_H
 
+// IWYU pragma: begin_keep, begin_export
 #include "bout/build_config.hxx"
 
 #include "bout/boutcomm.hxx"
@@ -53,6 +54,7 @@
 #include "bout/vector3d.hxx"
 #include "bout/version.hxx"
 #include "bout/where.hxx"
+// IWYU pragma: end_keep, end_export
 
 // BOUT++ main functions
 
diff --git a/include/bout/bout_enum_class.hxx b/include/bout/bout_enum_class.hxx
index f8c9e364c5..585e5b020e 100644
--- a/include/bout/bout_enum_class.hxx
+++ b/include/bout/bout_enum_class.hxx
@@ -86,7 +86,11 @@
         BOUT_ENUM_CLASS_MAP_ARGS(BOUT_STR_ENUM_CLASS, enumname, __VA_ARGS__)}; \
     auto found = fromString_map.find(s);                                       \
     if (found == fromString_map.end()) {                                       \
-      throw BoutException("Did not find enum {:s}", s);                        \
+      std::string valid_values {};                                             \
+      for (auto const& entry : fromString_map) {                               \
+        valid_values += std::string(" ") + entry.first;                        \
+      }                                                                        \
+      throw BoutException("Did not find enum {:s}. Valid values: {:s}", s, valid_values); \
     }                                                                          \
     return found->second;                                                      \
   }                                                                            \
diff --git a/include/bout/build_config.hxx b/include/bout/build_config.hxx
index c97962f7cf..08158d00e9 100644
--- a/include/bout/build_config.hxx
+++ b/include/bout/build_config.hxx
@@ -17,7 +17,7 @@ constexpr auto has_gettext = static_cast<bool>(BOUT_HAS_GETTEXT);
 constexpr auto has_lapack = static_cast<bool>(BOUT_HAS_LAPACK);
 constexpr auto has_legacy_netcdf = static_cast<bool>(BOUT_HAS_LEGACY_NETCDF);
 constexpr auto has_netcdf = static_cast<bool>(BOUT_HAS_NETCDF);
-constexpr auto has_adios = static_cast<bool>(BOUT_HAS_ADIOS);
+constexpr auto has_adios2 = static_cast<bool>(BOUT_HAS_ADIOS2);
 constexpr auto has_petsc = static_cast<bool>(BOUT_HAS_PETSC);
 constexpr auto has_hypre = static_cast<bool>(BOUT_HAS_HYPRE);
 constexpr auto has_umpire = static_cast<bool>(BOUT_HAS_UMPIRE);
diff --git a/include/bout/field_data.hxx b/include/bout/field_data.hxx
index 03b9d6759b..185dcabf2d 100644
--- a/include/bout/field_data.hxx
+++ b/include/bout/field_data.hxx
@@ -44,7 +44,8 @@ class Coordinates;
 class Mesh;
 
 #include "bout/boundary_region.hxx"
-#include "bout/parallel_boundary_region.hxx"
+class BoundaryRegionPar;
+enum class BndryLoc;
 
 #include "bout/sys/expressionparser.hxx"
 
diff --git a/include/bout/invert_laplace.hxx b/include/bout/invert_laplace.hxx
index f7b9501a81..0b416d4aab 100644
--- a/include/bout/invert_laplace.hxx
+++ b/include/bout/invert_laplace.hxx
@@ -238,6 +238,10 @@ public:
   virtual void setInnerBoundaryFlags(int f) { inner_boundary_flags = f; }
   virtual void setOuterBoundaryFlags(int f) { outer_boundary_flags = f; }
 
+  virtual int getGlobalFlags() const { return global_flags; }
+  virtual int getInnerBoundaryFlags() const { return inner_boundary_flags; }
+  virtual int getOuterBoundaryFlags() const { return outer_boundary_flags; }
+
   /// Does this solver use Field3D coefficients (true) or only their DC component (false)
   virtual bool uses3DCoefs() const { return false; }
 
@@ -308,9 +312,23 @@ protected:
   int extra_yguards_lower; ///< exclude some number of points at the lower boundary, useful for staggered grids or when boundary conditions make inversion redundant
   int extra_yguards_upper; ///< exclude some number of points at the upper boundary, useful for staggered grids or when boundary conditions make inversion redundant
 
-  int global_flags;         ///< Default flags
-  int inner_boundary_flags; ///< Flags to set inner boundary condition
-  int outer_boundary_flags; ///< Flags to set outer boundary condition
+  /// Return true if global/default \p flag is set
+  bool isGlobalFlagSet(int flag) const { return (global_flags & flag) != 0; }
+  /// Return true if \p flag is set for the inner boundary condition
+  bool isInnerBoundaryFlagSet(int flag) const {
+    return (inner_boundary_flags & flag) != 0;
+  }
+  /// Return true if \p flag is set for the outer boundary condition
+  bool isOuterBoundaryFlagSet(int flag) const {
+    return (outer_boundary_flags & flag) != 0;
+  }
+
+  /// Return true if \p flag is set for the inner boundary condition
+  /// and this is the first proc in X direction
+  bool isInnerBoundaryFlagSetOnFirstX(int flag) const;
+  /// Return true if \p flag is set for the outer boundary condition
+  /// and this is the last proc in X direction
+  bool isOuterBoundaryFlagSetOnLastX(int flag) const;
 
   void tridagCoefs(int jx, int jy, BoutReal kwave, dcomplex& a, dcomplex& b, dcomplex& c,
                    const Field2D* ccoef = nullptr, const Field2D* d = nullptr,
@@ -322,15 +340,13 @@ protected:
                    CELL_LOC loc = CELL_DEFAULT);
 
   void tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dcomplex* bk, int jy,
-                    int kz, BoutReal kwave, int flags, int inner_boundary_flags,
-                    int outer_boundary_flags, const Field2D* a, const Field2D* ccoef,
+                    int kz, BoutReal kwave, const Field2D* a, const Field2D* ccoef,
                     const Field2D* d, bool includeguards = true, bool zperiodic = true) {
-    tridagMatrix(avec, bvec, cvec, bk, jy, kz, kwave, flags, inner_boundary_flags,
-                 outer_boundary_flags, a, ccoef, ccoef, d, includeguards, zperiodic);
+    tridagMatrix(avec, bvec, cvec, bk, jy, kz, kwave, a, ccoef, ccoef, d, includeguards,
+                 zperiodic);
   }
   void tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dcomplex* bk, int jy,
-                    int kz, BoutReal kwave, int flags, int inner_boundary_flags,
-                    int outer_boundary_flags, const Field2D* a, const Field2D* c1coef,
+                    int kz, BoutReal kwave, const Field2D* a, const Field2D* c1coef,
                     const Field2D* c2coef, const Field2D* d, bool includeguards = true,
                     bool zperiodic = true);
   CELL_LOC location;   ///< staggered grid location of this solver
@@ -339,6 +355,10 @@ protected:
                        ///  localmesh->getCoordinates(location) once
 
 private:
+  int global_flags;         ///< Default flags
+  int inner_boundary_flags; ///< Flags to set inner boundary condition
+  int outer_boundary_flags; ///< Flags to set outer boundary condition
+
   /// Singleton instance
   static std::unique_ptr<Laplacian> instance;
   /// Name for writing performance infomation; default taken from
diff --git a/include/bout/invert_pardiv.hxx b/include/bout/invert_pardiv.hxx
index 23ea59e943..0153cc1987 100644
--- a/include/bout/invert_pardiv.hxx
+++ b/include/bout/invert_pardiv.hxx
@@ -31,11 +31,11 @@
 #ifndef INV_PARDIV_H
 #define INV_PARDIV_H
 
-#include "field2d.hxx"
-#include "field3d.hxx"
-#include "options.hxx"
-#include "unused.hxx"
+#include "bout/field2d.hxx"
+#include "bout/field3d.hxx"
 #include "bout/generic_factory.hxx"
+#include "bout/options.hxx"
+#include "bout/unused.hxx"
 
 // Pardivergence implementations
 constexpr auto PARDIVCYCLIC = "cyclic";
diff --git a/include/bout/mask.hxx b/include/bout/mask.hxx
index 4250d21105..fd90ae7345 100644
--- a/include/bout/mask.hxx
+++ b/include/bout/mask.hxx
@@ -66,6 +66,8 @@ public:
 
   inline bool& operator()(int jx, int jy, int jz) { return mask(jx, jy, jz); }
   inline const bool& operator()(int jx, int jy, int jz) const { return mask(jx, jy, jz); }
+
+  inline bool& operator[](const Ind3D& i) { return mask[i]; }
   inline const bool& operator[](const Ind3D& i) const { return mask[i]; }
 };
 
diff --git a/include/bout/mesh.hxx b/include/bout/mesh.hxx
index 3bc01d3787..c80716fc12 100644
--- a/include/bout/mesh.hxx
+++ b/include/bout/mesh.hxx
@@ -55,22 +55,24 @@ class Mesh;
 #include "bout/field_data.hxx"
 #include "bout/options.hxx"
 
-#include "fieldgroup.hxx"
+#include "bout/fieldgroup.hxx"
 
-#include "bout/boundary_region.hxx"
-#include "bout/parallel_boundary_region.hxx"
+class BoundaryRegion;
+class BoundaryRegionPar;
 
-#include "sys/range.hxx" // RangeIterator
+#include "bout/sys/range.hxx" // RangeIterator
 
 #include <bout/griddata.hxx>
 
-#include "coordinates.hxx" // Coordinates class
+#include "bout/coordinates.hxx" // Coordinates class
 
 #include "bout/unused.hxx"
 
 #include "bout/generic_factory.hxx"
 #include <bout/region.hxx>
 
+#include <bout/bout_enum_class.hxx>
+
 #include <list>
 #include <map>
 #include <memory>
@@ -90,6 +92,9 @@ public:
   ReturnType create(Options* options = nullptr, GridDataSource* source = nullptr) const;
 };
 
+BOUT_ENUM_CLASS(BoundaryParType, all, xin, xout, fwd, bwd, xin_fwd, xout_fwd, xin_bwd,
+                xout_bwd, SIZE);
+
 template <class DerivedType>
 using RegisterMesh = MeshFactory::RegisterInFactory<DerivedType>;
 
@@ -485,11 +490,20 @@ public:
   /// Add a boundary region to this processor
   virtual void addBoundary(BoundaryRegion* UNUSED(bndry)) {}
 
-  /// Get all the parallel (Y) boundaries on this processor
-  virtual std::vector<BoundaryRegionPar*> getBoundariesPar() = 0;
+  /// Get the list of parallel boundary regions. The option specifies which
+  /// region to get. Default is to get all regions. All possible options are
+  /// listed at the top of this file, see BoundaryParType.
+  /// For example:
+  /// get all regions:
+  /// mesh->getBoundariesPar(BoundaryParType::all)
+  /// get only xout:
+  /// mesh->getBoundariesPar(BoundaryParType::xout)
+  virtual std::vector<std::shared_ptr<BoundaryRegionPar>>
+  getBoundariesPar(BoundaryParType type = BoundaryParType::all) = 0;
 
   /// Add a parallel(Y) boundary to this processor
-  virtual void addBoundaryPar(BoundaryRegionPar* UNUSED(bndry)) {}
+  virtual void addBoundaryPar(std::shared_ptr<BoundaryRegionPar> UNUSED(bndry),
+                              BoundaryParType UNUSED(type)) {}
 
   /// Branch-cut special handling (experimental)
   virtual Field3D smoothSeparatrix(const Field3D& f) { return f; }
diff --git a/include/bout/options.hxx b/include/bout/options.hxx
index 839c847289..4a32907b17 100644
--- a/include/bout/options.hxx
+++ b/include/bout/options.hxx
@@ -241,7 +241,8 @@ public:
   ///
   ///     Option option2 = option1.copy();
   ///
-  Options(const Options& other) = delete; // Use a reference or .copy() method
+  [[deprecated("Please use a reference or .copy() instead")]] Options(
+      const Options& other);
 
   /// Copy assignment must be explicit
   ///
@@ -251,7 +252,8 @@ public:
   ///
   ///     option2.value = option1.value;
   ///
-  Options& operator=(const Options& other) = delete; // Use a reference or .copy() method
+  [[deprecated("Please use a reference or .copy() instead")]] Options&
+  operator=(const Options& other); // Use a reference or .copy() method
 
   /// Make a deep copy of this Options,
   /// recursively copying children.
@@ -364,7 +366,8 @@ public:
   ///         {"long_name", "some velocity"}
   ///       });
   Options& setAttributes(
-      std::initializer_list<std::pair<std::string, Options::AttributeType>> attrs) {
+      const std::initializer_list<std::pair<std::string, Options::AttributeType>>&
+          attrs) {
     for (const auto& attr : attrs) {
       attributes[attr.first] = attr.second;
     }
@@ -978,7 +981,7 @@ namespace details {
 /// avoiding lengthy recompilation if we change it
 struct OptionsFormatterBase {
   auto parse(fmt::format_parse_context& ctx) -> fmt::format_parse_context::iterator;
-  auto format(const Options& options, fmt::format_context& ctx)
+  auto format(const Options& options, fmt::format_context& ctx) const
       -> fmt::format_context::iterator;
 
 private:
diff --git a/include/bout/options_io.hxx b/include/bout/options_io.hxx
index 4c70159514..57be8bbaae 100644
--- a/include/bout/options_io.hxx
+++ b/include/bout/options_io.hxx
@@ -111,7 +111,7 @@ public:
   static constexpr auto default_type =
 #if BOUT_HAS_NETCDF
       "netcdf";
-#elif BOUT_HAS_ADIOS
+#elif BOUT_HAS_ADIOS2
       "adios";
 #else
       "invalid";
diff --git a/include/bout/output_bout_types.hxx b/include/bout/output_bout_types.hxx
index 6b1829b088..b67762521b 100644
--- a/include/bout/output_bout_types.hxx
+++ b/include/bout/output_bout_types.hxx
@@ -34,7 +34,7 @@ struct fmt::formatter<SpecificInd<N>> {
   // Formats the point p using the parsed format specification (presentation)
   // stored in this formatter.
   template <typename FormatContext>
-  auto format(const SpecificInd<N>& ind, FormatContext& ctx) {
+  auto format(const SpecificInd<N>& ind, FormatContext& ctx) const {
     // ctx.out() is an output iterator to write to.
     if (presentation == 'c') {
       switch (N) {
diff --git a/include/bout/parallel_boundary_op.hxx b/include/bout/parallel_boundary_op.hxx
index 2bcb660802..d8620e892b 100644
--- a/include/bout/parallel_boundary_op.hxx
+++ b/include/bout/parallel_boundary_op.hxx
@@ -52,7 +52,7 @@ protected:
   BoutReal getValue(const BoundaryRegionPar& bndry, BoutReal t);
 };
 
-template <class T>
+template <class T, bool isNeumann = false>
 class BoundaryOpParTemp : public BoundaryOpPar {
 public:
   using BoundaryOpPar::BoundaryOpPar;
@@ -89,51 +89,74 @@ public:
     throw BoutException("Can't apply parallel boundary conditions to Field2D!");
   }
   void apply(Field3D& f) override { return apply(f, 0); }
+
+  void apply(Field3D& f, BoutReal t) override {
+    f.ynext(bndry->dir).allocate(); // Ensure unique before modifying
+
+    auto dy = f.getCoordinates()->dy;
+
+    for (bndry->first(); !bndry->isDone(); bndry->next()) {
+      BoutReal value = getValue(*bndry, t);
+      if (isNeumann) {
+        value *= dy[bndry->ind()];
+      }
+      static_cast<T*>(this)->apply_stencil(f, bndry, value);
+    }
+  }
 };
 
 //////////////////////////////////////////////////
 // Implementations
 
-class BoundaryOpPar_dirichlet : public BoundaryOpParTemp<BoundaryOpPar_dirichlet> {
+class BoundaryOpPar_dirichlet_o1 : public BoundaryOpParTemp<BoundaryOpPar_dirichlet_o1> {
 public:
   using BoundaryOpParTemp::BoundaryOpParTemp;
-
-  using BoundaryOpParTemp::apply;
-  void apply(Field3D& f, BoutReal t) override;
+  static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) {
+    bndry->dirichlet_o1(f, value);
+  }
 };
 
-class BoundaryOpPar_dirichlet_O3 : public BoundaryOpParTemp<BoundaryOpPar_dirichlet_O3> {
+class BoundaryOpPar_dirichlet_o2 : public BoundaryOpParTemp<BoundaryOpPar_dirichlet_o2> {
 public:
   using BoundaryOpParTemp::BoundaryOpParTemp;
-
-  using BoundaryOpParTemp::apply;
-  void apply(Field3D& f, BoutReal t) override;
+  static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) {
+    bndry->dirichlet_o2(f, value);
+  }
 };
 
-class BoundaryOpPar_dirichlet_interp
-    : public BoundaryOpParTemp<BoundaryOpPar_dirichlet_interp> {
+class BoundaryOpPar_dirichlet_o3 : public BoundaryOpParTemp<BoundaryOpPar_dirichlet_o3> {
 public:
   using BoundaryOpParTemp::BoundaryOpParTemp;
-
-  using BoundaryOpParTemp::apply;
-  void apply(Field3D& f, BoutReal t) override;
+  static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) {
+    bndry->dirichlet_o3(f, value);
+  }
 };
 
-class BoundaryOpPar_neumann : public BoundaryOpParTemp<BoundaryOpPar_neumann> {
+class BoundaryOpPar_neumann_o1
+    : public BoundaryOpParTemp<BoundaryOpPar_neumann_o1, true> {
 public:
   using BoundaryOpParTemp::BoundaryOpParTemp;
-
-  using BoundaryOpParTemp::apply;
-  void apply(Field3D& f, BoutReal t) override;
+  static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) {
+    bndry->neumann_o1(f, value);
+  }
 };
 
-class BoundaryOpPar_neumann_c2_simple
-    : public BoundaryOpParTemp<BoundaryOpPar_neumann_c2_simple> {
+class BoundaryOpPar_neumann_o2
+    : public BoundaryOpParTemp<BoundaryOpPar_neumann_o2, true> {
 public:
   using BoundaryOpParTemp::BoundaryOpParTemp;
+  static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) {
+    bndry->neumann_o2(f, value);
+  }
+};
 
-  using BoundaryOpParTemp::apply;
-  void apply(Field3D& f, BoutReal t) override;
+class BoundaryOpPar_neumann_o3
+    : public BoundaryOpParTemp<BoundaryOpPar_neumann_o3, true> {
+public:
+  using BoundaryOpParTemp::BoundaryOpParTemp;
+  static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) {
+    bndry->neumann_o3(f, value);
+  }
 };
 
 #endif // BOUT_PAR_BNDRY_OP_H
diff --git a/include/bout/parallel_boundary_region.hxx b/include/bout/parallel_boundary_region.hxx
index ea609c7b55..4d5278d00f 100644
--- a/include/bout/parallel_boundary_region.hxx
+++ b/include/bout/parallel_boundary_region.hxx
@@ -5,18 +5,54 @@
 #include "bout/bout_types.hxx"
 #include <vector>
 
+#include <bout/field3d.hxx>
+#include <bout/mesh.hxx>
+
 /**
  * Boundary region for parallel direction. This contains a vector of points that are
  * inside the boundary.
  *
  */
-class BoundaryRegionPar : public BoundaryRegionBase {
 
-  struct IndexPoint {
-    int jx;
-    int jy;
-    int jz;
-  };
+namespace parallel_stencil {
+// generated by src/mesh/parallel_boundary_stencil.cxx.py
+inline BoutReal pow(BoutReal val, int exp) {
+  // constexpr int expval = exp;
+  // static_assert(expval == 2 or expval == 3, "This pow is only for exponent 2 or 3");
+  if (exp == 2) {
+    return val * val;
+  }
+  ASSERT3(exp == 3);
+  return val * val * val;
+}
+inline BoutReal dirichlet_o1(BoutReal UNUSED(spacing0), BoutReal value0) {
+  return value0;
+}
+inline BoutReal dirichlet_o2(BoutReal spacing0, BoutReal value0, BoutReal spacing1,
+                             BoutReal value1) {
+  return (spacing0 * value1 - spacing1 * value0) / (spacing0 - spacing1);
+}
+inline BoutReal neumann_o2(BoutReal UNUSED(spacing0), BoutReal value0, BoutReal spacing1,
+                           BoutReal value1) {
+  return -spacing1 * value0 + value1;
+}
+inline BoutReal dirichlet_o3(BoutReal spacing0, BoutReal value0, BoutReal spacing1,
+                             BoutReal value1, BoutReal spacing2, BoutReal value2) {
+  return (pow(spacing0, 2) * spacing1 * value2 - pow(spacing0, 2) * spacing2 * value1
+          - spacing0 * pow(spacing1, 2) * value2 + spacing0 * pow(spacing2, 2) * value1
+          + pow(spacing1, 2) * spacing2 * value0 - spacing1 * pow(spacing2, 2) * value0)
+         / ((spacing0 - spacing1) * (spacing0 - spacing2) * (spacing1 - spacing2));
+}
+inline BoutReal neumann_o3(BoutReal spacing0, BoutReal value0, BoutReal spacing1,
+                           BoutReal value1, BoutReal spacing2, BoutReal value2) {
+  return (2 * spacing0 * spacing1 * value2 - 2 * spacing0 * spacing2 * value1
+          + pow(spacing1, 2) * spacing2 * value0 - pow(spacing1, 2) * value2
+          - spacing1 * pow(spacing2, 2) * value0 + pow(spacing2, 2) * value1)
+         / ((spacing1 - spacing2) * (2 * spacing0 - spacing1 - spacing2));
+}
+} // namespace parallel_stencil
+
+class BoundaryRegionPar : public BoundaryRegionBase {
 
   struct RealPoint {
     BoutReal s_x;
@@ -26,13 +62,15 @@ class BoundaryRegionPar : public BoundaryRegionBase {
 
   struct Indices {
     // Indices of the boundary point
-    IndexPoint index;
+    Ind3D index;
     // Intersection with boundary in index space
     RealPoint intersection;
     // Distance to intersection
     BoutReal length;
     // Angle between field line and boundary
-    BoutReal angle;
+    // BoutReal angle;
+    // How many points we can go in the opposite direction
+    signed char valid;
   };
 
   using IndicesVec = std::vector<Indices>;
@@ -46,28 +84,122 @@ class BoundaryRegionPar : public BoundaryRegionBase {
 public:
   BoundaryRegionPar(const std::string& name, int dir, Mesh* passmesh)
       : BoundaryRegionBase(name, passmesh), dir(dir) {
+    ASSERT0(std::abs(dir) == 1);
     BoundaryRegionBase::isParallel = true;
   }
   BoundaryRegionPar(const std::string& name, BndryLoc loc, int dir, Mesh* passmesh)
       : BoundaryRegionBase(name, loc, passmesh), dir(dir) {
     BoundaryRegionBase::isParallel = true;
+    ASSERT0(std::abs(dir) == 1);
   }
 
   /// Add a point to the boundary
-  void add_point(int jx, int jy, int jz, BoutReal x, BoutReal y, BoutReal z,
-                 BoutReal length, BoutReal angle);
+  void add_point(Ind3D ind, BoutReal x, BoutReal y, BoutReal z, BoutReal length,
+                 signed char valid) {
+    bndry_points.push_back({ind, {x, y, z}, length, valid});
+  }
+  void add_point(int ix, int iy, int iz, BoutReal x, BoutReal y, BoutReal z,
+                 BoutReal length, signed char valid) {
+    bndry_points.push_back({xyz2ind(ix, iy, iz, localmesh), {x, y, z}, length, valid});
+  }
+
+  // final, so they can be inlined
+  void first() final { bndry_position = begin(bndry_points); }
+  void next() final { ++bndry_position; }
+  bool isDone() final { return (bndry_position == end(bndry_points)); }
 
-  void first() override;
-  void next() override;
-  bool isDone() override;
+  // getter
+  Ind3D ind() const { return bndry_position->index; }
+  BoutReal s_x() const { return bndry_position->intersection.s_x; }
+  BoutReal s_y() const { return bndry_position->intersection.s_y; }
+  BoutReal s_z() const { return bndry_position->intersection.s_z; }
+  BoutReal length() const { return bndry_position->length; }
+  signed char valid() const { return bndry_position->valid; }
 
-  /// Index of the point in the boundary
-  int x, y, z;
-  BoutReal s_x, s_y, s_z;
-  BoutReal length;
-  BoutReal angle;
+  // setter
+  void setValid(signed char val) { bndry_position->valid = val; }
+
+  bool contains(const BoundaryRegionPar& bndry) const {
+    return std::binary_search(
+        begin(bndry_points), end(bndry_points), *bndry.bndry_position,
+        [](const Indices& i1, const Indices& i2) { return i1.index < i2.index; });
+  }
+
+  // extrapolate a given point to the boundary
+  BoutReal extrapolate_o1(const Field3D& f) const { return f[ind()]; }
+  BoutReal extrapolate_o2(const Field3D& f) const {
+    ASSERT3(valid() >= 0);
+    if (valid() < 1) {
+      return extrapolate_o1(f);
+    }
+    return f[ind()] * (1 + length()) - f.ynext(-dir)[ind().yp(-dir)] * length();
+  }
+
+  // dirichlet boundary code
+  void dirichlet_o1(Field3D& f, BoutReal value) const {
+    f.ynext(dir)[ind().yp(dir)] = value;
+  }
+
+  void dirichlet_o2(Field3D& f, BoutReal value) const {
+    if (length() < small_value) {
+      return dirichlet_o1(f, value);
+    }
+    ynext(f) = parallel_stencil::dirichlet_o2(1, f[ind()], 1 - length(), value);
+    // Equivalent to: ynext(f) = f[ind()] * (1 - 1 / length()) + value / length();
+  }
+
+  void dirichlet_o3(Field3D& f, BoutReal value) const {
+    ASSERT3(valid() >= 0);
+    if (valid() < 1) {
+      return dirichlet_o2(f, value);
+    }
+    if (length() < small_value) {
+      ynext(f) = parallel_stencil::dirichlet_o2(2, yprev(f), 1 - length(), value);
+    } else {
+      ynext(f) =
+          parallel_stencil::dirichlet_o3(2, yprev(f), 1, f[ind()], 1 - length(), value);
+    }
+  }
+
+  // NB: value needs to be scaled by dy
+  // neumann_o1 would actually be o2 if we used an appropriate one-sided stencil.
+  // But in general we do not, and thus for normal C2 stencils, this is 1st order.
+  void neumann_o1(Field3D& f, BoutReal value) const { ynext(f) = f[ind()] + value; }
+
+  // NB: value needs to be scaled by dy
+  void neumann_o2(Field3D& f, BoutReal value) const {
+    ASSERT3(valid() >= 0);
+    if (valid() < 1) {
+      return neumann_o1(f, value);
+    }
+    ynext(f) = yprev(f) + 2 * value;
+  }
+
+  // NB: value needs to be scaled by dy
+  void neumann_o3(Field3D& f, BoutReal value) const {
+    ASSERT3(valid() >= 0);
+    if (valid() < 1) {
+      return neumann_o1(f, value);
+    }
+    ynext(f) =
+        parallel_stencil::neumann_o3(1 - length(), value, 1, f[ind()], 2, yprev(f));
+  }
 
   const int dir;
+
+private:
+  constexpr static BoutReal small_value = 1e-2;
+
+  // BoutReal get(const Field3D& f, int off)
+  const BoutReal& ynext(const Field3D& f) const { return f.ynext(dir)[ind().yp(dir)]; }
+  BoutReal& ynext(Field3D& f) const { return f.ynext(dir)[ind().yp(dir)]; }
+  const BoutReal& yprev(const Field3D& f) const { return f.ynext(-dir)[ind().yp(-dir)]; }
+  BoutReal& yprev(Field3D& f) const { return f.ynext(-dir)[ind().yp(-dir)]; }
+  static Ind3D xyz2ind(int x, int y, int z, Mesh* mesh) {
+    const int ny = mesh->LocalNy;
+    const int nz = mesh->LocalNz;
+    return Ind3D{(x * ny + y) * nz + z, ny, nz};
+  }
 };
 
 #endif //  BOUT_PAR_BNDRY_H
diff --git a/include/bout/petsclib.hxx b/include/bout/petsclib.hxx
index 35334ce773..2008671286 100644
--- a/include/bout/petsclib.hxx
+++ b/include/bout/petsclib.hxx
@@ -59,7 +59,7 @@ class Options;
 // means we _must_ `#include` this header _before_ any PETSc header!
 #define PETSC_HAVE_BROKEN_RECURSIVE_MACRO
 
-#include <petsc.h>
+#include <petsc.h> // IWYU pragma: export
 #include <petscversion.h>
 
 #include "bout/boutexception.hxx"
diff --git a/include/bout/region.hxx b/include/bout/region.hxx
index 68810324e3..bb1cf82bf1 100644
--- a/include/bout/region.hxx
+++ b/include/bout/region.hxx
@@ -116,16 +116,16 @@ class BoutMask;
 ///     }
 //
 
-#define BOUT_FOR_SERIAL(index, region)                                            \
-  for (auto block = region.getBlocks().cbegin(), end = region.getBlocks().cend(); \
-       block < end; ++block)                                                      \
+#define BOUT_FOR_SERIAL(index, region)                                                \
+  for (auto block = (region).getBlocks().cbegin(), end = (region).getBlocks().cend(); \
+       block < end; ++block)                                                          \
     for (auto index = block->first; index < block->second; ++index)
 
 #if BOUT_USE_OPENMP
-#define BOUT_FOR_OMP(index, region, omp_pragmas)                                    \
-  BOUT_OMP_PERF(omp_pragmas)                                                        \
-  for (auto block = region.getBlocks().cbegin(); block < region.getBlocks().cend(); \
-       ++block)                                                                     \
+#define BOUT_FOR_OMP(index, region, omp_pragmas)                                        \
+  BOUT_OMP_PERF(omp_pragmas)                                                            \
+  for (auto block = (region).getBlocks().cbegin(); block < (region).getBlocks().cend(); \
+       ++block)                                                                         \
     for (auto index = block->first; index < block->second; ++index)
 #else
 // No OpenMP, so fall back to slightly more efficient serial form
@@ -133,10 +133,10 @@ class BoutMask;
 #endif
 
 #define BOUT_FOR(index, region) \
-  BOUT_FOR_OMP(index, region, parallel for schedule(BOUT_OPENMP_SCHEDULE))
+  BOUT_FOR_OMP(index, (region), parallel for schedule(BOUT_OPENMP_SCHEDULE))
 
 #define BOUT_FOR_INNER(index, region) \
-  BOUT_FOR_OMP(index, region, for schedule(BOUT_OPENMP_SCHEDULE) nowait)
+  BOUT_FOR_OMP(index, (region), for schedule(BOUT_OPENMP_SCHEDULE) nowait)
 // NOLINTEND(cppcoreguidelines-macro-usage,bugprone-macro-parentheses)
 
 enum class IND_TYPE { IND_3D = 0, IND_2D = 1, IND_PERP = 2 };
diff --git a/include/bout/single_index_ops.hxx b/include/bout/single_index_ops.hxx
index 6a9089510b..60bd78bc36 100644
--- a/include/bout/single_index_ops.hxx
+++ b/include/bout/single_index_ops.hxx
@@ -16,7 +16,7 @@ using EXEC_POL = RAJA::cuda_exec<CUDA_BLOCK_SIZE>;
 using EXEC_POL = RAJA::loop_exec;
 #endif // end BOUT_USE_CUDA
 ////-----------CUDA settings------------------------------------------------------end
-#endif
+#endif // end BOUT_HAS_RAJA
 
 // Ind3D: i.zp():
 BOUT_HOST_DEVICE inline int i_zp(const int id, const int nz) {
diff --git a/include/bout/solver.hxx b/include/bout/solver.hxx
index 896ce62965..47fef7ce73 100644
--- a/include/bout/solver.hxx
+++ b/include/bout/solver.hxx
@@ -429,6 +429,8 @@ protected:
   bool has_constraints{false};
   /// Has init been called yet?
   bool initialised{false};
+  /// If calling user RHS for the first time
+  bool first_rhs_call{true};
 
   /// Current simulation time
   BoutReal simtime{0.0};
diff --git a/include/bout/sundials_backports.hxx b/include/bout/sundials_backports.hxx
index c4f4aa59ef..4ec334f4d4 100644
--- a/include/bout/sundials_backports.hxx
+++ b/include/bout/sundials_backports.hxx
@@ -1,81 +1,76 @@
-// Backports for SUNDIALS compatibility between versions 3-6
+// Backports for SUNDIALS compatibility between versions 4-7
 //
 // These are common backports shared between the CVode, ARKode, and IDA solvers
 //
 // Copyright 2022 Peter Hill, BOUT++ Team
-// SPDX-License-Identifier: LGPLv3
+// SPDX-License-Identifier: LGPL-3.0-or-later
 
 #ifndef BOUT_SUNDIALS_BACKPORTS_H
 #define BOUT_SUNDIALS_BACKPORTS_H
 
+#include "bout/bout_types.hxx"
+
+#include <cstddef>
+#include <type_traits>
+#include <utility>
+
 #include <nvector/nvector_parallel.h>
 #include <sundials/sundials_config.h>
 #include <sundials/sundials_iterative.h>
+#include <sundials/sundials_nonlinearsolver.h>
 #include <sundials/sundials_types.h>
-
-#if SUNDIALS_VERSION_MAJOR >= 3
 #include <sunlinsol/sunlinsol_spgmr.h>
-#endif
-
-#if SUNDIALS_VERSION_MAJOR >= 4
-#include <sundials/sundials_nonlinearsolver.h>
 #include <sunnonlinsol/sunnonlinsol_fixedpoint.h>
 #include <sunnonlinsol/sunnonlinsol_newton.h>
-#endif
-
-#include "bout/unused.hxx"
 
-#if SUNDIALS_VERSION_MAJOR < 3
-using SUNLinearSolver = int*;
-inline void SUNLinSolFree([[maybe_unused]] SUNLinearSolver solver) {}
-using sunindextype = long int;
+#if SUNDIALS_VERSION_MAJOR >= 6
+#include <sundials/sundials_context.hpp>
 #endif
 
-#if SUNDIALS_VERSION_MAJOR < 4
-using SUNNonlinearSolver = int*;
-inline void SUNNonlinSolFree([[maybe_unused]] SUNNonlinearSolver solver) {}
+#if SUNDIALS_VERSION_MAJOR < 6
+using sundials_real_type = realtype;
+#else
+using sundials_real_type = sunrealtype;
 #endif
 
-#if SUNDIALS_VERSION_MAJOR < 6
-namespace sundials {
-struct Context {
-  Context(void* comm [[maybe_unused]]) {}
-};
-} // namespace sundials
+static_assert(std::is_same_v<BoutReal, sundials_real_type>,
+              "BOUT++ and SUNDIALS real types do not match");
 
-using SUNContext = sundials::Context;
+#define SUNDIALS_CONTROLLER_SUPPORT \
+  (SUNDIALS_VERSION_MAJOR > 6       \
+   || (SUNDIALS_VERSION_MAJOR == 6 && SUNDIALS_VERSION_MINOR >= 7))
+#define SUNDIALS_TABLE_BY_NAME_SUPPORT \
+  (SUNDIALS_VERSION_MAJOR > 6          \
+   || (SUNDIALS_VERSION_MAJOR == 6 && SUNDIALS_VERSION_MINOR >= 4))
 
+#if SUNDIALS_VERSION_MAJOR < 6
 constexpr auto SUN_PREC_RIGHT = PREC_RIGHT;
 constexpr auto SUN_PREC_LEFT = PREC_LEFT;
 constexpr auto SUN_PREC_NONE = PREC_NONE;
 
-inline N_Vector N_VNew_Parallel(MPI_Comm comm, sunindextype local_length,
-                                sunindextype global_length,
-                                [[maybe_unused]] SUNContext sunctx) {
-  return N_VNew_Parallel(comm, local_length, global_length);
-}
+namespace sundials {
+using Context = std::nullptr_t;
+} // namespace sundials
+#endif
 
-#if SUNDIALS_VERSION_MAJOR >= 3
-inline SUNLinearSolver SUNLinSol_SPGMR(N_Vector y, int pretype, int maxl,
-                                       [[maybe_unused]] SUNContext sunctx) {
-#if SUNDIALS_VERSION_MAJOR == 3
-  return SUNSPGMR(y, pretype, maxl);
+inline sundials::Context createSUNContext([[maybe_unused]] MPI_Comm& comm) {
+#if SUNDIALS_VERSION_MAJOR < 6
+  return nullptr;
+#elif SUNDIALS_VERSION_MAJOR < 7
+  return sundials::Context(static_cast<void*>(&comm));
 #else
-  return SUNLinSol_SPGMR(y, pretype, maxl);
+  return sundials::Context(comm);
 #endif
 }
-#if SUNDIALS_VERSION_MAJOR >= 4
-inline SUNNonlinearSolver SUNNonlinSol_FixedPoint(N_Vector y, int m,
-                                                  [[maybe_unused]] SUNContext sunctx) {
-  return SUNNonlinSol_FixedPoint(y, m);
-}
 
-inline SUNNonlinearSolver SUNNonlinSol_Newton(N_Vector y,
-                                              [[maybe_unused]] SUNContext sunctx) {
-  return SUNNonlinSol_Newton(y);
+template <typename Func, typename... Args>
+inline decltype(auto) callWithSUNContext(Func f, [[maybe_unused]] sundials::Context& ctx,
+                                         Args&&... args) {
+#if SUNDIALS_VERSION_MAJOR < 6
+  return f(std::forward<Args>(args)...);
+#else
+  return f(std::forward<Args>(args)..., ctx);
+#endif
 }
-#endif // SUNDIALS_VERSION_MAJOR >= 4
-#endif // SUNDIALS_VERSION_MAJOR >= 3
-#endif // SUNDIALS_VERSION_MAJOR < 6
 
 #endif // BOUT_SUNDIALS_BACKPORTS_H
diff --git a/include/bout/utils.hxx b/include/bout/utils.hxx
index 3172f1cf7a..b45152fbcc 100644
--- a/include/bout/utils.hxx
+++ b/include/bout/utils.hxx
@@ -362,6 +362,14 @@ public:
     return data[i.ind];
   }
 
+  T& operator[](Ind3D i) {
+    // ny and nz are private :-(
+    // ASSERT2(i.nz == n3);
+    // ASSERT2(i.ny == n2);
+    ASSERT2(0 <= i.ind && i.ind < n1 * n2 * n3);
+    return data[i.ind];
+  }
+
   Tensor& operator=(const T& val) {
     for (auto& i : data) {
       i = val;
diff --git a/manual/sphinx/conf.py b/manual/sphinx/conf.py
index 29c0985841..d27e8ab1fd 100755
--- a/manual/sphinx/conf.py
+++ b/manual/sphinx/conf.py
@@ -88,7 +88,7 @@ def __getattr__(cls, name):
         + " -DBOUT_UPDATE_GIT_SUBMODULE=OFF"
         + " -DBOUT_TESTS=OFF"
         + " -DBOUT_ALLOW_INSOURCE_BUILD=ON"
-        + f" -DPython_ROOT_DIR={pydir}"
+        + f" -DPython3_ROOT_DIR={pydir}"
         + f" -Dmpark_variant_DIR={pwd}/externalpackages/mpark.variant/"
         + f" -Dfmt_DIR={pwd}/externalpackages/fmt/"
     )
diff --git a/manual/sphinx/user_docs/adios2.rst b/manual/sphinx/user_docs/adios2.rst
index 8a6228cd3a..d8e0135c0d 100644
--- a/manual/sphinx/user_docs/adios2.rst
+++ b/manual/sphinx/user_docs/adios2.rst
@@ -11,14 +11,14 @@ Installation
 The easiest way to configure BOUT++ with ADIOS2 is to tell CMake to download and build it
 with this flag::
 
-  -DBOUT_DOWNLOAD_ADIOS=ON
+  -DBOUT_DOWNLOAD_ADIOS2=ON
 
 The ``master`` branch will be downloaded from `Github <https://github.com/ornladios/ADIOS2>`_,
 configured and built with BOUT++.
 
-Alternatively, if ADIOS is already installed then the following flags can be used::
+Alternatively, if ADIOS2 is already installed then the following flags can be used::
 
-  -DBOUT_USE_ADIOS=ON -DADIOS2_ROOT=/path/to/adios2
+  -DBOUT_USE_ADIOS2=ON -DADIOS2_ROOT=/path/to/adios2
 
 Output files
 ------------
diff --git a/manual/sphinx/user_docs/advanced_install.rst b/manual/sphinx/user_docs/advanced_install.rst
index e25be12b4b..048a26a6e3 100644
--- a/manual/sphinx/user_docs/advanced_install.rst
+++ b/manual/sphinx/user_docs/advanced_install.rst
@@ -145,13 +145,13 @@ where ``<build-directory>`` is the path to the build directory
 
 MPCDF HPC Systems
 ~~~~~~~~~~~~~~~~~
+After cloning BOUT-dev and checking out the branch you want (e.g. db-outer), run:
+
 .. code-block:: bash
 
-    module purge # or at least onload intel and impi and mkl
-    module load gcc/10 cmake/3.18 openmpi/4
-    # ensure python3 is >= python3.6 - skip if you have a newer python3 loaded
-    mkdir -p $HOME/bin ; test -e $HOME/bin/python3 || ln -s $(which python3.6) $HOME/bin/python3
-    BUILD=/ptmp/$USER/bout-deps bin/bout-build-deps.sh
+    module purge # or at least unload intel
+    module load gcc/13 anaconda/3/2021.11 impi/2021.9 hdf5-serial/1.12.2 mkl/2022.0 netcdf-serial/4.8.1 fftw-mpi/3.3.10
+    BUILD=/ptmp/$USER/bout-deps NO_HDF5=1 NO_NETCDF=1 NO_FFTW=1 bin/bout-build-deps.sh
 
 and follow the instructions for configuring BOUT++. To enable openMP
 for a production run use:
@@ -159,11 +158,11 @@ for a production run use:
 .. code-block:: bash
 
     module load bout-dep
-    cmake .. -DBOUT_USE_NETCDF=ON -DnetCDF_ROOT=$BOUT_DEP -DnetCDFCxx_ROOT=$BOUT_DEP \
+    cmake .. -DBOUT_USE_NETCDF=ON -DnetCDFCxx_ROOT=$BOUT_DEP \
       -DBOUT_USE_PETSC=ON -DPETSC_DIR=$BOUT_DEP \
-      -DBOUT_USE_FFTW=ON -DFFTW_ROOT=$BOUT_DEP \
+      -DBOUT_USE_FFTW=ON \
       -DBOUT_USE_SUNDIALS=ON -DSUNDIALS_ROOT=$BOUT_DEP \
-      -DBOUT_ENABLE_OPENMP=ON \
+      -DBOUT_ENABLE_OPENMP=OFF \
       -DCMAKE_BUILD_TYPE=Release
 
 
@@ -306,9 +305,10 @@ solver. Currently, BOUT++ also supports the SUNDIALS solvers CVODE, IDA
 and ARKODE which are available from
 https://computation.llnl.gov/casc/sundials/main.html.
 
-.. note:: BOUT++ currently supports SUNDIALS > 2.6, up to 5.4.0 as of
-          September 2020. It is advisable to use the highest possible
-          version
+.. note:: BOUT++ currently supports SUNDIALS versions 4 to 7
+          (see ``include/bout/sundials_backports.hxx``). It is
+          advisable to use the highest possible version. Support
+          for SUNDIALS versions < 4 has been removed.
 
 The full installation guide is found in the downloaded ``.tar.gz``,
 but we will provide a step-by-step guide to install it and make it
diff --git a/manual/sphinx/user_docs/installing.rst b/manual/sphinx/user_docs/installing.rst
index eb155909bf..10f5d9b9f1 100644
--- a/manual/sphinx/user_docs/installing.rst
+++ b/manual/sphinx/user_docs/installing.rst
@@ -373,7 +373,7 @@ For SUNDIALS, use ``-DBOUT_DOWNLOAD_SUNDIALS=ON``. If using ``ccmake`` this opti
 may not appear initially. This automatically sets ``BOUT_USE_SUNDIALS=ON``, and
 configures SUNDIALS to use MPI.
 
-For ADIOS2, use ``-DBOUT_DOWNLOAD_ADIOS=ON``. This will download and
+For ADIOS2, use ``-DBOUT_DOWNLOAD_ADIOS2=ON``. This will download and
 configure `ADIOS2 <https://adios2.readthedocs.io/>`_, enabling BOUT++
 to read and write this high-performance parallel file format.
 
diff --git a/manual/sphinx/user_docs/laplacian.rst b/manual/sphinx/user_docs/laplacian.rst
index e422fa82bd..5365ba14a7 100644
--- a/manual/sphinx/user_docs/laplacian.rst
+++ b/manual/sphinx/user_docs/laplacian.rst
@@ -536,7 +536,6 @@ Fourth order approximation
            &+ c_{i-2,j} f_{i-2,j} + c_{i-2,j+1} f_{i-2,j+1} \\
            &+ c_{i-2,j+2} f_{i-2,j+2} + c_{i-1,j-2} f_{i-1,j-2} \\
            &+ c_{i-1,j-1} f_{i-1,j-1} + c_{i-1,j} f_{i-1,j} \\
-           &+ c_{i-1,j-1} f_{i-1,j-1} + c_{i-1,j} f_{i-1,j} \\
            &+ c_{i-1,j+1} f_{i-1,j+1} + c_{i-1,j+2} f_{i-1,j+2} \\
            &+ c_{i,j-2} f_{i,j-2} + c_{i,j-1} f_{i,j-1} \\
            &+ c_{i,j+1} f_{i,j+1} + c_{i,j+2} f_{i,j+2} \\
@@ -573,9 +572,9 @@ Fourth order approximation (9-point stencil)
 .. math::
 
        \texttt{ddx\_c} = \frac{-\texttt{c2}_{x+2} + 8\texttt{c2}_{x+1} -
-       8\texttt{c2}_{x-1} + \texttt{c2}_{x-1} }{ 12\texttt{c1}\text{d}x} \\
+       8\texttt{c2}_{x-1} + \texttt{c2}_{x-2} }{ 12\texttt{c1}\text{d}x} \\
        \texttt{ddz\_c} = \frac{-\texttt{c2}_{z+2} + 8\texttt{c2}_{z+1} -
-       8\texttt{c2}_{z-1} + \texttt{c2}_{z-1} }{ 12\texttt{c1}\text{d}z}
+       8\texttt{c2}_{z-1} + \texttt{c2}_{z-2} }{ 12\texttt{c1}\text{d}z}
 
 
 This gives
diff --git a/requirements.txt b/requirements.txt
index 75358b10db..dcbe5cef5c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-Jinja2>=2.11.3
-numpy>=1.14.1
-scipy>=1.0.0
-netcdf4~=1.6.0
-matplotlib>=2.0.0
+Jinja2~=3.1.0
+numpy~=2.0.0
+scipy>=1.11.0
+netcdf4~=1.7.1
+matplotlib>=3.7.0
 Cython~=3.0.0
 boututils~=0.2.1
 boutdata~=0.2.1
diff --git a/src/bout++.cxx b/src/bout++.cxx
index a83e278d9c..ff25b1163e 100644
--- a/src/bout++.cxx
+++ b/src/bout++.cxx
@@ -59,7 +59,7 @@ const char DEFAULT_DIR[] = "data";
 #include "bout/bout.hxx"
 #undef BOUT_NO_USING_NAMESPACE_BOUTGLOBALS
 
-#if BOUT_HAS_ADIOS
+#if BOUT_HAS_ADIOS2
 #include "bout/adios_object.hxx"
 #endif
 
@@ -165,7 +165,7 @@ int BoutInitialise(int& argc, char**& argv) {
 
     savePIDtoFile(args.data_dir, MYPE);
 
-#if BOUT_HAS_ADIOS
+#if BOUT_HAS_ADIOS2
     bout::ADIOSInit(BoutComm::get());
 #endif
 
@@ -572,7 +572,7 @@ void printCompileTimeOptions() {
   constexpr auto netcdf_flavour =
       has_netcdf ? (has_legacy_netcdf ? " (Legacy)" : " (NetCDF4)") : "";
   output_info.write(_("\tNetCDF support {}{}\n"), is_enabled(has_netcdf), netcdf_flavour);
-  output_info.write(_("\tADIOS support {}\n"), is_enabled(has_adios));
+  output_info.write(_("\tADIOS2 support {}\n"), is_enabled(has_adios2));
   output_info.write(_("\tPETSc support {}\n"), is_enabled(has_petsc));
   output_info.write(_("\tPretty function name support {}\n"),
                     is_enabled(has_pretty_function));
@@ -698,7 +698,7 @@ void addBuildFlagsToOptions(Options& options) {
   options["has_gettext"].force(bout::build::has_gettext);
   options["has_lapack"].force(bout::build::has_lapack);
   options["has_netcdf"].force(bout::build::has_netcdf);
-  options["has_adios"].force(bout::build::has_adios);
+  options["has_adios2"].force(bout::build::has_adios2);
   options["has_petsc"].force(bout::build::has_petsc);
   options["has_hypre"].force(bout::build::has_hypre);
   options["has_umpire"].force(bout::build::has_umpire);
@@ -795,7 +795,7 @@ int BoutFinalise(bool write_settings) {
   // Call HYPER_Finalize if not already called
   bout::HypreLib::cleanup();
 
-#if BOUT_HAS_ADIOS
+#if BOUT_HAS_ADIOS2
   bout::ADIOSFinalize();
 #endif
 
diff --git a/src/field/field3d.cxx b/src/field/field3d.cxx
index 011353f34a..4ed9641f44 100644
--- a/src/field/field3d.cxx
+++ b/src/field/field3d.cxx
@@ -32,6 +32,8 @@
 
 #include <cmath>
 
+#include "bout/parallel_boundary_op.hxx"
+#include "bout/parallel_boundary_region.hxx"
 #include <bout/assert.hxx>
 #include <bout/boundary_factory.hxx>
 #include <bout/boundary_op.hxx>
@@ -504,7 +506,7 @@ void Field3D::applyParallelBoundary(const std::string& condition) {
   /// Loop over the mesh boundary regions
   for (const auto& reg : fieldmesh->getBoundariesPar()) {
     auto op = std::unique_ptr<BoundaryOpPar>{
-        dynamic_cast<BoundaryOpPar*>(bfact->create(condition, reg))};
+        dynamic_cast<BoundaryOpPar*>(bfact->create(condition, reg.get()))};
     op->apply(*this);
   }
 }
@@ -524,7 +526,7 @@ void Field3D::applyParallelBoundary(const std::string& region,
   for (const auto& reg : fieldmesh->getBoundariesPar()) {
     if (reg->label == region) {
       auto op = std::unique_ptr<BoundaryOpPar>{
-          dynamic_cast<BoundaryOpPar*>(bfact->create(condition, reg))};
+          dynamic_cast<BoundaryOpPar*>(bfact->create(condition, reg.get()))};
       op->apply(*this);
       break;
     }
@@ -548,9 +550,9 @@ void Field3D::applyParallelBoundary(const std::string& region,
       // BoundaryFactory can't create boundaries using Field3Ds, so get temporary
       // boundary of the right type
       auto tmp = std::unique_ptr<BoundaryOpPar>{
-          dynamic_cast<BoundaryOpPar*>(bfact->create(condition, reg))};
+          dynamic_cast<BoundaryOpPar*>(bfact->create(condition, reg.get()))};
       // then clone that with the actual argument
-      auto op = std::unique_ptr<BoundaryOpPar>{tmp->clone(reg, f)};
+      auto op = std::unique_ptr<BoundaryOpPar>{tmp->clone(reg.get(), f)};
       op->apply(*this);
       break;
     }
diff --git a/src/field/field_data.cxx b/src/field/field_data.cxx
index ee8bd97b30..529f595316 100644
--- a/src/field/field_data.cxx
+++ b/src/field/field_data.cxx
@@ -1,4 +1,6 @@
 
+#include "bout/parallel_boundary_op.hxx"
+#include "bout/parallel_boundary_region.hxx"
 #include "bout/unused.hxx"
 #include <bout/boundary_factory.hxx>
 #include <bout/field_data.hxx>
@@ -151,10 +153,9 @@ void FieldData::setBoundary(const std::string& name) {
   }
 
   /// Get the mesh boundary regions
-  std::vector<BoundaryRegionPar*> par_reg = mesh->getBoundariesPar();
   /// Loop over the mesh parallel boundary regions
   for (const auto& reg : mesh->getBoundariesPar()) {
-    auto* op = dynamic_cast<BoundaryOpPar*>(bfact->createFromOptions(name, reg));
+    auto* op = dynamic_cast<BoundaryOpPar*>(bfact->createFromOptions(name, reg.get()));
     if (op != nullptr) {
       bndry_op_par.push_back(op);
     }
diff --git a/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx b/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx
index cf16240c0c..5ce4e540b7 100644
--- a/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx
+++ b/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx
@@ -33,11 +33,13 @@
  *
  */
 
-#include "cyclic_laplace.hxx"
-#include "bout/build_config.hxx"
+#include "bout/build_defines.hxx"
 
 #if not BOUT_USE_METRIC_3D
 
+#include "cyclic_laplace.hxx"
+#include "bout/assert.hxx"
+#include "bout/bout_types.hxx"
 #include <bout/boutexception.hxx>
 #include <bout/constants.hxx>
 #include <bout/fft.hxx>
@@ -47,7 +49,7 @@
 #include <bout/sys/timer.hxx>
 #include <bout/utils.hxx>
 
-#include "cyclic_laplace.hxx"
+#include <vector>
 
 LaplaceCyclic::LaplaceCyclic(Options* opt, const CELL_LOC loc, Mesh* mesh_in,
                              Solver* UNUSED(solver))
@@ -120,13 +122,13 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) {
 
   // If the flags to assign that only one guard cell should be used is set
   int inbndry = localmesh->xstart, outbndry = localmesh->xstart;
-  if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) {
+  if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
     inbndry = outbndry = 1;
   }
-  if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     inbndry = 1;
   }
-  if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     outbndry = 1;
   }
 
@@ -143,9 +145,9 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) {
       for (int ix = xs; ix <= xe; ix++) {
         // Take DST in Z direction and put result in k1d
 
-        if (((ix < inbndry) && (inner_boundary_flags & INVERT_SET) && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && (outer_boundary_flags & INVERT_SET) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           DST(x0[ix] + 1, localmesh->LocalNz - 2, std::begin(k1d));
         } else {
@@ -169,8 +171,7 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) {
         tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy,
                      kz,    // wave number index
                      kwave, // kwave (inverse wave length)
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false,  // Don't include guard cells in arrays
                      false); // Z domain not periodic
       }
@@ -218,9 +219,9 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) {
       for (int ix = xs; ix <= xe; ix++) {
         // Take FFT in Z direction, apply shift, and put result in k1d
 
-        if (((ix < inbndry) && (inner_boundary_flags & INVERT_SET) && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && (outer_boundary_flags & INVERT_SET) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           rfft(x0[ix], localmesh->LocalNz, std::begin(k1d));
         } else {
@@ -241,8 +242,7 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) {
         tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy,
                      kz,    // True for the component constant (DC) in Z
                      kwave, // Z wave number
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false); // Don't include guard cells in arrays
       }
     }
@@ -275,7 +275,7 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) {
       // ZFFT routine expects input of this length
       auto k1d = Array<dcomplex>((localmesh->LocalNz) / 2 + 1);
 
-      const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0;
+      const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC);
 
       BOUT_OMP_PERF(for nowait)
       for (int ix = xs; ix <= xe; ix++) {
@@ -316,13 +316,13 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) {
 
   // If the flags to assign that only one guard cell should be used is set
   int inbndry = localmesh->xstart, outbndry = localmesh->xstart;
-  if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) {
+  if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
     inbndry = outbndry = 1;
   }
-  if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     inbndry = 1;
   }
-  if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     outbndry = 1;
   }
 
@@ -350,6 +350,9 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) {
   const int nsys = nmode * ny;  // Number of systems of equations to solve
   const int nxny = nx * ny;     // Number of points in X-Y
 
+  // This is just to silence static analysis
+  ASSERT0(ny > 0);
+
   auto a3D = Matrix<dcomplex>(nsys, nx);
   auto b3D = Matrix<dcomplex>(nsys, nx);
   auto c3D = Matrix<dcomplex>(nsys, nx);
@@ -374,10 +377,9 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) {
 
         // Take DST in Z direction and put result in k1d
 
-        if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0)
-             && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           DST(x0(ix, iy) + 1, localmesh->LocalNz - 2, std::begin(k1d));
         } else {
@@ -405,8 +407,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) {
         tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy,
                      kz,    // wave number index
                      kwave, // kwave (inverse wave length)
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false,  // Don't include guard cells in arrays
                      false); // Z domain not periodic
       }
@@ -462,10 +463,9 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) {
 
         // Take FFT in Z direction, apply shift, and put result in k1d
 
-        if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0)
-             && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           rfft(x0(ix, iy), localmesh->LocalNz, std::begin(k1d));
         } else {
@@ -490,8 +490,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) {
         tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy,
                      kz,    // True for the component constant (DC) in Z
                      kwave, // Z wave number
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false); // Don't include guard cells in arrays
       }
     }
@@ -502,9 +501,8 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) {
 
     if (localmesh->periodicX) {
       // Subtract X average of kz=0 mode
-      BoutReal local[ny + 1];
+      std::vector<BoutReal> local(ny + 1, 0.0);
       for (int y = 0; y < ny; y++) {
-        local[y] = 0.0;
         for (int ix = xs; ix <= xe; ix++) {
           local[y] += xcmplx3D(y * nmode, ix - xs).real();
         }
@@ -512,8 +510,9 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) {
       local[ny] = static_cast<BoutReal>(xe - xs + 1);
 
       // Global reduce
-      BoutReal global[ny + 1];
-      MPI_Allreduce(local, global, ny + 1, MPI_DOUBLE, MPI_SUM, localmesh->getXcomm());
+      std::vector<BoutReal> global(ny + 1, 0.0);
+      MPI_Allreduce(local.data(), global.data(), ny + 1, MPI_DOUBLE, MPI_SUM,
+                    localmesh->getXcomm());
       // Subtract average from kz=0 modes
       for (int y = 0; y < ny; y++) {
         BoutReal avg = global[y] / global[ny];
@@ -530,7 +529,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) {
       auto k1d = Array<dcomplex>((localmesh->LocalNz) / 2
                                  + 1); // ZFFT routine expects input of this length
 
-      const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0;
+      const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC);
 
       BOUT_OMP_PERF(for nowait)
       for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y
diff --git a/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx b/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx
index c74e184be3..d789e5e408 100644
--- a/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx
+++ b/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx
@@ -99,7 +99,7 @@ LaplaceHypre3d::LaplaceHypre3d(Options* opt, const CELL_LOC loc, Mesh* mesh_in,
 
   // Set up boundary conditions in operator
   BOUT_FOR_SERIAL(i, indexer->getRegionInnerX()) {
-    if (inner_boundary_flags & INVERT_AC_GRAD) {
+    if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) {
       // Neumann on inner X boundary
       operator3D(i, i) = -1. / coords->dx[i] / sqrt(coords->g_11[i]);
       operator3D(i, i.xp()) = 1. / coords->dx[i] / sqrt(coords->g_11[i]);
@@ -111,7 +111,7 @@ LaplaceHypre3d::LaplaceHypre3d(Options* opt, const CELL_LOC loc, Mesh* mesh_in,
   }
 
   BOUT_FOR_SERIAL(i, indexer->getRegionOuterX()) {
-    if (outer_boundary_flags & INVERT_AC_GRAD) {
+    if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) {
       // Neumann on outer X boundary
       operator3D(i, i) = 1. / coords->dx[i] / sqrt(coords->g_11[i]);
       operator3D(i, i.xm()) = -1. / coords->dx[i] / sqrt(coords->g_11[i]);
@@ -180,9 +180,9 @@ Field3D LaplaceHypre3d::solve(const Field3D& b_in, const Field3D& x0) {
   // Adjust vectors to represent boundary conditions and check that
   // boundary cells are finite
   BOUT_FOR_SERIAL(i, indexer->getRegionInnerX()) {
-    const BoutReal val = (inner_boundary_flags & INVERT_SET) ? x0[i] : 0.;
+    const BoutReal val = isInnerBoundaryFlagSet(INVERT_SET) ? x0[i] : 0.;
     ASSERT1(std::isfinite(val));
-    if (!(inner_boundary_flags & INVERT_RHS)) {
+    if (!(isInnerBoundaryFlagSet(INVERT_RHS))) {
       b[i] = val;
     } else {
       ASSERT1(std::isfinite(b[i]));
@@ -190,9 +190,9 @@ Field3D LaplaceHypre3d::solve(const Field3D& b_in, const Field3D& x0) {
   }
 
   BOUT_FOR_SERIAL(i, indexer->getRegionOuterX()) {
-    const BoutReal val = (outer_boundary_flags & INVERT_SET) ? x0[i] : 0.;
+    const BoutReal val = (isOuterBoundaryFlagSet(INVERT_SET)) ? x0[i] : 0.;
     ASSERT1(std::isfinite(val));
-    if (!(outer_boundary_flags & INVERT_RHS)) {
+    if (!(isOuterBoundaryFlagSet(INVERT_RHS))) {
       b[i] = val;
     } else {
       ASSERT1(std::isfinite(b[i]));
diff --git a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx
index b09b67611b..f79463769a 100644
--- a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx
+++ b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx
@@ -293,10 +293,8 @@ FieldPerp LaplaceIPT::solve(const FieldPerp& b, const FieldPerp& x0) {
    */
   auto bcmplx = Matrix<dcomplex>(nmode, ncx);
 
-  const bool invert_inner_boundary =
-      isInnerBoundaryFlagSet(INVERT_SET) and localmesh->firstX();
-  const bool invert_outer_boundary =
-      isOuterBoundaryFlagSet(INVERT_SET) and localmesh->lastX();
+  const bool invert_inner_boundary = isInnerBoundaryFlagSetOnFirstX(INVERT_SET);
+  const bool invert_outer_boundary = isOuterBoundaryFlagSetOnLastX(INVERT_SET);
 
   BOUT_OMP_PERF(parallel for)
   for (int ix = 0; ix < ncx; ix++) {
@@ -345,8 +343,7 @@ FieldPerp LaplaceIPT::solve(const FieldPerp& b, const FieldPerp& x0) {
                  kz,
                  // wave number (different from kz only if we are taking a part
                  // of the z-domain [and not from 0 to 2*pi])
-                 kz * kwaveFactor, global_flags, inner_boundary_flags,
-                 outer_boundary_flags, &A, &C, &D);
+                 kz * kwaveFactor, &A, &C, &D);
 
     // Patch up internal boundaries
     if (not localmesh->lastX()) {
diff --git a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx
index 1c6bb7a02e..02e3eca06c 100644
--- a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx
+++ b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx
@@ -234,14 +234,6 @@ private:
 
   /// First and last interior points xstart, xend
   int xs, xe;
-
-  bool isGlobalFlagSet(int flag) const { return (global_flags & flag) != 0; }
-  bool isInnerBoundaryFlagSet(int flag) const {
-    return (inner_boundary_flags & flag) != 0;
-  }
-  bool isOuterBoundaryFlagSet(int flag) const {
-    return (outer_boundary_flags & flag) != 0;
-  }
 };
 
 #endif // BOUT_USE_METRIC_3D
diff --git a/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx b/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx
index 82273ee7ad..c5076cd499 100644
--- a/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx
+++ b/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx
@@ -84,19 +84,18 @@ LaplaceMultigrid::LaplaceMultigrid(Options* opt, const CELL_LOC loc, Mesh* mesh_
   // Initialize, allocate memory, etc.
   comms_tagbase = 385; // Some random number
 
-  int implemented_global_flags = INVERT_START_NEW;
-  if (global_flags & ~implemented_global_flags) {
+  constexpr int implemented_global_flags = INVERT_START_NEW;
+  if (isGlobalFlagSet(~implemented_global_flags)) {
     throw BoutException("Attempted to set Laplacian inversion flag that is not "
                         "implemented in LaplaceMultigrid.");
   }
-  int implemented_boundary_flags =
-      INVERT_AC_GRAD + INVERT_SET
-      + INVERT_DC_GRAD; // INVERT_DC_GRAD does not actually do anything, but harmless to set while comparing to Fourier solver with Neumann boundary conditions
-  if (inner_boundary_flags & ~implemented_boundary_flags) {
+  // INVERT_DC_GRAD does not actually do anything, but harmless to set while comparing to Fourier solver with Neumann boundary conditions
+  constexpr int implemented_boundary_flags = INVERT_AC_GRAD + INVERT_SET + INVERT_DC_GRAD;
+  if (isInnerBoundaryFlagSet(~implemented_boundary_flags)) {
     throw BoutException("Attempted to set Laplacian inner boundary inversion flag that "
                         "is not implemented in LaplaceMultigrid.");
   }
-  if (outer_boundary_flags & ~implemented_boundary_flags) {
+  if (isOuterBoundaryFlagSet(~implemented_boundary_flags)) {
     throw BoutException("Attempted to set Laplacian outer boundary inversion flag that "
                         "is not implemented in LaplaceMultigrid.");
   }
@@ -242,7 +241,7 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) {
   int lz2 = lzz + 2;
   int lxx = kMG->lnx[level];
 
-  if (global_flags & INVERT_START_NEW) {
+  if (isGlobalFlagSet(INVERT_START_NEW)) {
     // set initial guess to zero
     BOUT_OMP_PERF(parallel default(shared))
     BOUT_OMP_PERF(for collapse(2))
@@ -276,9 +275,9 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) {
   }
 
   if (localmesh->firstX()) {
-    if (inner_boundary_flags & INVERT_AC_GRAD) {
+    if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) {
       // Neumann boundary condition
-      if (inner_boundary_flags & INVERT_SET) {
+      if (isInnerBoundaryFlagSet(INVERT_SET)) {
         // guard cells of x0 specify gradient to set at inner boundary
         BOUT_OMP_PERF(parallel default(shared))
         BOUT_OMP_PERF(for)
@@ -299,7 +298,7 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) {
       }
     } else {
       // Dirichlet boundary condition
-      if (inner_boundary_flags & INVERT_SET) {
+      if (isInnerBoundaryFlagSet(INVERT_SET)) {
         // guard cells of x0 specify value to set at inner boundary
         BOUT_OMP_PERF(parallel default(shared))
         BOUT_OMP_PERF(for)
@@ -320,9 +319,9 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) {
     }
   }
   if (localmesh->lastX()) {
-    if (outer_boundary_flags & INVERT_AC_GRAD) {
+    if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) {
       // Neumann boundary condition
-      if (inner_boundary_flags & INVERT_SET) {
+      if (isOuterBoundaryFlagSet(INVERT_SET)) {
         // guard cells of x0 specify gradient to set at outer boundary
         BOUT_OMP_PERF(parallel default(shared))
         BOUT_OMP_PERF(for)
@@ -344,7 +343,7 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) {
       }
     } else {
       // Dirichlet boundary condition
-      if (outer_boundary_flags & INVERT_SET) {
+      if (isOuterBoundaryFlagSet(INVERT_SET)) {
         // guard cells of x0 specify value to set at outer boundary
         BOUT_OMP_PERF(parallel default(shared))
         BOUT_OMP_PERF(for)
@@ -477,9 +476,9 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) {
     }
   }
   if (localmesh->firstX()) {
-    if (inner_boundary_flags & INVERT_AC_GRAD) {
+    if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) {
       // Neumann boundary condition
-      if (inner_boundary_flags & INVERT_SET) {
+      if (isInnerBoundaryFlagSet(INVERT_SET)) {
         // guard cells of x0 specify gradient to set at inner boundary
         int i2 = -1 + localmesh->xstart;
         BOUT_OMP_PERF(parallel default(shared))
@@ -503,7 +502,7 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) {
       }
     } else {
       // Dirichlet boundary condition
-      if (inner_boundary_flags & INVERT_SET) {
+      if (isInnerBoundaryFlagSet(INVERT_SET)) {
         // guard cells of x0 specify value to set at inner boundary
         int i2 = -1 + localmesh->xstart;
         BOUT_OMP_PERF(parallel default(shared))
@@ -525,9 +524,9 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) {
     }
   }
   if (localmesh->lastX()) {
-    if (outer_boundary_flags & INVERT_AC_GRAD) {
+    if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) {
       // Neumann boundary condition
-      if (inner_boundary_flags & INVERT_SET) {
+      if (isOuterBoundaryFlagSet(INVERT_SET)) {
         // guard cells of x0 specify gradient to set at outer boundary
         int i2 = lxx + localmesh->xstart;
         BOUT_OMP_PERF(parallel default(shared))
@@ -551,7 +550,7 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) {
       }
     } else {
       // Dirichlet boundary condition
-      if (outer_boundary_flags & INVERT_SET) {
+      if (isOuterBoundaryFlagSet(INVERT_SET)) {
         // guard cells of x0 specify value to set at outer boundary
         int i2 = lxx + localmesh->xstart;
         BOUT_OMP_PERF(parallel default(shared))
@@ -651,7 +650,7 @@ void LaplaceMultigrid::generateMatrixF(int level) {
   // Here put boundary conditions
 
   if (kMG->rProcI == 0) {
-    if (inner_boundary_flags & INVERT_AC_GRAD) {
+    if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) {
       // Neumann boundary condition
       BOUT_OMP_PERF(parallel default(shared))
       BOUT_OMP_PERF(for)
@@ -686,7 +685,7 @@ void LaplaceMultigrid::generateMatrixF(int level) {
     }
   }
   if (kMG->rProcI == kMG->xNP - 1) {
-    if (outer_boundary_flags & INVERT_AC_GRAD) {
+    if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) {
       // Neumann boundary condition
       BOUT_OMP_PERF(parallel default(shared))
       BOUT_OMP_PERF(for)
diff --git a/src/invert/laplace/impls/naulin/naulin_laplace.cxx b/src/invert/laplace/impls/naulin/naulin_laplace.cxx
index d82f874cbb..7a614c3498 100644
--- a/src/invert/laplace/impls/naulin/naulin_laplace.cxx
+++ b/src/invert/laplace/impls/naulin/naulin_laplace.cxx
@@ -164,6 +164,11 @@ LaplaceNaulin::LaplaceNaulin(Options* opt, const CELL_LOC loc, Mesh* mesh_in,
   // Get options
   OPTION(opt, rtol, 1.e-7);
   OPTION(opt, atol, 1.e-20);
+  rtol_accept =
+      (*opt)["rtol_accept"].doc("Accept this rtol after maxits").withDefault(rtol);
+  atol_accept =
+      (*opt)["atol_accept"].doc("Accept this atol after maxits").withDefault(atol);
+
   OPTION(opt, maxits, 100);
   OPTION(opt, initial_underrelax_factor, 1.);
   ASSERT0(initial_underrelax_factor > 0. and initial_underrelax_factor <= 1.);
@@ -174,9 +179,9 @@ LaplaceNaulin::LaplaceNaulin(Options* opt, const CELL_LOC loc, Mesh* mesh_in,
   // invert Delp2 and we will not converge
   ASSERT0(delp2type == "cyclic" || delp2type == "spt" || delp2type == "tri");
   // Use same flags for FFT solver as for NaulinSolver
-  delp2solver->setGlobalFlags(global_flags);
-  delp2solver->setInnerBoundaryFlags(inner_boundary_flags);
-  delp2solver->setOuterBoundaryFlags(outer_boundary_flags);
+  delp2solver->setGlobalFlags(getGlobalFlags());
+  delp2solver->setInnerBoundaryFlags(getInnerBoundaryFlags());
+  delp2solver->setOuterBoundaryFlags(getOuterBoundaryFlags());
 
   static int naulinsolver_count = 1;
   setPerformanceName(fmt::format("{}{}", "naulinsolver", ++naulinsolver_count));
@@ -258,7 +263,7 @@ Field3D LaplaceNaulin::solve(const Field3D& rhs, const Field3D& x0) {
     // Note take a copy of the 'b' argument, because we want to return a copy of it in the
     // result
 
-    if ((inner_boundary_flags & INVERT_SET) || (outer_boundary_flags & INVERT_SET)) {
+    if (isInnerBoundaryFlagSet(INVERT_SET) || isOuterBoundaryFlagSet(INVERT_SET)) {
       // This passes in the boundary conditions from x0's guard cells
       copy_x_boundaries(x_guess, x0, localmesh);
     }
@@ -289,6 +294,10 @@ Field3D LaplaceNaulin::solve(const Field3D& rhs, const Field3D& x0) {
 
     ++count;
     if (count > maxits) {
+      // Perhaps accept a worse solution
+      if (error_rel < rtol_accept or error_abs < atol_accept) {
+        break;
+      }
       throw BoutException(
           "LaplaceNaulin error: Not converged within maxits={:d} iterations.", maxits);
     }
@@ -313,6 +322,9 @@ Field3D LaplaceNaulin::solve(const Field3D& rhs, const Field3D& x0) {
       // effectively another iteration, so increment the counter
       ++count;
       if (count > maxits) {
+        if (error_rel < rtol_accept or error_abs < atol_accept) {
+          break;
+        }
         throw BoutException(
             "LaplaceNaulin error: Not converged within maxits={:d} iterations.", maxits);
       }
diff --git a/src/invert/laplace/impls/naulin/naulin_laplace.hxx b/src/invert/laplace/impls/naulin/naulin_laplace.hxx
index e464ef18e7..70bd2668ef 100644
--- a/src/invert/laplace/impls/naulin/naulin_laplace.hxx
+++ b/src/invert/laplace/impls/naulin/naulin_laplace.hxx
@@ -157,6 +157,8 @@ private:
 
   /// Solver tolerances
   BoutReal rtol, atol;
+  /// Accept these tolerances if number of iterations exceeds maxits
+  BoutReal rtol_accept, atol_accept;
 
   /// Maximum number of iterations
   int maxits;
diff --git a/src/invert/laplace/impls/pcr/pcr.cxx b/src/invert/laplace/impls/pcr/pcr.cxx
index 5c4f8da35b..48bbdbac4b 100644
--- a/src/invert/laplace/impls/pcr/pcr.cxx
+++ b/src/invert/laplace/impls/pcr/pcr.cxx
@@ -149,13 +149,13 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) {
   // If the flags to assign that only one guard cell should be used is set
   inbndry = localmesh->xstart;
   outbndry = localmesh->xstart;
-  if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) {
+  if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
     inbndry = outbndry = 1;
   }
-  if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     inbndry = 1;
   }
-  if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     outbndry = 1;
   }
 
@@ -173,10 +173,9 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) {
       for (int ix = xs; ix <= xe; ix++) {
         // Take DST in Z direction and put result in k1d
 
-        if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0)
-             && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           DST(x0[ix] + 1, localmesh->LocalNz - 2, std::begin(k1d));
         } else {
@@ -199,8 +198,7 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) {
         tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy,
                      kz,    // wave number index
                      kwave, // kwave (inverse wave length)
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false); // Don't include guard cells in arrays
       }
     } // BOUT_OMP_PERF(parallel)
@@ -245,10 +243,9 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) {
       for (int ix = xs; ix <= xe; ix++) {
         // Take FFT in Z direction, apply shift, and put result in k1d
 
-        if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0)
-             && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           rfft(x0[ix], localmesh->LocalNz, std::begin(k1d));
         } else {
@@ -269,8 +266,7 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) {
         tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy,
                      kz,    // True for the component constant (DC) in Z
                      kwave, // Z wave number
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false); // Don't include guard cells in arrays
       }
     } // BOUT_OMP_PERF(parallel)
@@ -285,7 +281,7 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) {
       auto k1d = Array<dcomplex>((localmesh->LocalNz) / 2
                                  + 1); // ZFFT routine expects input of this length
 
-      const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0;
+      const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC);
 
       BOUT_OMP_PERF(for nowait)
       for (int ix = xs; ix <= xe; ix++) {
@@ -327,13 +323,13 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) {
   // If the flags to assign that only one guard cell should be used is set
   inbndry = localmesh->xstart;
   outbndry = localmesh->xstart;
-  if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) {
+  if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
     inbndry = outbndry = 1;
   }
-  if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     inbndry = 1;
   }
-  if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     outbndry = 1;
   }
 
@@ -387,10 +383,9 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) {
 
         // Take DST in Z direction and put result in k1d
 
-        if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0)
-             && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           DST(x0(ix, iy) + 1, localmesh->LocalNz - 2, std::begin(k1d));
         } else {
@@ -417,8 +412,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) {
         tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy,
                      kz,    // wave number index
                      kwave, // kwave (inverse wave length)
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false); // Don't include guard cells in arrays
       }
     } // BOUT_OMP_PERF(parallel)
@@ -472,10 +466,9 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) {
 
         // Take FFT in Z direction, apply shift, and put result in k1d
 
-        if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0)
-             && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           rfft(x0(ix, iy), localmesh->LocalNz, std::begin(k1d));
         } else {
@@ -500,8 +493,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) {
         tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy,
                      kz,    // True for the component constant (DC) in Z
                      kwave, // Z wave number
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false); // Don't include guard cells in arrays
       }
     } // BOUT_OMP_PERF(parallel)
@@ -516,7 +508,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) {
       auto k1d = Array<dcomplex>((localmesh->LocalNz) / 2
                                  + 1); // ZFFT routine expects input of this length
 
-      const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0;
+      const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC);
 
       BOUT_OMP_PERF(for nowait)
       for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y
diff --git a/src/invert/laplace/impls/pcr/pcr.hxx b/src/invert/laplace/impls/pcr/pcr.hxx
index 38b7c356d3..ec4637f56c 100644
--- a/src/invert/laplace/impls/pcr/pcr.hxx
+++ b/src/invert/laplace/impls/pcr/pcr.hxx
@@ -172,14 +172,6 @@ private:
   /// First and last interior points xstart, xend
   int xs, xe;
 
-  bool isGlobalFlagSet(int flag) const { return (global_flags & flag) != 0; }
-  bool isInnerBoundaryFlagSet(int flag) const {
-    return (inner_boundary_flags & flag) != 0;
-  }
-  bool isOuterBoundaryFlagSet(int flag) const {
-    return (outer_boundary_flags & flag) != 0;
-  }
-
   bool dst{false};
 };
 
diff --git a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx
index 35a25779a7..61c8f58694 100644
--- a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx
+++ b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx
@@ -145,13 +145,13 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) {
   // If the flags to assign that only one guard cell should be used is set
   int inbndry = localmesh->xstart;
   int outbndry = localmesh->xstart;
-  if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) {
+  if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
     inbndry = outbndry = 1;
   }
-  if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     inbndry = 1;
   }
-  if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     outbndry = 1;
   }
 
@@ -169,10 +169,9 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) {
       for (int ix = xs; ix <= xe; ix++) {
         // Take DST in Z direction and put result in k1d
 
-        if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0)
-             && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           DST(x0[ix] + 1, localmesh->LocalNz - 2, std::begin(k1d));
         } else {
@@ -195,8 +194,7 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) {
         tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy,
                      kz,    // wave number index
                      kwave, // kwave (inverse wave length)
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false); // Don't include guard cells in arrays
       }
     }
@@ -241,10 +239,9 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) {
       for (int ix = xs; ix <= xe; ix++) {
         // Take FFT in Z direction, apply shift, and put result in k1d
 
-        if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0)
-             && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           rfft(x0[ix], localmesh->LocalNz, std::begin(k1d));
         } else {
@@ -265,8 +262,7 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) {
         tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy,
                      kz,    // True for the component constant (DC) in Z
                      kwave, // Z wave number
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false); // Don't include guard cells in arrays
       }
     }
@@ -281,7 +277,7 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) {
       auto k1d = Array<dcomplex>((localmesh->LocalNz) / 2
                                  + 1); // ZFFT routine expects input of this length
 
-      const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0;
+      const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC);
 
       BOUT_OMP_PERF(for nowait)
       for (int ix = xs; ix <= xe; ix++) {
@@ -323,13 +319,13 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) {
   // If the flags to assign that only one guard cell should be used is set
   int inbndry = localmesh->xstart;
   int outbndry = localmesh->xstart;
-  if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) {
+  if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
     inbndry = outbndry = 1;
   }
-  if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     inbndry = 1;
   }
-  if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) {
+  if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     outbndry = 1;
   }
 
@@ -383,10 +379,9 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) {
 
         // Take DST in Z direction and put result in k1d
 
-        if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0)
-             && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           DST(x0(ix, iy) + 1, localmesh->LocalNz - 2, std::begin(k1d));
         } else {
@@ -413,8 +408,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) {
         tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy,
                      kz,    // wave number index
                      kwave, // kwave (inverse wave length)
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false); // Don't include guard cells in arrays
       }
     }
@@ -468,10 +462,9 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) {
 
         // Take FFT in Z direction, apply shift, and put result in k1d
 
-        if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0)
-             && localmesh->firstX())
+        if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
             || ((localmesh->LocalNx - ix - 1 < outbndry)
-                && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) {
+                && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) {
           // Use the values in x0 in the boundary
           rfft(x0(ix, iy), localmesh->LocalNz, std::begin(k1d));
         } else {
@@ -497,8 +490,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) {
         tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy,
                      kz,    // True for the component constant (DC) in Z
                      kwave, // Z wave number
-                     global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef,
-                     &C1coef, &C2coef, &Dcoef,
+                     &Acoef, &C1coef, &C2coef, &Dcoef,
                      false); // Don't include guard cells in arrays
       }
     }
@@ -513,7 +505,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) {
       auto k1d = Array<dcomplex>((localmesh->LocalNz) / 2
                                  + 1); // ZFFT routine expects input of this length
 
-      const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0;
+      const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC);
 
       BOUT_OMP_PERF(for nowait)
       for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y
diff --git a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx
index 009a1def2b..e12a647789 100644
--- a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx
+++ b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx
@@ -175,14 +175,6 @@ private:
   /// First and last interior points xstart, xend
   int xs, xe;
 
-  bool isGlobalFlagSet(int flag) const { return (global_flags & flag) != 0; }
-  bool isInnerBoundaryFlagSet(int flag) const {
-    return (inner_boundary_flags & flag) != 0;
-  }
-  bool isOuterBoundaryFlagSet(int flag) const {
-    return (outer_boundary_flags & flag) != 0;
-  }
-
   bool dst{false};
 };
 
diff --git a/src/invert/laplace/impls/petsc/petsc_laplace.cxx b/src/invert/laplace/impls/petsc/petsc_laplace.cxx
index d125b90694..f06f4c7de6 100644
--- a/src/invert/laplace/impls/petsc/petsc_laplace.cxx
+++ b/src/invert/laplace/impls/petsc/petsc_laplace.cxx
@@ -23,7 +23,8 @@
  * along with BOUT++.  If not, see <http://www.gnu.org/licenses/>.
  *
  **************************************************************************/
-#include "bout/build_config.hxx"
+
+#include "bout/build_defines.hxx"
 
 #if BOUT_HAS_PETSC
 
@@ -32,6 +33,8 @@
 #include <bout/assert.hxx>
 #include <bout/boutcomm.hxx>
 #include <bout/mesh.hxx>
+#include <bout/output.hxx>
+#include <bout/petsclib.hxx>
 #include <bout/sys/timer.hxx>
 #include <bout/utils.hxx>
 
@@ -49,14 +52,13 @@
 #define KSP_PREONLY "preonly"
 
 static PetscErrorCode laplacePCapply(PC pc, Vec x, Vec y) {
-  int ierr;
+  PetscFunctionBegin; // NOLINT
 
-  // Get the context
-  LaplacePetsc* s;
-  ierr = PCShellGetContext(pc, reinterpret_cast<void**>(&s));
+  LaplacePetsc* laplace = nullptr;
+  const int ierr = PCShellGetContext(pc, reinterpret_cast<void**>(&laplace)); // NOLINT
   CHKERRQ(ierr);
 
-  PetscFunctionReturn(s->precon(x, y));
+  PetscFunctionReturn(laplace->precon(x, y)); // NOLINT
 }
 
 LaplacePetsc::LaplacePetsc(Options* opt, const CELL_LOC loc, Mesh* mesh_in,
@@ -79,28 +81,9 @@ LaplacePetsc::LaplacePetsc(Options* opt, const CELL_LOC loc, Mesh* mesh_in,
   }
 
 #if CHECK > 0
-  // These are the implemented flags
-  implemented_flags = INVERT_START_NEW;
-  implemented_boundary_flags = INVERT_AC_GRAD + INVERT_SET + INVERT_RHS;
   // Checking flags are set to something which is not implemented
-  // This is done binary (which is possible as each flag is a power of 2)
-  if (global_flags & ~implemented_flags) {
-    if (global_flags & INVERT_4TH_ORDER) {
-      output << "For PETSc based Laplacian inverter, use 'fourth_order=true' instead of "
-                "setting INVERT_4TH_ORDER flag"
-             << endl;
-    }
-    throw BoutException("Attempted to set Laplacian inversion flag that is not "
-                        "implemented in petsc_laplace.cxx");
-  }
-  if (inner_boundary_flags & ~implemented_boundary_flags) {
-    throw BoutException("Attempted to set Laplacian inversion boundary flag that is not "
-                        "implemented in petsc_laplace.cxx");
-  }
-  if (outer_boundary_flags & ~implemented_boundary_flags) {
-    throw BoutException("Attempted to set Laplacian inversion boundary flag that is not "
-                        "implemented in petsc_laplace.cxx");
-  }
+  checkFlags();
+
   if (localmesh->periodicX) {
     throw BoutException("LaplacePetsc does not work with periodicity in the x direction "
                         "(localmesh->PeriodicX == true). Change boundary conditions or "
@@ -360,25 +343,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) {
   ASSERT1(x0.getLocation() == location);
 
 #if CHECK > 0
-  // Checking flags are set to something which is not implemented (see
-  // constructor for details)
-  if (global_flags & !implemented_flags) {
-    if (global_flags & INVERT_4TH_ORDER) {
-      output << "For PETSc based Laplacian inverter, use 'fourth_order=true' instead of "
-                "setting INVERT_4TH_ORDER flag"
-             << endl;
-    }
-    throw BoutException("Attempted to set Laplacian inversion flag that is not "
-                        "implemented in petsc_laplace.cxx");
-  }
-  if (inner_boundary_flags & ~implemented_boundary_flags) {
-    throw BoutException("Attempted to set Laplacian inversion boundary flag that is not "
-                        "implemented in petsc_laplace.cxx");
-  }
-  if (outer_boundary_flags & ~implemented_boundary_flags) {
-    throw BoutException("Attempted to set Laplacian inversion boundary flag that is not "
-                        "implemented in petsc_laplace.cxx");
-  }
+  checkFlags();
 #endif
 
   int y = b.getIndex(); // Get the Y index
@@ -415,7 +380,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) {
         for (int z = 0; z < localmesh->LocalNz; z++) {
           PetscScalar val; // Value of element to be set in the matrix
           // If Neumann Boundary Conditions are set.
-          if (inner_boundary_flags & INVERT_AC_GRAD) {
+          if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) {
             // Set values corresponding to nodes adjacent in x
             if (fourth_order) {
               // Fourth Order Accuracy on Boundary
@@ -472,9 +437,9 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) {
 
           // Set Components of RHS
           // If the inner boundary value should be set by b or x0
-          if (inner_boundary_flags & INVERT_RHS) {
+          if (isInnerBoundaryFlagSet(INVERT_RHS)) {
             val = b[x][z];
-          } else if (inner_boundary_flags & INVERT_SET) {
+          } else if (isInnerBoundaryFlagSet(INVERT_SET)) {
             val = x0[x][z];
           }
 
@@ -680,7 +645,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) {
           Element(i, x, z, 0, 0, val, MatA);
 
           // If Neumann Boundary Conditions are set.
-          if (outer_boundary_flags & INVERT_AC_GRAD) {
+          if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) {
             // Set values corresponding to nodes adjacent in x
             if (fourth_order) {
               // Fourth Order Accuracy on Boundary
@@ -733,9 +698,9 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) {
           // Set Components of RHS
           // If the inner boundary value should be set by b or x0
           val = 0;
-          if (outer_boundary_flags & INVERT_RHS) {
+          if (isOuterBoundaryFlagSet(INVERT_RHS)) {
             val = b[x][z];
-          } else if (outer_boundary_flags & INVERT_SET) {
+          } else if (isOuterBoundaryFlagSet(INVERT_SET)) {
             val = x0[x][z];
           }
 
@@ -812,7 +777,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) {
       KSPSetTolerances(ksp, rtol, atol, dtol, maxits);
 
       // If the initial guess is not set to zero
-      if (!(global_flags & INVERT_START_NEW)) {
+      if (!isGlobalFlagSet(INVERT_START_NEW)) {
         KSPSetInitialGuessNonzero(ksp, static_cast<PetscBool>(true));
       }
 
@@ -1194,4 +1159,24 @@ int LaplacePetsc::precon(Vec x, Vec y) {
   return 0;
 }
 
+void LaplacePetsc::checkFlags() { // Throws BoutException if an unsupported flag is set
+  if (isGlobalFlagSet(~implemented_flags)) { // some global flag outside implemented_flags
+    if (isGlobalFlagSet(INVERT_4TH_ORDER)) { // point at the option replacing this flag
+      output_error.write(
+          "For PETSc based Laplacian inverter, use 'fourth_order=true' instead of "
+          "setting INVERT_4TH_ORDER flag\n");
+    }
+    throw BoutException("Attempted to set Laplacian inversion flag that is not "
+                        "implemented in petsc_laplace.cxx");
+  }
+  if (isInnerBoundaryFlagSet(~implemented_boundary_flags)) { // unsupported inner flag
+    throw BoutException("Attempted to set Laplacian inversion boundary flag that is not "
+                        "implemented in petsc_laplace.cxx");
+  }
+  if (isOuterBoundaryFlagSet(~implemented_boundary_flags)) { // unsupported outer flag
+    throw BoutException("Attempted to set Laplacian inversion boundary flag that is not "
+                        "implemented in petsc_laplace.cxx");
+  }
+}
+
 #endif // BOUT_HAS_PETSC_3_3
diff --git a/src/invert/laplace/impls/petsc/petsc_laplace.hxx b/src/invert/laplace/impls/petsc/petsc_laplace.hxx
index 011f8971df..55482644be 100644
--- a/src/invert/laplace/impls/petsc/petsc_laplace.hxx
+++ b/src/invert/laplace/impls/petsc/petsc_laplace.hxx
@@ -254,10 +254,11 @@ private:
   void vecToField(Vec x, FieldPerp& f);       // Copy a vector into a fieldperp
   void fieldToVec(const FieldPerp& f, Vec x); // Copy a fieldperp into a vector
 
-#if CHECK > 0
-  int implemented_flags;
-  int implemented_boundary_flags;
-#endif
+  static constexpr int implemented_flags = INVERT_START_NEW;
+  static constexpr int implemented_boundary_flags =
+      INVERT_AC_GRAD | INVERT_SET | INVERT_RHS;
+
+  void checkFlags();
 };
 
 #endif //BOUT_HAS_PETSC
diff --git a/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx b/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx
index d1e2207725..a7bfd209ee 100644
--- a/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx
+++ b/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx
@@ -84,12 +84,12 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes
 #if CHECK > 0
   // Checking flags are set to something which is not implemented
   // This is done binary (which is possible as each flag is a power of 2)
-  if (flagSet(global_flags, INVERT_4TH_ORDER)) {
+  if (isGlobalFlagSet(INVERT_4TH_ORDER)) {
     output.write("For PETSc based Laplacian inverter, use 'fourth_order=true' instead of "
                  "setting INVERT_4TH_ORDER flag\n");
   }
 
-  if (flagSet(global_flags, ~implemented_flags)) {
+  if (isGlobalFlagSet(~implemented_flags)) {
     throw BoutException("Attempted to set global Laplacian inversion flag that is not "
                         "implemented in petsc_laplace.cxx");
   }
@@ -102,8 +102,8 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes
                           name);
     }
   };
-  unimplementedBoundaryFlag(inner_boundary_flags, "inner");
-  unimplementedBoundaryFlag(outer_boundary_flags, "outer");
+  unimplementedBoundaryFlag(getInnerBoundaryFlags(), "inner");
+  unimplementedBoundaryFlag(getOuterBoundaryFlags(), "outer");
   unimplementedBoundaryFlag(lower_boundary_flags, "lower");
   unimplementedBoundaryFlag(upper_boundary_flags, "upper");
 
@@ -119,7 +119,7 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes
   }
 
   // Set up boundary conditions in operator
-  const bool inner_X_neumann = flagSet(inner_boundary_flags, INVERT_AC_GRAD);
+  const bool inner_X_neumann = isInnerBoundaryFlagSet(INVERT_AC_GRAD);
   const auto inner_X_BC = inner_X_neumann ? -1. / coords->dx / sqrt(coords->g_11) : 0.5;
   const auto inner_X_BC_plus = inner_X_neumann ? -inner_X_BC : 0.5;
 
@@ -128,7 +128,7 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes
     operator3D(i, i.xp()) = inner_X_BC_plus[i];
   }
 
-  const bool outer_X_neumann = flagSet(outer_boundary_flags, INVERT_AC_GRAD);
+  const bool outer_X_neumann = isOuterBoundaryFlagSet(INVERT_AC_GRAD);
   const auto outer_X_BC = outer_X_neumann ? 1. / coords->dx / sqrt(coords->g_11) : 0.5;
   const auto outer_X_BC_minus = outer_X_neumann ? -outer_X_BC : 0.5;
 
@@ -191,8 +191,8 @@ Field3D LaplacePetsc3dAmg::solve(const Field3D& b_in, const Field3D& x0) {
 
   // Adjust vectors to represent boundary conditions and check that
   // boundary cells are finite
-  setBC(rhs, b_in, indexer->getRegionInnerX(), inner_boundary_flags, x0);
-  setBC(rhs, b_in, indexer->getRegionOuterX(), outer_boundary_flags, x0);
+  setBC(rhs, b_in, indexer->getRegionInnerX(), getInnerBoundaryFlags(), x0);
+  setBC(rhs, b_in, indexer->getRegionOuterX(), getOuterBoundaryFlags(), x0);
   setBC(rhs, b_in, indexer->getRegionLowerY(), lower_boundary_flags, x0);
   setBC(rhs, b_in, indexer->getRegionUpperY(), upper_boundary_flags, x0);
 
@@ -460,7 +460,7 @@ void LaplacePetsc3dAmg::updateMatrix3D() {
     KSPSetTolerances(ksp, rtol, atol, dtol, maxits);
 
     // If the initial guess is not set to zero
-    if ((global_flags & INVERT_START_NEW) == 0) {
+    if (!isGlobalFlagSet(INVERT_START_NEW)) {
       KSPSetInitialGuessNonzero(ksp, (PetscBool) true);
     }
 
diff --git a/src/invert/laplace/impls/serial_band/serial_band.cxx b/src/invert/laplace/impls/serial_band/serial_band.cxx
index eda76498fc..4e7bb4c63f 100644
--- a/src/invert/laplace/impls/serial_band/serial_band.cxx
+++ b/src/invert/laplace/impls/serial_band/serial_band.cxx
@@ -99,7 +99,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
 
   int xbndry = localmesh->xstart; // Width of the x boundary
   // If the flags to assign that only one guard cell should be used is set
-  if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
+  if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
     xbndry = 1;
   }
 
@@ -107,8 +107,8 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
   for (int ix = 0; ix < localmesh->LocalNx; ix++) {
     // for fixed ix,jy set a complex vector rho(z)
 
-    if (((ix < xbndry) && (inner_boundary_flags & INVERT_SET))
-        || ((ncx - ix < xbndry) && (outer_boundary_flags & INVERT_SET))) {
+    if (((ix < xbndry) && isInnerBoundaryFlagSet(INVERT_SET))
+        || ((ncx - ix < xbndry) && (isOuterBoundaryFlagSet(INVERT_SET)))) {
       // Use the values in x0 in the boundary
       rfft(x0[ix], ncz, &bk(ix, 0));
     } else {
@@ -247,10 +247,10 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
     for (int ix = 0; ix < xbndry; ix++) {
       // Set zero-value. Change to zero-gradient if needed
 
-      if (!(inner_boundary_flags & (INVERT_RHS | INVERT_SET))) {
+      if (!isInnerBoundaryFlagSet(INVERT_RHS | INVERT_SET)) {
         bk1d[ix] = 0.0;
       }
-      if (!(outer_boundary_flags & (INVERT_RHS | INVERT_SET))) {
+      if (!isOuterBoundaryFlagSet(INVERT_RHS | INVERT_SET)) {
         bk1d[ncx - ix] = 0.0;
       }
 
@@ -265,8 +265,8 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
       // DC
 
       // Inner boundary
-      if (inner_boundary_flags & (INVERT_DC_GRAD + INVERT_SET)
-          || inner_boundary_flags & (INVERT_DC_GRAD + INVERT_RHS)) {
+      if (isInnerBoundaryFlagSet(INVERT_DC_GRAD + INVERT_SET)
+          || isInnerBoundaryFlagSet(INVERT_DC_GRAD + INVERT_RHS)) {
         // Zero gradient at inner boundary. 2nd-order accurate
         // Boundary at midpoint
         for (int ix = 0; ix < xbndry; ix++) {
@@ -277,7 +277,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
           A(ix, 4) = 0.;
         }
 
-      } else if (inner_boundary_flags & INVERT_DC_GRAD) {
+      } else if (isInnerBoundaryFlagSet(INVERT_DC_GRAD)) {
         // Zero gradient at inner boundary. 2nd-order accurate
         // Boundary at midpoint
         for (int ix = 0; ix < xbndry; ix++) {
@@ -288,7 +288,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
           A(ix, 4) = 0.;
         }
 
-      } else if (inner_boundary_flags & INVERT_DC_GRADPAR) {
+      } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPAR)) {
         for (int ix = 0; ix < xbndry; ix++) {
           A(ix, 0) = 0.;
           A(ix, 1) = 0.;
@@ -296,7 +296,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
           A(ix, 3) = 4. / sqrt(coords->g_22(ix + 1, jy));
           A(ix, 4) = -1. / sqrt(coords->g_22(ix + 2, jy));
         }
-      } else if (inner_boundary_flags & INVERT_DC_GRADPARINV) {
+      } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPARINV)) {
         for (int ix = 0; ix < xbndry; ix++) {
           A(ix, 0) = 0.;
           A(ix, 1) = 0.;
@@ -304,7 +304,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
           A(ix, 3) = 4. * sqrt(coords->g_22(ix + 1, jy));
           A(ix, 4) = -sqrt(coords->g_22(ix + 2, jy));
         }
-      } else if (inner_boundary_flags & INVERT_DC_LAP) {
+      } else if (isInnerBoundaryFlagSet(INVERT_DC_LAP)) {
         for (int ix = 0; ix < xbndry; ix++) {
           A(ix, 0) = 0.;
           A(ix, 1) = 0.;
@@ -315,7 +315,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
       }
 
       // Outer boundary
-      if (outer_boundary_flags & INVERT_DC_GRAD) {
+      if (isOuterBoundaryFlagSet(INVERT_DC_GRAD)) {
         // Zero gradient at outer boundary
         for (int ix = 0; ix < xbndry; ix++) {
           A(ncx - ix, 1) = -1.0;
@@ -326,12 +326,12 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
       // AC
 
       // Inner boundarySQ(kwave)*coef2
-      if (inner_boundary_flags & INVERT_AC_GRAD) {
+      if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) {
         // Zero gradient at inner boundary
         for (int ix = 0; ix < xbndry; ix++) {
           A(ix, 3) = -1.0;
         }
-      } else if (inner_boundary_flags & INVERT_AC_LAP) {
+      } else if (isInnerBoundaryFlagSet(INVERT_AC_LAP)) {
         // Enforce zero laplacian for 2nd and 4th-order
 
         int ix = 1;
@@ -369,12 +369,12 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
       }
 
       // Outer boundary
-      if (outer_boundary_flags & INVERT_AC_GRAD) {
+      if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) {
         // Zero gradient at outer boundary
         for (int ix = 0; ix < xbndry; ix++) {
           A(ncx - ix, 1) = -1.0;
         }
-      } else if (outer_boundary_flags & INVERT_AC_LAP) {
+      } else if (isOuterBoundaryFlagSet(INVERT_AC_LAP)) {
         // Enforce zero laplacian for 2nd and 4th-order
         // NOTE: Currently ignoring XZ term and coef4 assumed zero on boundary
         // FIX THIS IF IT WORKS
@@ -417,7 +417,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
     // Perform inversion
     cband_solve(A, localmesh->LocalNx, 2, 2, bk1d);
 
-    if ((global_flags & INVERT_KX_ZERO) && (iz == 0)) {
+    if (isGlobalFlagSet(INVERT_KX_ZERO) && (iz == 0)) {
       // Set the Kx = 0, n = 0 component to zero. For now just subtract
       // Should do in the inversion e.g. Sherman-Morrison formula
 
@@ -440,7 +440,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) {
   // Done inversion, transform back
 
   for (int ix = 0; ix <= ncx; ix++) {
-    if (global_flags & INVERT_ZERO_DC) {
+    if (isGlobalFlagSet(INVERT_ZERO_DC)) {
       xk(ix, 0) = 0.0;
     }
 
diff --git a/src/invert/laplace/impls/serial_tri/serial_tri.cxx b/src/invert/laplace/impls/serial_tri/serial_tri.cxx
index 909a47f856..f46a0a46e5 100644
--- a/src/invert/laplace/impls/serial_tri/serial_tri.cxx
+++ b/src/invert/laplace/impls/serial_tri/serial_tri.cxx
@@ -91,13 +91,13 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) {
   int inbndry = localmesh->xstart, outbndry = localmesh->xstart;
 
   // If the flags to assign that only one guard cell should be used is set
-  if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
+  if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
     inbndry = outbndry = 1;
   }
-  if (inner_boundary_flags & INVERT_BNDRY_ONE) {
+  if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     inbndry = 1;
   }
-  if (outer_boundary_flags & INVERT_BNDRY_ONE) {
+  if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     outbndry = 1;
   }
 
@@ -140,8 +140,8 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) {
      * If the INVERT_SET flag is set (meaning that x0 will be used to set the
      * bounadry values),
      */
-    if (((ix < inbndry) && (inner_boundary_flags & INVERT_SET))
-        || ((ncx - 1 - ix < outbndry) && (outer_boundary_flags & INVERT_SET))) {
+    if (((ix < inbndry) && isInnerBoundaryFlagSet(INVERT_SET))
+        || ((ncx - 1 - ix < outbndry) && (isOuterBoundaryFlagSet(INVERT_SET)))) {
       // Use the values in x0 in the boundary
 
       // x0 is the input
@@ -185,8 +185,7 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) {
                  kz,
                  // wave number (different from kz only if we are taking a part
                  // of the z-domain [and not from 0 to 2*pi])
-                 kz * kwaveFactor, global_flags, inner_boundary_flags,
-                 outer_boundary_flags, &A, &C, &D);
+                 kz * kwaveFactor, &A, &C, &D);
 
     ///////// PERFORM INVERSION /////////
     if (!localmesh->periodicX) {
@@ -208,7 +207,7 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) {
     }
 
     // If the global flag is set to INVERT_KX_ZERO
-    if ((global_flags & INVERT_KX_ZERO) && (kz == 0)) {
+    if (isGlobalFlagSet(INVERT_KX_ZERO) && (kz == 0)) {
       dcomplex offset(0.0);
       for (int ix = localmesh->xstart; ix <= localmesh->xend; ix++) {
         offset += xk1d[ix];
@@ -228,7 +227,7 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) {
   // Done inversion, transform back
   for (int ix = 0; ix < ncx; ix++) {
 
-    if (global_flags & INVERT_ZERO_DC) {
+    if (isGlobalFlagSet(INVERT_ZERO_DC)) {
       xk(ix, 0) = 0.0;
     }
 
diff --git a/src/invert/laplace/impls/spt/spt.cxx b/src/invert/laplace/impls/spt/spt.cxx
index 56ac496271..2e4c844c94 100644
--- a/src/invert/laplace/impls/spt/spt.cxx
+++ b/src/invert/laplace/impls/spt/spt.cxx
@@ -65,10 +65,9 @@ LaplaceSPT::LaplaceSPT(Options* opt, const CELL_LOC loc, Mesh* mesh_in,
     ye = localmesh->LocalNy - 1; // Contains upper boundary
   }
 
-  alldata = new SPT_data[ye - ys + 1];
-  alldata -= ys; // Re-number indices to start at ys
+  alldata.reallocate(ye - ys + 1);
   for (int jy = ys; jy <= ye; jy++) {
-    alldata[jy].comm_tag = SPT_DATA + jy; // Give each one a different tag
+    alldata[jy - ys].comm_tag = SPT_DATA + jy; // Give each one a different tag
   }
 
   // Temporary array for taking FFTs
@@ -76,11 +75,6 @@ LaplaceSPT::LaplaceSPT(Options* opt, const CELL_LOC loc, Mesh* mesh_in,
   dc1d.reallocate(ncz / 2 + 1);
 }
 
-LaplaceSPT::~LaplaceSPT() {
-  alldata += ys; // Return to index from 0
-  delete[] alldata;
-}
-
 FieldPerp LaplaceSPT::solve(const FieldPerp& b) { return solve(b, b); }
 
 FieldPerp LaplaceSPT::solve(const FieldPerp& b, const FieldPerp& x0) {
@@ -90,15 +84,15 @@ FieldPerp LaplaceSPT::solve(const FieldPerp& b, const FieldPerp& x0) {
 
   FieldPerp x{emptyFrom(b)};
 
-  if ((inner_boundary_flags & INVERT_SET) || (outer_boundary_flags & INVERT_SET)) {
+  if (isInnerBoundaryFlagSet(INVERT_SET) || isOuterBoundaryFlagSet(INVERT_SET)) {
     FieldPerp bs = copy(b);
 
     int xbndry = localmesh->xstart;
     // If the flags to assign that only one guard cell should be used is set
-    if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
+    if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
       xbndry = 1;
     }
-    if ((inner_boundary_flags & INVERT_SET) && localmesh->firstX()) {
+    if (isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) {
       // Copy x0 inner boundary into bs
       for (int ix = 0; ix < xbndry; ix++) {
         for (int iz = 0; iz < localmesh->LocalNz; iz++) {
@@ -106,7 +100,7 @@ FieldPerp LaplaceSPT::solve(const FieldPerp& b, const FieldPerp& x0) {
         }
       }
     }
-    if ((outer_boundary_flags & INVERT_SET) && localmesh->lastX()) {
+    if (isOuterBoundaryFlagSetOnLastX(INVERT_SET)) {
       // Copy x0 outer boundary into bs
       for (int ix = localmesh->LocalNx - 1; ix >= localmesh->LocalNx - xbndry; ix--) {
         for (int iz = 0; iz < localmesh->LocalNz; iz++) {
@@ -141,29 +135,29 @@ Field3D LaplaceSPT::solve(const Field3D& b) {
 
   for (int jy = ys; jy <= ye; jy++) {
     // And start another one going
-    start(sliceXZ(b, jy), alldata[jy]);
+    start(sliceXZ(b, jy), alldata[jy - ys]);
 
     // Move each calculation along one processor
     for (int jy2 = ys; jy2 < jy; jy2++) {
-      next(alldata[jy2]);
+      next(alldata[jy2 - ys]);
     }
   }
 
   bool running = true;
-  do {
+  while (running) {
     // Move each calculation along until the last one is finished
-    for (int jy = ys; jy <= ye; jy++) {
-      running = next(alldata[jy]) == 0;
+    for (auto& data : alldata) {
+      running = next(data) == 0;
     }
-  } while (running);
+  }
 
   FieldPerp xperp(localmesh);
   xperp.setLocation(location);
   xperp.allocate();
 
   // All calculations finished. Get result
-  for (int jy = ys; jy <= ye; jy++) {
-    finish(alldata[jy], xperp);
+  for (auto& data : alldata) {
+    finish(data, xperp);
     x = xperp;
   }
 
@@ -173,17 +167,17 @@ Field3D LaplaceSPT::solve(const Field3D& b) {
 Field3D LaplaceSPT::solve(const Field3D& b, const Field3D& x0) {
   ASSERT1(localmesh == b.getMesh() && localmesh == x0.getMesh());
 
-  if (((inner_boundary_flags & INVERT_SET) && localmesh->firstX())
-      || ((outer_boundary_flags & INVERT_SET) && localmesh->lastX())) {
+  if ((isInnerBoundaryFlagSetOnFirstX(INVERT_SET))
+      || isOuterBoundaryFlagSetOnLastX(INVERT_SET)) {
     Field3D bs = copy(b);
 
     int xbndry = localmesh->xstart;
     // If the flags to assign that only one guard cell should be used is set
-    if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
+    if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
       xbndry = 1;
     }
 
-    if ((inner_boundary_flags & INVERT_SET) && localmesh->firstX()) {
+    if (isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) {
       // Copy x0 inner boundary into bs
       for (int ix = 0; ix < xbndry; ix++) {
         for (int iy = 0; iy < localmesh->LocalNy; iy++) {
@@ -193,7 +187,7 @@ Field3D LaplaceSPT::solve(const Field3D& b, const Field3D& x0) {
         }
       }
     }
-    if ((outer_boundary_flags & INVERT_SET) && localmesh->lastX()) {
+    if (isOuterBoundaryFlagSetOnLastX(INVERT_SET)) {
       // Copy x0 outer boundary into bs
       for (int ix = localmesh->LocalNx - 1; ix >= localmesh->LocalNx - xbndry; ix--) {
         for (int iy = 0; iy < localmesh->LocalNy; iy++) {
@@ -323,8 +317,7 @@ int LaplaceSPT::start(const FieldPerp& b, SPT_data& data) {
   /// Set matrix elements
   for (int kz = 0; kz <= maxmode; kz++) {
     tridagMatrix(&data.avec(kz, 0), &data.bvec(kz, 0), &data.cvec(kz, 0), &data.bk(kz, 0),
-                 data.jy, kz, kz * kwaveFactor, global_flags, inner_boundary_flags,
-                 outer_boundary_flags, &Acoef, &Ccoef, &Dcoef);
+                 data.jy, kz, kz * kwaveFactor, &Acoef, &Ccoef, &Dcoef);
   }
 
   data.proc = 0; //< Starts at processor 0
@@ -516,7 +509,7 @@ void LaplaceSPT::finish(SPT_data& data, FieldPerp& x) {
       dc1d[kz] = 0.0;
     }
 
-    if (global_flags & INVERT_ZERO_DC) {
+    if (isGlobalFlagSet(INVERT_ZERO_DC)) {
       dc1d[0] = 0.0;
     }
 
diff --git a/src/invert/laplace/impls/spt/spt.hxx b/src/invert/laplace/impls/spt/spt.hxx
index c6aa8fd404..a9d5b2583f 100644
--- a/src/invert/laplace/impls/spt/spt.hxx
+++ b/src/invert/laplace/impls/spt/spt.hxx
@@ -69,7 +69,6 @@ class LaplaceSPT : public Laplacian {
 public:
   LaplaceSPT(Options* opt = nullptr, const CELL_LOC = CELL_CENTRE,
              Mesh* mesh_in = nullptr, Solver* solver = nullptr);
-  ~LaplaceSPT();
 
   using Laplacian::setCoefA;
   void setCoefA(const Field2D& val) override {
@@ -106,17 +105,15 @@ public:
   Field3D solve(const Field3D& b, const Field3D& x0) override;
 
 private:
-  enum { SPT_DATA = 1123 }; ///< 'magic' number for SPT MPI messages
+  constexpr static int SPT_DATA = 1123; ///< 'magic' number for SPT MPI messages
 
   Field2D Acoef, Ccoef, Dcoef;
 
   /// Data structure for SPT algorithm
   struct SPT_data {
-    SPT_data() : comm_tag(SPT_DATA) {}
     void allocate(int mm, int nx); // Allocates memory
-    ~SPT_data(){};                 // Free memory
 
-    int jy; ///< Y index
+    int jy = 0; ///< Y index
 
     Matrix<dcomplex> bk; ///< b vector in Fourier space
     Matrix<dcomplex> xk;
@@ -125,19 +122,19 @@ private:
 
     Matrix<dcomplex> avec, bvec, cvec; ///< Diagonal bands of matrix
 
-    int proc; // Which processor has this reached?
-    int dir;  // Which direction is it going?
+    int proc = 0; // Which processor has this reached?
+    int dir = 1;  // Which direction is it going?
 
-    comm_handle recv_handle; // Handle for receives
+    comm_handle recv_handle = nullptr; // Handle for receives
 
-    int comm_tag; // Tag for communication
+    int comm_tag = SPT_DATA; // Tag for communication
 
     Array<BoutReal> buffer;
   };
 
   int ys, ye;         // Range of Y indices
   SPT_data slicedata; // Used to solve for a single FieldPerp
-  SPT_data* alldata;  // Used to solve a Field3D
+  Array<SPT_data> alldata; // Used to solve a Field3D
 
   Array<dcomplex> dc1d; ///< 1D in Z for taking FFTs
 
diff --git a/src/invert/laplace/invert_laplace.cxx b/src/invert/laplace/invert_laplace.cxx
index 505b04cc4f..4032499781 100644
--- a/src/invert/laplace/invert_laplace.cxx
+++ b/src/invert/laplace/invert_laplace.cxx
@@ -424,20 +424,16 @@ void Laplacian::tridagCoefs(int jx, int jy, BoutReal kwave, dcomplex& a, dcomple
 #if BOUT_USE_METRIC_3D
 void Laplacian::tridagMatrix(dcomplex* /*avec*/, dcomplex* /*bvec*/, dcomplex* /*cvec*/,
                              dcomplex* /*bk*/, int /*jy*/, int /*kz*/, BoutReal /*kwave*/,
-                             int /*global_flags*/, int /*inner_boundary_flags*/,
-                             int /*outer_boundary_flags*/, const Field2D* /*a*/,
-                             const Field2D* /*c1coef*/, const Field2D* /*c2coef*/,
-                             const Field2D* /*d*/, bool /*includeguards*/,
-                             bool /*zperiodic*/) {
+                             const Field2D* /*a*/, const Field2D* /*c1coef*/,
+                             const Field2D* /*c2coef*/, const Field2D* /*d*/,
+                             bool /*includeguards*/, bool /*zperiodic*/) {
   throw BoutException("Error: tridagMatrix does not yet work with 3D metric.");
 }
 #else
 void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dcomplex* bk,
-                             int jy, int kz, BoutReal kwave, int global_flags,
-                             int inner_boundary_flags, int outer_boundary_flags,
-                             const Field2D* a, const Field2D* c1coef,
-                             const Field2D* c2coef, const Field2D* d, bool includeguards,
-                             bool zperiodic) {
+                             int jy, int kz, BoutReal kwave, const Field2D* a,
+                             const Field2D* c1coef, const Field2D* c2coef,
+                             const Field2D* d, bool includeguards, bool zperiodic) {
   ASSERT1(a->getLocation() == location);
   ASSERT1(c1coef->getLocation() == location);
   ASSERT1(c2coef->getLocation() == location);
@@ -469,13 +465,13 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco
   int inbndry = localmesh->xstart, outbndry = localmesh->xstart;
 
   // If the flags to assign that only one guard cell should be used is set
-  if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
+  if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) {
     inbndry = outbndry = 1;
   }
-  if (inner_boundary_flags & INVERT_BNDRY_ONE) {
+  if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     inbndry = 1;
   }
-  if (outer_boundary_flags & INVERT_BNDRY_ONE) {
+  if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) {
     outbndry = 1;
   }
 
@@ -497,7 +493,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco
 
       // If no user specified value is set on inner boundary, set the first
       // element in b (in the equation AX=b) to 0
-      if (!(inner_boundary_flags & (INVERT_RHS | INVERT_SET))) {
+      if (!isInnerBoundaryFlagSet(INVERT_RHS | INVERT_SET)) {
         for (int ix = 0; ix < inbndry; ix++) {
           bk[ix] = 0.;
         }
@@ -506,34 +502,35 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco
       // DC i.e. kz = 0 (the offset mode)
       if (kz == 0) {
 
-        if (inner_boundary_flags & INVERT_DC_GRAD
-            && (inner_boundary_flags & INVERT_SET || inner_boundary_flags & INVERT_RHS)) {
+        if (isInnerBoundaryFlagSet(INVERT_DC_GRAD)
+            && (isInnerBoundaryFlagSet(INVERT_SET)
+                || isInnerBoundaryFlagSet(INVERT_RHS))) {
           // Zero gradient at inner boundary
           for (int ix = 0; ix < inbndry; ix++) {
             avec[ix] = 0.;
             bvec[ix] = -1. / sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy);
             cvec[ix] = 1. / sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy);
           }
-        } else if (inner_boundary_flags & INVERT_DC_GRAD) {
+        } else if (isInnerBoundaryFlagSet(INVERT_DC_GRAD)) {
           // Zero gradient at inner boundary
           for (int ix = 0; ix < inbndry; ix++) {
             avec[ix] = 0.;
             bvec[ix] = -1.;
             cvec[ix] = 1.;
           }
-        } else if (inner_boundary_flags & INVERT_DC_GRADPAR) {
+        } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPAR)) {
           for (int ix = 0; ix < inbndry; ix++) {
             avec[ix] = 0.0;
             bvec[ix] = 1.0 / sqrt(coords->g_22(ix, jy));
             cvec[ix] = -1.0 / sqrt(coords->g_22(ix + 1, jy));
           }
-        } else if (inner_boundary_flags & INVERT_DC_GRADPARINV) {
+        } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPARINV)) {
           for (int ix = 0; ix < inbndry; ix++) {
             avec[ix] = 0.0;
             bvec[ix] = sqrt(coords->g_22(ix, jy));
             cvec[ix] = -sqrt(coords->g_22(ix + 1, jy));
           }
-        } else if (inner_boundary_flags & INVERT_DC_LAP) {
+        } else if (isInnerBoundaryFlagSet(INVERT_DC_LAP)) {
           // Decaying boundary conditions
           BoutReal k = 0.0;
           if (a != nullptr) {
@@ -548,7 +545,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco
             bvec[ix] = 1.;
             cvec[ix] = -exp(-k * coords->dx(ix, jy) / sqrt(coords->g11(ix, jy)));
           }
-        } else if (inner_boundary_flags & INVERT_IN_CYLINDER) {
+        } else if (isInnerBoundaryFlagSet(INVERT_IN_CYLINDER)) {
           // Condition for inner radial boundary for cylindrical coordinates
           /* Explanation:
            * The discrete fourier transform is defined as
@@ -602,8 +599,9 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco
       // AC i.e. kz =/= 0 (all other modes than the offset mode)
       else {
 
-        if (inner_boundary_flags & INVERT_AC_GRAD
-            && (inner_boundary_flags & INVERT_SET || inner_boundary_flags & INVERT_RHS)) {
+        if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)
+            && (isInnerBoundaryFlagSet(INVERT_SET)
+                || isInnerBoundaryFlagSet(INVERT_RHS))) {
           // Zero gradient at inner boundary
           for (int ix = 0; ix < inbndry; ix++) {
             avec[ix] = dcomplex(0., 0.);
@@ -611,14 +609,14 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco
                 dcomplex(-1., 0.) / sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy);
             cvec[ix] = dcomplex(1., 0.) / sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy);
           }
-        } else if (inner_boundary_flags & INVERT_AC_GRAD) {
+        } else if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) {
           // Zero gradient at inner boundary
           for (int ix = 0; ix < inbndry; ix++) {
             avec[ix] = dcomplex(0., 0.);
             bvec[ix] = dcomplex(-1., 0.);
             cvec[ix] = dcomplex(1., 0.);
           }
-        } else if (inner_boundary_flags & INVERT_AC_LAP) {
+        } else if (isInnerBoundaryFlagSet(INVERT_AC_LAP)) {
           // Use decaying zero-Laplacian solution in the boundary
           for (int ix = 0; ix < inbndry; ix++) {
             avec[ix] = 0.0;
@@ -626,9 +624,9 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco
             cvec[ix] = -exp(-1.0 * sqrt(coords->g33(ix, jy) / coords->g11(ix, jy)) * kwave
                             * coords->dx(ix, jy));
           }
-        } else if (inner_boundary_flags & INVERT_IN_CYLINDER) {
+        } else if (isInnerBoundaryFlagSet(INVERT_IN_CYLINDER)) {
           // Condition for inner radial boundary for cylindrical coordinates
-          // Explanation under "if (inner_boundary_flags & INVERT_IN_CYLINDER)"
+          // Explanation under "if (isInnerBoundaryFlagSet(INVERT_IN_CYLINDER))"
           for (int ix = 0; ix < inbndry; ix++) {
             avec[ix] = 0.;
             bvec[ix] = 1.;
@@ -655,7 +653,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco
 
       // If no user specified value is set on outer boundary, set the last
       // element in b (in the equation AX=b) to 0
-      if (!(outer_boundary_flags & (INVERT_RHS | INVERT_SET))) {
+      if (!isOuterBoundaryFlagSet(INVERT_RHS | INVERT_SET)) {
         for (int ix = 0; ix < outbndry; ix++) {
           bk[ncx - ix] = 0.;
         }
@@ -664,36 +662,37 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco
       // DC i.e. kz = 0 (the offset mode)
       if (kz == 0) {
 
-        if (outer_boundary_flags & INVERT_DC_GRAD
-            && (outer_boundary_flags & INVERT_SET || outer_boundary_flags & INVERT_RHS)) {
+        if (isOuterBoundaryFlagSet(INVERT_DC_GRAD)
+            && (isOuterBoundaryFlagSet(INVERT_SET)
+                || isOuterBoundaryFlagSet(INVERT_RHS))) {
           // Zero gradient at outer boundary
           for (int ix = 0; ix < outbndry; ix++) {
-            avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(ncx - ix, jy))
-                             / coords->dx(ncx - ix, jy);
-            bvec[ncx - ix] = dcomplex(1., 0.) / sqrt(coords->g_11(ncx - ix, jy))
-                             / coords->dx(ncx - ix, jy);
+            avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(xe - ix, jy))
+                             / coords->dx(xe - ix, jy);
+            bvec[ncx - ix] = dcomplex(1., 0.) / sqrt(coords->g_11(xe - ix, jy))
+                             / coords->dx(xe - ix, jy);
             cvec[ncx - ix] = dcomplex(0., 0.);
           }
-        } else if (outer_boundary_flags & INVERT_DC_GRAD) {
+        } else if (isOuterBoundaryFlagSet(INVERT_DC_GRAD)) {
           // Zero gradient at outer boundary
           for (int ix = 0; ix < outbndry; ix++) {
             avec[ncx - ix] = dcomplex(1., 0.);
             bvec[ncx - ix] = dcomplex(-1., 0.);
             cvec[ncx - ix] = dcomplex(0., 0.);
           }
-        } else if (inner_boundary_flags & INVERT_DC_GRADPAR) {
+        } else if (isOuterBoundaryFlagSet(INVERT_DC_GRADPAR)) {
           for (int ix = 0; ix < inbndry; ix++) {
-            avec[ncx - ix] = 1.0 / sqrt(coords->g_22(ncx - ix + 1, jy));
-            bvec[ncx - ix] = -1.0 / sqrt(coords->g_22(ncx - ix, jy));
+            avec[ncx - ix] = 1.0 / sqrt(coords->g_22(xe - ix - 1, jy));
+            bvec[ncx - ix] = -1.0 / sqrt(coords->g_22(xe - ix, jy));
             cvec[ncx - ix] = 0.0;
           }
-        } else if (inner_boundary_flags & INVERT_DC_GRADPARINV) {
+        } else if (isOuterBoundaryFlagSet(INVERT_DC_GRADPARINV)) {
           for (int ix = 0; ix < inbndry; ix++) {
-            avec[ncx - ix] = sqrt(coords->g_22(ncx - ix - 1, jy));
-            bvec[ncx - ix] = -sqrt(coords->g_22(ncx - ix, jy));
+            avec[ncx - ix] = sqrt(coords->g_22(xe - ix - 1, jy));
+            bvec[ncx - ix] = -sqrt(coords->g_22(xe - ix, jy));
             cvec[ncx - ix] = 0.0;
           }
-        } else if (inner_boundary_flags & INVERT_DC_LAP) {
+        } else if (isOuterBoundaryFlagSet(INVERT_DC_LAP)) {
           // Decaying boundary conditions
           BoutReal k = 0.0;
           if (a != nullptr) {
@@ -707,7 +706,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco
             cvec[ncx - ix] = 0.;
             bvec[ncx - ix] = 1.;
             avec[ncx - ix] =
-                -exp(-k * coords->dx(ncx - ix, jy) / sqrt(coords->g11(ncx - ix, jy)));
+                -exp(-k * coords->dx(xe - ix, jy) / sqrt(coords->g11(xe - ix, jy)));
           }
         } else {
           // Order 2 dirichlet BC (boundary half between points)
@@ -722,24 +721,25 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco
       // AC i.e. kz =/= 0 (all other modes than the offset mode)
       else {
 
-        if (outer_boundary_flags & INVERT_AC_GRAD
-            && (outer_boundary_flags & INVERT_SET || outer_boundary_flags & INVERT_RHS)) {
+        if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)
+            && (isOuterBoundaryFlagSet(INVERT_SET)
+                || isOuterBoundaryFlagSet(INVERT_RHS))) {
           // Zero gradient at outer boundary
           for (int ix = 0; ix < outbndry; ix++) {
-            avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(ncx - ix, jy))
-                             / coords->dx(ncx - ix, jy);
-            bvec[ncx - ix] = dcomplex(1., 0.) / sqrt(coords->g_11(ncx - ix, jy))
-                             / coords->dx(ncx - ix, jy);
+            avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(xe - ix, jy))
+                             / coords->dx(xe - ix, jy);
+            bvec[ncx - ix] = dcomplex(1., 0.) / sqrt(coords->g_11(xe - ix, jy))
+                             / coords->dx(xe - ix, jy);
             cvec[ncx - ix] = dcomplex(0., 0.);
           }
-        } else if (outer_boundary_flags & INVERT_AC_GRAD) {
+        } else if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) {
           // Zero gradient at outer boundary
           for (int ix = 0; ix < outbndry; ix++) {
             avec[ncx - ix] = dcomplex(1., 0.);
             bvec[ncx - ix] = dcomplex(-1., 0.);
             cvec[ncx - ix] = dcomplex(0., 0.);
           }
-        } else if (outer_boundary_flags & INVERT_AC_LAP) {
+        } else if (isOuterBoundaryFlagSet(INVERT_AC_LAP)) {
           // Use decaying zero-Laplacian solution in the boundary
           for (int ix = 0; ix < outbndry; ix++) {
             avec[ncx - ix] =
@@ -795,6 +795,13 @@ void Laplacian::LaplacianMonitor::outputVars(Options& output_options,
   laplacian->outputVars(output_options, time_dimension);
 }
 
+bool Laplacian::isInnerBoundaryFlagSetOnFirstX(int flag) const {
+  return isInnerBoundaryFlagSet(flag) and localmesh->firstX();
+}
+bool Laplacian::isOuterBoundaryFlagSetOnLastX(int flag) const {
+  return isOuterBoundaryFlagSet(flag) and localmesh->lastX();
+}
+
 /**********************************************************************************
  *                              LEGACY INTERFACE
  *
diff --git a/src/mesh/boundary_factory.cxx b/src/mesh/boundary_factory.cxx
index 5f5978f132..00282566a9 100644
--- a/src/mesh/boundary_factory.cxx
+++ b/src/mesh/boundary_factory.cxx
@@ -1,3 +1,5 @@
+#include "bout/parallel_boundary_op.hxx"
+#include "bout/parallel_boundary_region.hxx"
 #include <bout/boundary_factory.hxx>
 #include <bout/boundary_standard.hxx>
 #include <bout/globals.hxx>
@@ -41,10 +43,12 @@ BoundaryFactory::BoundaryFactory() {
   addMod(new BoundaryFromFieldAligned(), "fromFieldAligned");
 
   // Parallel boundaries
-  add(new BoundaryOpPar_dirichlet(), "parallel_dirichlet");
-  add(new BoundaryOpPar_dirichlet_O3(), "parallel_dirichlet_O3");
-  add(new BoundaryOpPar_dirichlet_interp(), "parallel_dirichlet_interp");
-  add(new BoundaryOpPar_neumann(), "parallel_neumann");
+  add(new BoundaryOpPar_dirichlet_o1(), "parallel_dirichlet_o1");
+  add(new BoundaryOpPar_dirichlet_o2(), "parallel_dirichlet_o2");
+  add(new BoundaryOpPar_dirichlet_o3(), "parallel_dirichlet_o3");
+  add(new BoundaryOpPar_neumann_o1(), "parallel_neumann_o1");
+  add(new BoundaryOpPar_neumann_o2(), "parallel_neumann_o2");
+  add(new BoundaryOpPar_neumann_o3(), "parallel_neumann_o3");
 }
 
 BoundaryFactory::~BoundaryFactory() {
diff --git a/src/mesh/coordinates.cxx b/src/mesh/coordinates.cxx
index 01f0fe46ca..4e515449ca 100644
--- a/src/mesh/coordinates.cxx
+++ b/src/mesh/coordinates.cxx
@@ -1502,7 +1502,7 @@ Field3D Coordinates::DDY(const Field3D& f, CELL_LOC outloc, const std::string& m
   if (!f.hasParallelSlices() and !transform->canToFromFieldAligned()) {
     Field3D f_parallel = f;
     transform->calcParallelSlices(f_parallel);
-    f_parallel.applyParallelBoundary("parallel_neumann");
+    f_parallel.applyParallelBoundary("parallel_neumann_o2");
     return bout::derivatives::index::DDY(f_parallel, outloc, method, region);
   }
 #endif
@@ -1908,7 +1908,7 @@ Coordinates::Grad2_par2_DDY_invSg(CELL_LOC outloc, const std::string& method) co
 
   // Communicate to get parallel slices
   localmesh->communicate(*invSgCache);
-  invSgCache->applyParallelBoundary("parallel_neumann");
+  invSgCache->applyParallelBoundary("parallel_neumann_o2");
 
   // cache
   auto ptr = std::make_unique<FieldMetric>();
diff --git a/src/mesh/fv_ops.cxx b/src/mesh/fv_ops.cxx
index 0a5d5f9624..cd5b924e9e 100644
--- a/src/mesh/fv_ops.cxx
+++ b/src/mesh/fv_ops.cxx
@@ -22,7 +22,7 @@ Slices<T> makeslices(bool use_slices, const T& field) {
 
 namespace FV {
 
-// Div ( a Grad_perp(f) ) -- ∇⊥ ( a ⋅ ∇⊥ f) --  Vorticity
+// Div ( a Grad_perp(f) ) -- ∇ ⋅ ( a ∇⊥ f) --  Vorticity
 Field3D Div_a_Grad_perp(const Field3D& a, const Field3D& f) {
   ASSERT2(a.getLocation() == f.getLocation());
 
diff --git a/src/mesh/impls/bout/boutmesh.cxx b/src/mesh/impls/bout/boutmesh.cxx
index 956aba0f79..16061cd47e 100644
--- a/src/mesh/impls/bout/boutmesh.cxx
+++ b/src/mesh/impls/bout/boutmesh.cxx
@@ -35,6 +35,7 @@
 
 #include "boutmesh.hxx"
 
+#include <bout/boundary_region.hxx>
 #include <bout/boutcomm.hxx>
 #include <bout/boutexception.hxx>
 #include <bout/constants.hxx>
@@ -44,6 +45,7 @@
 #include <bout/msg_stack.hxx>
 #include <bout/options.hxx>
 #include <bout/output.hxx>
+#include <bout/parallel_boundary_region.hxx>
 #include <bout/sys/timer.hxx>
 #include <bout/utils.hxx>
 
@@ -80,9 +82,6 @@ BoutMesh::~BoutMesh() {
   for (const auto& bndry : boundary) {
     delete bndry;
   }
-  for (const auto& bndry : par_boundary) {
-    delete bndry;
-  }
 
   if (comm_x != MPI_COMM_NULL) {
     MPI_Comm_free(&comm_x);
@@ -3011,11 +3010,36 @@ RangeIterator BoutMesh::iterateBndryUpperY() const {
 
 std::vector<BoundaryRegion*> BoutMesh::getBoundaries() { return boundary; }
 
-std::vector<BoundaryRegionPar*> BoutMesh::getBoundariesPar() { return par_boundary; }
+std::vector<std::shared_ptr<BoundaryRegionPar>>
+BoutMesh::getBoundariesPar(BoundaryParType type) {
+  return par_boundary[static_cast<int>(type)];
+}
 
-void BoutMesh::addBoundaryPar(BoundaryRegionPar* bndry) {
+void BoutMesh::addBoundaryPar(std::shared_ptr<BoundaryRegionPar> bndry,
+                              BoundaryParType type) {
   output_info << "Adding new parallel boundary: " << bndry->label << endl;
-  par_boundary.push_back(bndry);
+  switch (type) {
+  case BoundaryParType::xin_fwd:
+    par_boundary[static_cast<int>(BoundaryParType::xin)].push_back(bndry);
+    par_boundary[static_cast<int>(BoundaryParType::fwd)].push_back(bndry);
+    break;
+  case BoundaryParType::xin_bwd:
+    par_boundary[static_cast<int>(BoundaryParType::xin)].push_back(bndry);
+    par_boundary[static_cast<int>(BoundaryParType::bwd)].push_back(bndry);
+    break;
+  case BoundaryParType::xout_fwd:
+    par_boundary[static_cast<int>(BoundaryParType::xout)].push_back(bndry);
+    par_boundary[static_cast<int>(BoundaryParType::fwd)].push_back(bndry);
+    break;
+  case BoundaryParType::xout_bwd:
+    par_boundary[static_cast<int>(BoundaryParType::xout)].push_back(bndry);
+    par_boundary[static_cast<int>(BoundaryParType::bwd)].push_back(bndry);
+    break;
+  default:
+    throw BoutException("Unexpected type of boundary {}", toString(type));
+  }
+  par_boundary[static_cast<int>(type)].push_back(bndry);
+  par_boundary[static_cast<int>(BoundaryParType::all)].push_back(bndry);
 }
 
 Field3D BoutMesh::smoothSeparatrix(const Field3D& f) {
diff --git a/src/mesh/impls/bout/boutmesh.hxx b/src/mesh/impls/bout/boutmesh.hxx
index 59c6ecbfbd..cc674d401a 100644
--- a/src/mesh/impls/bout/boutmesh.hxx
+++ b/src/mesh/impls/bout/boutmesh.hxx
@@ -158,8 +158,10 @@ public:
 
   // Boundary regions
   std::vector<BoundaryRegion*> getBoundaries() override;
-  std::vector<BoundaryRegionPar*> getBoundariesPar() override;
-  void addBoundaryPar(BoundaryRegionPar* bndry) override;
+  std::vector<std::shared_ptr<BoundaryRegionPar>>
+  getBoundariesPar(BoundaryParType type) override;
+  void addBoundaryPar(std::shared_ptr<BoundaryRegionPar> bndry,
+                      BoundaryParType type) override;
   std::set<std::string> getPossibleBoundaries() const override;
 
   Field3D smoothSeparatrix(const Field3D& f) override;
@@ -393,8 +395,10 @@ protected:
   void addBoundaryRegions();
 
 private:
-  std::vector<BoundaryRegion*> boundary;        // Vector of boundary regions
-  std::vector<BoundaryRegionPar*> par_boundary; // Vector of parallel boundary regions
+  std::vector<BoundaryRegion*> boundary; // Vector of boundary regions
+  std::array<std::vector<std::shared_ptr<BoundaryRegionPar>>,
+             static_cast<int>(BoundaryParType::SIZE)>
+      par_boundary; // Parallel boundary regions, one vector per BoundaryParType
 
   //////////////////////////////////////////////////
   // Communications
diff --git a/src/mesh/parallel/fci.cxx b/src/mesh/parallel/fci.cxx
index 23b2b91eab..cb8c19bbd7 100644
--- a/src/mesh/parallel/fci.cxx
+++ b/src/mesh/parallel/fci.cxx
@@ -47,9 +47,9 @@
 
 #include <string>
 
-FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& dy, Options& options,
-               int offset_, BoundaryRegionPar* inner_boundary,
-               BoundaryRegionPar* outer_boundary, bool zperiodic)
+FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& UNUSED(dy), Options& options,
+               int offset_, const std::shared_ptr<BoundaryRegionPar>& inner_boundary,
+               const std::shared_ptr<BoundaryRegionPar>& outer_boundary, bool zperiodic)
     : map_mesh(mesh), offset(offset_),
       region_no_boundary(map_mesh.getRegion("RGN_NOBNDRY")),
       corner_boundary_mask(map_mesh) {
@@ -222,13 +222,16 @@ FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& dy, Options& options,
     const BoutReal dx = (dZ_dz * dR - dR_dz * dZ) / det;
     const BoutReal dz = (dR_dx * dZ - dZ_dx * dR) / det;
 
-    // Negative xt_prime means we've hit the inner boundary, otherwise
-    // the outer boundary
-    auto* boundary = (xt_prime[i] < map_mesh.xstart) ? inner_boundary : outer_boundary;
+    // Negative xt_prime means we've hit the inner boundary, otherwise the
+    // outer boundary. However, if any of the surrounding points are negative,
+    // that also means inner. So to differentiate between inner and outer we
+    // need at least 2 points in the domain.
+    ASSERT2(map_mesh.xend - map_mesh.xstart >= 2);
+    auto boundary = (xt_prime[i] < map_mesh.xstart) ? inner_boundary : outer_boundary;
     boundary->add_point(x, y, z, x + dx, y + 0.5 * offset,
-                        z + dz,      // Intersection point in local index space
-                        0.5 * dy[i], // Distance to intersection
-                        PI           // Right-angle intersection
+                        z + dz, // Intersection point in local index space
+                        0.5,    // Distance to intersection
+                        1       // Default to that there is a point in the other direction
     );
   }
   region_no_boundary = region_no_boundary.mask(to_remove);
diff --git a/src/mesh/parallel/fci.hxx b/src/mesh/parallel/fci.hxx
index dd647d939d..3ec3321a6a 100644
--- a/src/mesh/parallel/fci.hxx
+++ b/src/mesh/parallel/fci.hxx
@@ -44,8 +44,8 @@ class FCIMap {
 public:
   FCIMap() = delete;
   FCIMap(Mesh& mesh, const Coordinates::FieldMetric& dy, Options& options, int offset,
-         BoundaryRegionPar* inner_boundary, BoundaryRegionPar* outer_boundary,
-         bool zperiodic);
+         const std::shared_ptr<BoundaryRegionPar>& inner_boundary,
+         const std::shared_ptr<BoundaryRegionPar>& outer_boundary, bool zperiodic);
 
   // The mesh this map was created on
   Mesh& map_mesh;
@@ -79,19 +79,19 @@ public:
     FCITransform::checkInputGrid();
 
     auto forward_boundary_xin =
-        new BoundaryRegionPar("FCI_forward", BNDRY_PAR_FWD_XIN, +1, &mesh);
-    auto backward_boundary_xin =
-        new BoundaryRegionPar("FCI_backward", BNDRY_PAR_BKWD_XIN, -1, &mesh);
+        std::make_shared<BoundaryRegionPar>("FCI_forward", BNDRY_PAR_FWD_XIN, +1, &mesh);
+    auto backward_boundary_xin = std::make_shared<BoundaryRegionPar>(
+        "FCI_backward", BNDRY_PAR_BKWD_XIN, -1, &mesh);
     auto forward_boundary_xout =
-        new BoundaryRegionPar("FCI_forward", BNDRY_PAR_FWD_XOUT, +1, &mesh);
-    auto backward_boundary_xout =
-        new BoundaryRegionPar("FCI_backward", BNDRY_PAR_BKWD_XOUT, -1, &mesh);
+        std::make_shared<BoundaryRegionPar>("FCI_forward", BNDRY_PAR_FWD_XOUT, +1, &mesh);
+    auto backward_boundary_xout = std::make_shared<BoundaryRegionPar>(
+        "FCI_backward", BNDRY_PAR_BKWD_XOUT, -1, &mesh);
 
     // Add the boundary region to the mesh's vector of parallel boundaries
-    mesh.addBoundaryPar(forward_boundary_xin);
-    mesh.addBoundaryPar(backward_boundary_xin);
-    mesh.addBoundaryPar(forward_boundary_xout);
-    mesh.addBoundaryPar(backward_boundary_xout);
+    mesh.addBoundaryPar(forward_boundary_xin, BoundaryParType::xin_fwd);
+    mesh.addBoundaryPar(backward_boundary_xin, BoundaryParType::xin_bwd);
+    mesh.addBoundaryPar(forward_boundary_xout, BoundaryParType::xout_fwd);
+    mesh.addBoundaryPar(backward_boundary_xout, BoundaryParType::xout_bwd);
 
     field_line_maps.reserve(mesh.ystart * 2);
     for (int offset = 1; offset < mesh.ystart + 1; ++offset) {
@@ -100,6 +100,22 @@ public:
       field_line_maps.emplace_back(mesh, dy, options, -offset, backward_boundary_xin,
                                    backward_boundary_xout, zperiodic);
     }
+    ASSERT0(mesh.ystart == 1);
+    std::shared_ptr<BoundaryRegionPar> bndries[]{
+        forward_boundary_xin, forward_boundary_xout, backward_boundary_xin,
+        backward_boundary_xout};
+    for (auto& bndry : bndries) {
+      for (const auto& bndry2 : bndries) {
+        if (bndry->dir == bndry2->dir) {
+          continue;
+        }
+        for (bndry->first(); !bndry->isDone(); bndry->next()) {
+          if (bndry2->contains(*bndry)) {
+            bndry->setValid(0);
+          }
+        }
+      }
+    }
   }
 
   void calcParallelSlices(Field3D& f) override;
diff --git a/src/mesh/parallel/shiftedmetric.cxx b/src/mesh/parallel/shiftedmetric.cxx
index 84084d9cbb..382052047d 100644
--- a/src/mesh/parallel/shiftedmetric.cxx
+++ b/src/mesh/parallel/shiftedmetric.cxx
@@ -6,7 +6,9 @@
  *
  */
 
+#include "bout/parallel_boundary_region.hxx"
 #include "bout/paralleltransform.hxx"
+#include <bout/boundary_region.hxx>
 #include <bout/constants.hxx>
 #include <bout/fft.hxx>
 #include <bout/mesh.hxx>
diff --git a/src/mesh/parallel/shiftedmetricinterp.cxx b/src/mesh/parallel/shiftedmetricinterp.cxx
index 214f7ded76..7f3637e79c 100644
--- a/src/mesh/parallel/shiftedmetricinterp.cxx
+++ b/src/mesh/parallel/shiftedmetricinterp.cxx
@@ -29,7 +29,7 @@
 
 #include "shiftedmetricinterp.hxx"
 #include "bout/constants.hxx"
-#include "bout/mask.hxx"
+#include "bout/parallel_boundary_region.hxx"
 
 ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in,
                                          Field2D zShift_in, BoutReal zlength_in,
@@ -114,11 +114,16 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in,
 
   interp_from_aligned->calcWeights(zt_prime_from);
 
+  int yvalid = mesh.LocalNy - 2 * mesh.ystart;
+  // avoid overflow - no stencil need more than 5 points
+  if (yvalid > 20) {
+    yvalid = 20;
+  }
   // Create regions for parallel boundary conditions
   Field2D dy;
   mesh.get(dy, "dy", 1.);
-  auto forward_boundary_xin =
-      new BoundaryRegionPar("parallel_forward_xin", BNDRY_PAR_FWD_XIN, +1, &mesh);
+  auto forward_boundary_xin = std::make_shared<BoundaryRegionPar>(
+      "parallel_forward_xin", BNDRY_PAR_FWD_XIN, +1, &mesh);
   for (auto it = mesh.iterateBndryUpperY(); not it.isDone(); it.next()) {
     for (int z = mesh.zstart; z <= mesh.zend; z++) {
       forward_boundary_xin->add_point(
@@ -128,14 +133,13 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in,
           zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z
               + 0.5 * (zShift(it.ind, mesh.yend + 1) - zShift(it.ind, mesh.yend)),
           0.25
-              * (dy(it.ind, mesh.yend) // dy/2
-                 + dy(it.ind, mesh.yend + 1)),
-          0. // angle?
-      );
+              * (1                                                     // dy/2
+                 + dy(it.ind, mesh.yend + 1) / dy(it.ind, mesh.yend)), // length
+          yvalid);
     }
   }
-  auto backward_boundary_xin =
-      new BoundaryRegionPar("parallel_backward_xin", BNDRY_PAR_BKWD_XIN, -1, &mesh);
+  auto backward_boundary_xin = std::make_shared<BoundaryRegionPar>(
+      "parallel_backward_xin", BNDRY_PAR_BKWD_XIN, -1, &mesh);
   for (auto it = mesh.iterateBndryLowerY(); not it.isDone(); it.next()) {
     for (int z = mesh.zstart; z <= mesh.zend; z++) {
       backward_boundary_xin->add_point(
@@ -145,15 +149,14 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in,
           zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z
               + 0.5 * (zShift(it.ind, mesh.ystart) - zShift(it.ind, mesh.ystart - 1)),
           0.25
-              * (dy(it.ind, mesh.ystart - 1) // dy/2
-                 + dy(it.ind, mesh.ystart)),
-          0. // angle?
-      );
+              * (1 // dy/2
+                 + dy(it.ind, mesh.ystart - 1) / dy(it.ind, mesh.ystart)),
+          yvalid);
     }
   }
   // Create regions for parallel boundary conditions
-  auto forward_boundary_xout =
-      new BoundaryRegionPar("parallel_forward_xout", BNDRY_PAR_FWD_XOUT, +1, &mesh);
+  auto forward_boundary_xout = std::make_shared<BoundaryRegionPar>(
+      "parallel_forward_xout", BNDRY_PAR_FWD_XOUT, +1, &mesh);
   for (auto it = mesh.iterateBndryUpperY(); not it.isDone(); it.next()) {
     for (int z = mesh.zstart; z <= mesh.zend; z++) {
       forward_boundary_xout->add_point(
@@ -163,14 +166,13 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in,
           zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z
               + 0.5 * (zShift(it.ind, mesh.yend + 1) - zShift(it.ind, mesh.yend)),
           0.25
-              * (dy(it.ind, mesh.yend) // dy/2
-                 + dy(it.ind, mesh.yend + 1)),
-          0. // angle?
-      );
+              * (1 // dy/2
+                 + dy(it.ind, mesh.yend + 1) / dy(it.ind, mesh.yend)),
+          yvalid);
     }
   }
-  auto backward_boundary_xout =
-      new BoundaryRegionPar("parallel_backward_xout", BNDRY_PAR_BKWD_XOUT, -1, &mesh);
+  auto backward_boundary_xout = std::make_shared<BoundaryRegionPar>(
+      "parallel_backward_xout", BNDRY_PAR_BKWD_XOUT, -1, &mesh);
   for (auto it = mesh.iterateBndryLowerY(); not it.isDone(); it.next()) {
     for (int z = mesh.zstart; z <= mesh.zend; z++) {
       backward_boundary_xout->add_point(
@@ -180,18 +182,17 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in,
           zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z
               + 0.5 * (zShift(it.ind, mesh.ystart) - zShift(it.ind, mesh.ystart - 1)),
           0.25
-              * (dy(it.ind, mesh.ystart - 1) // dy/2
-                 + dy(it.ind, mesh.ystart)),
-          0. // angle?
-      );
+              * (dy(it.ind, mesh.ystart - 1) / dy(it.ind, mesh.ystart) // dy/2
+                 + 1),
+          yvalid);
     }
   }
 
   // Add the boundary region to the mesh's vector of parallel boundaries
-  mesh.addBoundaryPar(forward_boundary_xin);
-  mesh.addBoundaryPar(backward_boundary_xin);
-  mesh.addBoundaryPar(forward_boundary_xout);
-  mesh.addBoundaryPar(backward_boundary_xout);
+  mesh.addBoundaryPar(forward_boundary_xin, BoundaryParType::xin_fwd);
+  mesh.addBoundaryPar(backward_boundary_xin, BoundaryParType::xin_bwd);
+  mesh.addBoundaryPar(forward_boundary_xout, BoundaryParType::xout_fwd);
+  mesh.addBoundaryPar(backward_boundary_xout, BoundaryParType::xout_bwd);
 }
 
 void ShiftedMetricInterp::checkInputGrid() {
diff --git a/src/mesh/parallel_boundary_op.cxx b/src/mesh/parallel_boundary_op.cxx
index 8b2c294a4a..ebd9852791 100644
--- a/src/mesh/parallel_boundary_op.cxx
+++ b/src/mesh/parallel_boundary_op.cxx
@@ -6,18 +6,15 @@
 #include "bout/output.hxx"
 
 BoutReal BoundaryOpPar::getValue(const BoundaryRegionPar& bndry, BoutReal t) {
-
-  Mesh* mesh = bndry.localmesh;
-
   BoutReal value;
 
   switch (value_type) {
   case ValueType::GEN:
-    return gen_values->generate(
-        bout::generator::Context(bndry.s_x, bndry.s_y, bndry.s_z, CELL_CENTRE, mesh, t));
+    return gen_values->generate(bout::generator::Context(
+        bndry.s_x(), bndry.s_y(), bndry.s_z(), CELL_CENTRE, bndry.localmesh, t));
   case ValueType::FIELD:
     // FIXME: Interpolate to s_x, s_y, s_z...
-    value = (*field_values)(bndry.x, bndry.y, bndry.z);
+    value = (*field_values)[bndry.ind()];
     return value;
   case ValueType::REAL:
     return real_value;
@@ -25,123 +22,3 @@ BoutReal BoundaryOpPar::getValue(const BoundaryRegionPar& bndry, BoutReal t) {
     throw BoutException("Invalid value_type encountered in BoundaryOpPar::getValue");
   }
 }
-
-//////////////////////////////////////////
-// Dirichlet boundary
-
-void BoundaryOpPar_dirichlet::apply(Field3D& f, BoutReal t) {
-  Field3D& f_next = f.ynext(bndry->dir);
-
-  Coordinates& coord = *(f.getCoordinates());
-
-  // Loop over grid points If point is in boundary, then fill in
-  // f_next such that the field would be VALUE on the boundary
-  for (bndry->first(); !bndry->isDone(); bndry->next()) {
-    // temp variables for convenience
-    int x = bndry->x;
-    int y = bndry->y;
-    int z = bndry->z;
-
-    // Generate the boundary value
-    BoutReal value = getValue(*bndry, t);
-
-    // Scale the field and normalise to the desired value
-    BoutReal y_prime = bndry->length;
-    BoutReal f2 = (f(x, y, z) - value) * (coord.dy(x, y, z) - y_prime) / y_prime;
-
-    f_next(x, y + bndry->dir, z) = value - f2;
-  }
-}
-
-//////////////////////////////////////////
-// Dirichlet boundary - Third order
-
-void BoundaryOpPar_dirichlet_O3::apply(Field3D& f, BoutReal t) {
-
-  Field3D& f_next = f.ynext(bndry->dir);
-  Field3D& f_prev = f.ynext(-bndry->dir);
-
-  Coordinates& coord = *(f.getCoordinates());
-
-  // Loop over grid points If point is in boundary, then fill in
-  // f_next such that the field would be VALUE on the boundary
-  for (bndry->first(); !bndry->isDone(); bndry->next()) {
-    // temp variables for convenience
-    int x = bndry->x;
-    int y = bndry->y;
-    int z = bndry->z;
-
-    // Generate the boundary value
-    BoutReal fb = getValue(*bndry, t);
-    BoutReal f1 = f_prev(x, y - bndry->dir, z);
-    BoutReal f2 = f(x, y, z);
-    BoutReal l1 = coord.dy(x, y, z);
-    BoutReal l2 = bndry->length;
-    BoutReal l3 = coord.dy(x, y, z) - l2;
-
-    BoutReal denom = (l1 * l1 * l2 + l1 * l2 * l2);
-    BoutReal term1 = (l2 * l2 * l3 + l2 * l3 * l3);
-    BoutReal term2 = l1 * (l1 + l2 + l3) * (l2 + l3);
-    BoutReal term3 = l3 * ((l1 + l2) * l3 + (l1 + l2) * (l1 + l2));
-
-    f_next(x, y + bndry->dir, z) = (term1 * f1 + term2 * fb - term3 * f2) / denom;
-  }
-}
-
-//////////////////////////////////////////
-// Dirichlet with interpolation
-
-void BoundaryOpPar_dirichlet_interp::apply(Field3D& f, BoutReal t) {
-
-  Field3D& f_next = f.ynext(bndry->dir);
-  Field3D& f_prev = f.ynext(-bndry->dir);
-
-  Coordinates& coord = *(f.getCoordinates());
-
-  // Loop over grid points If point is in boundary, then fill in
-  // f_next such that the field would be VALUE on the boundary
-  for (bndry->first(); !bndry->isDone(); bndry->next()) {
-    // temp variables for convenience
-    int x = bndry->x;
-    int y = bndry->y;
-    int z = bndry->z;
-
-    // Generate the boundary value
-    BoutReal fs = getValue(*bndry, t);
-
-    // Scale the field and normalise to the desired value
-    BoutReal dy = coord.dy(x, y, z);
-    BoutReal s = bndry->length * dy;
-
-    f_next(x, y + bndry->dir, z) =
-        f_prev(x, y - bndry->dir, z) * (1. - (2. * s / (dy + s)))
-        + 2. * f(x, y, z) * ((s - dy) / s) + fs * (dy / s - (2. / s + 1.));
-  }
-}
-
-//////////////////////////////////////////
-// Neumann boundary
-
-void BoundaryOpPar_neumann::apply(Field3D& f, BoutReal t) {
-  TRACE("BoundaryOpPar_neumann::apply");
-
-  Field3D& f_next = f.ynext(bndry->dir);
-  f_next.allocate(); // Ensure unique before modifying
-
-  Coordinates& coord = *(f.getCoordinates());
-
-  // If point is in boundary, then fill in f_next such that the derivative
-  // would be VALUE on the boundary
-  for (bndry->first(); !bndry->isDone(); bndry->next()) {
-    // temp variables for convience
-    int x = bndry->x;
-    int y = bndry->y;
-    int z = bndry->z;
-
-    // Generate the boundary value
-    BoutReal value = getValue(*bndry, t);
-    BoutReal dy = coord.dy(x, y, z);
-
-    f_next(x, y + bndry->dir, z) = f(x, y, z) + bndry->dir * value * dy;
-  }
-}
diff --git a/src/mesh/parallel_boundary_region.cxx b/src/mesh/parallel_boundary_region.cxx
index 3f77d96737..e69de29bb2 100644
--- a/src/mesh/parallel_boundary_region.cxx
+++ b/src/mesh/parallel_boundary_region.cxx
@@ -1,37 +0,0 @@
-#include "bout/parallel_boundary_region.hxx"
-
-void BoundaryRegionPar::add_point(const int jx, const int jy, const int jz,
-                                  const BoutReal x, const BoutReal y, const BoutReal z,
-                                  const BoutReal length, const BoutReal angle) {
-  bndry_points.push_back({{jx, jy, jz}, {x, y, z}, length, angle});
-}
-
-void BoundaryRegionPar::first() {
-  bndry_position = begin(bndry_points);
-  if (!isDone()) {
-    x = bndry_position->index.jx;
-    y = bndry_position->index.jy;
-    z = bndry_position->index.jz;
-    s_x = bndry_position->intersection.s_x;
-    s_y = bndry_position->intersection.s_y;
-    s_z = bndry_position->intersection.s_z;
-    length = bndry_position->length;
-    angle = bndry_position->angle;
-  }
-}
-
-void BoundaryRegionPar::next() {
-  ++bndry_position;
-  if (!isDone()) {
-    x = bndry_position->index.jx;
-    y = bndry_position->index.jy;
-    z = bndry_position->index.jz;
-    s_x = bndry_position->intersection.s_x;
-    s_y = bndry_position->intersection.s_y;
-    s_z = bndry_position->intersection.s_z;
-    length = bndry_position->length;
-    angle = bndry_position->angle;
-  }
-}
-
-bool BoundaryRegionPar::isDone() { return (bndry_position == end(bndry_points)); }
diff --git a/src/mesh/parallel_boundary_stencil.cxx.py b/src/mesh/parallel_boundary_stencil.cxx.py
new file mode 100644
index 0000000000..d0988ee099
--- /dev/null
+++ b/src/mesh/parallel_boundary_stencil.cxx.py
@@ -0,0 +1,62 @@
+import os
+from tempfile import NamedTemporaryFile as tmpf
+from stencils_sympy import dirichlet, neumann, simp, Symbol, Matrix, ccode
+
+
+def gen_code(order, matrix_type):
+    x = [Symbol("spacing%d" % i) for i in range(order)]
+    matrix = matrix_type(x)
+    A = Matrix(order, order, matrix)
+
+    try:
+        iA = A.inv()
+    except:
+        import sys
+
+        print(A, matrix, file=sys.stderr)
+        raise
+    return ccode(simp(sum([iA[0, i] * Symbol("value%d" % i) for i in range(order)])))
+
+
+def run(cmd):
+    print(cmd)  # echo the command before running it through the shell
+    if os.system(cmd) != 0:  # explicit check: a bare assert vanishes under -O
+        raise RuntimeError(f"command failed: {cmd}")
+
+
+if __name__ == "__main__":
+    with tmpf("w", dir=".", delete=False) as f:
+        f.write("namespace {\n")
+        f.write(
+            """
+inline BoutReal pow(BoutReal val, int exp) {
+  //constexpr int expval = exp;
+  //static_assert(expval == 2 or expval == 3, "This pow is only for exponent 2 or 3");
+  if (exp == 2) {
+    return val * val;
+  }
+  ASSERT3(exp == 3);
+  return val * val * val;
+}
+"""
+        )
+
+        for order in range(1, 4):
+            for matrix in dirichlet, neumann:
+                if order == 1 and matrix == neumann:
+                    continue
+                print(f"generating {matrix.name}_o{order}")
+                args = ", ".join(
+                    [
+                        "BoutReal spacing%d, BoutReal value%d" % (i, i)
+                        for i in range(order)
+                    ]
+                )
+                f.write(
+                    f"inline BoutReal stencil_{matrix.name}_o{order}({args}) {{\n  return "
+                )
+                f.write(gen_code(order, matrix))
+                f.write(";\n}\n")
+        f.write("}\n")
+    run("clang-format -i " + f.name)
+    run(f"mv {f.name} {__file__[:-3]}")
diff --git a/src/mesh/stencils.md b/src/mesh/stencils.md
new file mode 100644
index 0000000000..0c7d181481
--- /dev/null
+++ b/src/mesh/stencils.md
@@ -0,0 +1,29 @@
+Notes concerning the generation of stencils
+================
+
+We want to create a Taylor function
+$f(x-x_0)=\sum_{i=0}^n \frac{1}{i!}f_i(x-x_0)^i$ where $n$
+is the order of the function, $x_0$ is the point in the boundary
+where we want to calculate the function. $f_i$ are some coefficients
+that we need to determine. To be precise, only $f_0$ needs to be
+determined.
+We know that the function takes certain values at some points. If the
+value at some distance `spacing.f0` is a given value `val` then we
+can build a linear system of equations using the above formula.
+If instead the derivative is given, the above equation needs to be
+differentiated once.
+
+stencils_sympy.py calculates the coefficients of the matrix
+which represents our system of equations. The derivative is simply
+the factor of the next smaller term (or zero if there is no
+smaller one). This is what is calculated by `taylor`, `dirichlet`
+and `neumann`, the respective matrix coefficients.
+
+sympy does all the heavy lifting of analytically inverting the
+matrix.
+
+With the analytic inversion we can put in the numerical offsets
+`spacing.f?` in C++ and get a fast expression for the respective
+coefficients. As mentioned before, we do not need the full inverse,
+just the first row, as we only care about the value, not about its
+derivative.
diff --git a/src/mesh/stencils_sympy.py b/src/mesh/stencils_sympy.py
new file mode 100644
index 0000000000..64677f1985
--- /dev/null
+++ b/src/mesh/stencils_sympy.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+
+from sympy import Symbol, Eq
+from sympy.matrices import Matrix
+from sympy.printing import ccode
+from sympy.simplify import combsimp as simp
+from sympy.utilities.codegen import codegen
+
+
+def pow(a, b):
+    """Format `a` to the power `b` as text ("1" for b == 0); shadows built-in pow."""
+    if b == 0:
+        return "1"
+    if b != 1:
+        return "%s**%d" % (a, b)
+    return a  # exponent 1: the base is handed back unchanged
+
+
+def factorial(a):
+    """Return a! recursively; 0! and 1! are 1, a negative argument trips the assert."""
+    if a in (0, 1):
+        return 1
+    assert a > 0
+    return a * factorial(a - 1)
+
+
+def gen_code(order, matrix_type):
+    """Emit C assignments `facs.f<i> = ...` for the first row of the inverse matrix."""
+    spacings = [Symbol("spacing.f%d" % k) for k in range(order)]
+    coefficients = matrix_type(spacings)
+    system = Matrix(order, order, coefficients)
+    try:
+        inv = system.inv()
+    except:
+        import sys
+        # Dump the offending system before propagating the failure unchanged.
+        print(system, coefficients, file=sys.stderr)
+        raise
+    # Only row 0 is used; each entry becomes one newline-terminated statement.
+    statements = [
+        ccode(simp(inv[0, k]), assign_to="facs.f%d" % k) + "\n" for k in range(order)
+    ]
+    return "".join(statements)
+
+
+def taylor(x, i, j):
+    """Coefficient x[i]**j / j! of the j-th Taylor term; 0 for a negative order j."""
+    if j < 0:
+        return 0
+    return x[i] ** j / factorial(j)
+
+
+class dirichlet:  # entry function f(i, j) handed to Matrix(order, order, f) by gen_code
+    name = "dirichlet"  # label; the stencil generator uses it in emitted function names
+
+    def __init__(self, x):
+        self.x = x  # per-point spacing symbols, indexed by row i in taylor()
+
+    def __call__(self, i, j):
+        return taylor(self.x, i, j)  # every row is the plain Taylor coefficient
+
+
+class neumann:
+    """Entry generator where row 0 encodes a derivative, all other rows a value."""
+
+    name = "neumann"
+
+    def __init__(self, x):
+        self.x = x
+
+    def __call__(self, i, j):
+        # Differentiating a Taylor term once lowers its order by one (row 0 only).
+        return taylor(self.x, i, j - 1 if i == 0 else j)
+
+
+if __name__ == "__main__":
+    print(gen_code(3, dirichlet))  # demo: print the order-3 Dirichlet coefficient code
diff --git a/src/solver/impls/arkode/arkode.cxx b/src/solver/impls/arkode/arkode.cxx
index aabe2ae050..440f8f54f1 100644
--- a/src/solver/impls/arkode/arkode.cxx
+++ b/src/solver/impls/arkode/arkode.cxx
@@ -4,9 +4,7 @@
  * NOTE: ARKode is still in beta testing so use with cautious optimism
  *
  **************************************************************************
- * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu
- *
- * Contact: Nick Walkden, nick.walkden@ccfe.ac.uk
+ * Copyright 2010-2024 BOUT++ contributors
  *
  * This file is part of BOUT++.
  *
@@ -31,6 +29,7 @@
 
 #if BOUT_HAS_ARKODE
 
+#include "bout/bout_enum_class.hxx"
 #include "bout/boutcomm.hxx"
 #include "bout/boutexception.hxx"
 #include "bout/field3d.hxx"
@@ -41,17 +40,7 @@
 #include "bout/unused.hxx"
 #include "bout/utils.hxx"
 
-#if SUNDIALS_VERSION_MAJOR >= 4
 #include <arkode/arkode_arkstep.h>
-#else
-#include <arkode/arkode.h>
-#if SUNDIALS_VERSION_MAJOR >= 3
-#include <arkode/arkode_spils.h>
-#else
-#include <arkode/arkode_spgmr.h>
-#endif
-#endif
-
 #include <arkode/arkode_bbdpre.h>
 #include <sundials/sundials_math.h>
 #include <sundials/sundials_types.h>
@@ -61,110 +50,21 @@
 
 class Field2D;
 
-#define ZERO RCONST(0.)
-#define ONE RCONST(1.0)
+// NOLINTBEGIN(readability-identifier-length)
+namespace {
+int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data);
+int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data);
+int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data);
 
-#ifndef ARKODEINT
-#if SUNDIALS_VERSION_MAJOR < 3
-using ARKODEINT = bout::utils::function_traits<ARKLocalFn>::arg_t<0>;
-#else
-using ARKODEINT = sunindextype;
-#endif
-#endif
+int arkode_bbd_rhs(sunindextype Nlocal, BoutReal t, N_Vector u, N_Vector du,
+                   void* user_data);
+int arkode_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, N_Vector zvec,
+               BoutReal gamma, BoutReal delta, int lr, void* user_data);
 
-static int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data);
-static int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data);
-static int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data);
-
-static int arkode_bbd_rhs(ARKODEINT Nlocal, BoutReal t, N_Vector u, N_Vector du,
-                          void* user_data);
-static int arkode_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, N_Vector zvec,
-                      BoutReal gamma, BoutReal delta, int lr, void* user_data);
-#if SUNDIALS_VERSION_MAJOR < 3
-// Shim for earlier versions
-inline static int arkode_pre_shim(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec,
-                                  N_Vector zvec, BoutReal gamma, BoutReal delta, int lr,
-                                  void* user_data, N_Vector UNUSED(tmp)) {
-  return arkode_pre(t, yy, yp, rvec, zvec, gamma, delta, lr, user_data);
-}
-#else
-// Alias for newer versions
-constexpr auto& arkode_pre_shim = arkode_pre;
-#endif
-
-static int arkode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector fy,
-                      void* user_data, N_Vector tmp);
-#if SUNDIALS_VERSION_MAJOR < 4
-// Shim for earlier versions
-inline int ARKStepSetJacTimes(void* arkode_mem, std::nullptr_t,
-                              ARKSpilsJacTimesVecFn jtimes) {
-#if SUNDIALS_VERSION_MAJOR < 3
-  return ARKSpilsSetJacTimesVecFn(arkode_mem, jtimes);
-#else
-  return ARKSpilsSetJacTimes(arkode_mem, nullptr, jtimes);
-#endif
-}
-#endif
-
-#if SUNDIALS_VERSION_MAJOR < 4
-void* ARKStepCreate(ARKRhsFn fe, ARKRhsFn fi, BoutReal t0, N_Vector y0) {
-  auto arkode_mem = ARKodeCreate();
-
-  if (arkode_mem == nullptr) {
-    throw BoutException("ARKodeCreate failed\n");
-  }
-  if (ARKodeInit(arkode_mem, fe, fi, t0, y0) != ARK_SUCCESS) {
-    throw BoutException("ARKodeInit failed\n");
-  }
-  return arkode_mem;
-}
-
-#if SUNDIALS_VERSION_MAJOR == 3
-int ARKStepSetLinearSolver(void* arkode_mem, SUNLinearSolver LS, std::nullptr_t) {
-  return ARKSpilsSetLinearSolver(arkode_mem, LS);
-}
-#endif
-
-// Aliases for older versions
-// In SUNDIALS 4, ARKode has become ARKStep, hence all the renames
-constexpr auto& ARKStepEvolve = ARKode;
-constexpr auto& ARKStepFree = ARKodeFree;
-constexpr auto& ARKStepGetCurrentTime = ARKodeGetCurrentTime;
-constexpr auto& ARKStepGetDky = ARKodeGetDky;
-constexpr auto& ARKStepGetLastStep = ARKodeGetLastStep;
-constexpr auto& ARKStepGetNumLinIters = ARKSpilsGetNumLinIters;
-constexpr auto& ARKStepGetNumNonlinSolvIters = ARKodeGetNumNonlinSolvIters;
-constexpr auto& ARKStepGetNumPrecEvals = ARKSpilsGetNumPrecEvals;
-constexpr auto& ARKStepGetNumRhsEvals = ARKodeGetNumRhsEvals;
-constexpr auto& ARKStepGetNumSteps = ARKodeGetNumSteps;
-constexpr auto& ARKStepReInit = ARKodeReInit;
-constexpr auto& ARKStepSStolerances = ARKodeSStolerances;
-constexpr auto& ARKStepSVtolerances = ARKodeSVtolerances;
-constexpr auto& ARKStepSetAdaptivityMethod = ARKodeSetAdaptivityMethod;
-constexpr auto& ARKStepSetCFLFraction = ARKodeSetCFLFraction;
-constexpr auto& ARKStepSetEpsLin = ARKSpilsSetEpsLin;
-constexpr auto& ARKStepSetExplicit = ARKodeSetExplicit;
-constexpr auto& ARKStepSetFixedPoint = ARKodeSetFixedPoint;
-constexpr auto& ARKStepSetFixedStep = ARKodeSetFixedStep;
-constexpr auto& ARKStepSetImEx = ARKodeSetImEx;
-constexpr auto& ARKStepSetImplicit = ARKodeSetImplicit;
-constexpr auto& ARKStepSetInitStep = ARKodeSetInitStep;
-constexpr auto& ARKStepSetLinear = ARKodeSetLinear;
-constexpr auto& ARKStepSetMaxNumSteps = ARKodeSetMaxNumSteps;
-constexpr auto& ARKStepSetMaxStep = ARKodeSetMaxStep;
-constexpr auto& ARKStepSetMinStep = ARKodeSetMinStep;
-constexpr auto& ARKStepSetOptimalParams = ARKodeSetOptimalParams;
-constexpr auto& ARKStepSetOrder = ARKodeSetOrder;
-constexpr auto& ARKStepSetPreconditioner = ARKSpilsSetPreconditioner;
-constexpr auto& ARKStepSetUserData = ARKodeSetUserData;
-#endif
-
-#if SUNDIALS_VERSION_MAJOR < 6
-void* ARKStepCreate(ARKRhsFn fe, ARKRhsFn fi, BoutReal t0, N_Vector y0,
-                    [[maybe_unused]] SUNContext context) {
-  return ARKStepCreate(fe, fi, t0, y0);
-}
-#endif
+int arkode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector fy,
+               void* user_data, N_Vector tmp);
+} // namespace
+// NOLINTEND(readability-identifier-length)
 
 ArkodeSolver::ArkodeSolver(Options* opts)
     : Solver(opts), diagnose((*options)["diagnose"]
@@ -173,11 +73,10 @@ ArkodeSolver::ArkodeSolver(Options* opts)
       mxsteps((*options)["mxstep"]
                   .doc("Maximum number of steps to take between outputs")
                   .withDefault(500)),
-      imex((*options)["imex"].doc("Use ImEx capability").withDefault(true)),
-      solve_explicit(
-          (*options)["explicit"].doc("Solve only explicit part").withDefault(true)),
-      solve_implicit(
-          (*options)["implicit"].doc("Solve only implicit part").withDefault(true)),
+      treatment((*options)["treatment"]
+                    .doc("Use default capability (imex) or provide a specific treatment: "
+                         "implicit or explicit")
+                    .withDefault(Treatment::ImEx)),
       set_linear(
           (*options)["set_linear"]
               .doc("Use linear implicit solver (only evaluates jacobian inversion once)")
@@ -187,14 +86,22 @@ ArkodeSolver::ArkodeSolver(Options* opts)
                           "not recommended except for code comparison")
                      .withDefault(false)),
       order((*options)["order"].doc("Order of internal step").withDefault(4)),
+#if SUNDIALS_TABLE_BY_NAME_SUPPORT
+      implicit_table((*options)["implicit_table"]
+                         .doc("Name of the implicit Butcher table")
+                         .withDefault("")),
+      explicit_table((*options)["explicit_table"]
+                         .doc("Name of the explicit Butcher table")
+                         .withDefault("")),
+#endif
       cfl_frac((*options)["cfl_frac"]
                    .doc("Fraction of the estimated explicitly stable step to use")
                    .withDefault(-1.0)),
-      adap_method((*options)["adap_method"]
-                      .doc("Set timestep adaptivity function: 0 -> PID adaptivity "
-                           "(default); 1 -> PI; 2 -> I; 3 -> explicit Gustafsson; 4 -> "
-                           "implicit Gustafsson; 5 -> ImEx Gustafsson;")
-                      .withDefault(0)),
+      adap_method(
+          (*options)["adap_method"]
+              .doc("Set timestep adaptivity function: pid, pi, i, explicit_gustafsson,  "
+                   "implicit_gustafsson, imex_gustafsson.")
+              .withDefault(AdapMethod::PID)),
       abstol((*options)["atol"].doc("Absolute tolerance").withDefault(1.0e-12)),
       reltol((*options)["rtol"].doc("Relative tolerance").withDefault(1.0e-5)),
       use_vector_abstol((*options)["use_vector_abstol"]
@@ -226,7 +133,7 @@ ArkodeSolver::ArkodeSolver(Options* opts)
                        .withDefault(false)),
       optimize(
           (*options)["optimize"].doc("Use ARKode optimal parameters").withDefault(false)),
-      suncontext(static_cast<void*>(&BoutComm::get())) {
+      suncontext(createSUNContext(BoutComm::get())) {
   has_constraints = false; // This solver doesn't have constraints
 
   // Add diagnostics to output
@@ -243,10 +150,14 @@ ArkodeSolver::ArkodeSolver(Options* opts)
 }
 
 ArkodeSolver::~ArkodeSolver() {
-  N_VDestroy_Parallel(uvec);
+  N_VDestroy(uvec);
   ARKStepFree(&arkode_mem);
   SUNLinSolFree(sun_solver);
   SUNNonlinSolFree(nonlinear_solver);
+
+#if SUNDIALS_CONTROLLER_SUPPORT
+  SUNAdaptController_Destroy(controller);
+#endif
 }
 
 /**************************************************************************
@@ -274,50 +185,55 @@ int ArkodeSolver::init() {
                n2Dvars(), neq, local_N);
 
   // Allocate memory
-  if ((uvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) {
+  uvec = callWithSUNContext(N_VNew_Parallel, suncontext, BoutComm::get(), local_N, neq);
+  if (uvec == nullptr) {
     throw BoutException("SUNDIALS memory allocation failed\n");
   }
 
   // Put the variables into uvec
-  save_vars(NV_DATA_P(uvec));
-
-  ASSERT1(solve_explicit or solve_implicit);
-
-  const auto& explicit_rhs = [this]() {
-    if (imex) {
-      return arkode_rhs_explicit;
-    } else {
-      return solve_explicit ? arkode_rhs : nullptr;
-    }
-  }();
-  const auto& implicit_rhs = [this]() {
-    if (imex) {
-      return arkode_rhs_implicit;
-    } else {
-      return solve_implicit ? arkode_rhs : nullptr;
-    }
-  }();
-
-  if ((arkode_mem = ARKStepCreate(explicit_rhs, implicit_rhs, simtime, uvec, suncontext))
-      == nullptr) {
+  save_vars(N_VGetArrayPointer(uvec));
+
+  switch (treatment) {
+  case Treatment::ImEx:
+    arkode_mem = callWithSUNContext(ARKStepCreate, suncontext, arkode_rhs_explicit,
+                                    arkode_rhs_implicit, simtime, uvec);
+    break;
+  case Treatment::Explicit:
+    arkode_mem =
+        callWithSUNContext(ARKStepCreate, suncontext, arkode_rhs, nullptr, simtime, uvec);
+    break;
+  case Treatment::Implicit:
+    arkode_mem =
+        callWithSUNContext(ARKStepCreate, suncontext, nullptr, arkode_rhs, simtime, uvec);
+    break;
+  default:
+    throw BoutException("Invalid treatment: {}\n", toString(treatment));
+  }
+  if (arkode_mem == nullptr) {
     throw BoutException("ARKStepCreate failed\n");
   }
 
-  if (imex and solve_explicit and solve_implicit) {
+  switch (treatment) {
+  case Treatment::ImEx:
     output_info.write("\tUsing ARKode ImEx solver \n");
     if (ARKStepSetImEx(arkode_mem) != ARK_SUCCESS) {
       throw BoutException("ARKStepSetImEx failed\n");
     }
-  } else if (solve_explicit) {
+    break;
+  case Treatment::Explicit:
     output_info.write("\tUsing ARKStep Explicit solver \n");
     if (ARKStepSetExplicit(arkode_mem) != ARK_SUCCESS) {
       throw BoutException("ARKStepSetExplicit failed\n");
     }
-  } else {
+    break;
+  case Treatment::Implicit:
     output_info.write("\tUsing ARKStep Implicit solver \n");
     if (ARKStepSetImplicit(arkode_mem) != ARK_SUCCESS) {
       throw BoutException("ARKStepSetImplicit failed\n");
     }
+    break;
+  default:
+    throw BoutException("Invalid treatment: {}\n", toString(treatment));
   }
 
   // For callbacks, need pointer to solver object
@@ -325,11 +241,8 @@ int ArkodeSolver::init() {
     throw BoutException("ARKStepSetUserData failed\n");
   }
 
-  if (set_linear) {
-    output.write("\tSetting ARKStep implicit solver to Linear\n");
-    if (ARKStepSetLinear(arkode_mem, 1) != ARK_SUCCESS) {
-      throw BoutException("ARKStepSetLinear failed\n");
-    }
+  if (set_linear and ARKStepSetLinear(arkode_mem, 1) != ARK_SUCCESS) { // opt-in
+    throw BoutException("ARKStepSetLinear failed\n");
   }
 
   if (fixed_step) {
@@ -344,13 +257,84 @@ int ArkodeSolver::init() {
     throw BoutException("ARKStepSetOrder failed\n");
   }
 
+#if SUNDIALS_TABLE_BY_NAME_SUPPORT
+  if (!implicit_table.empty() || !explicit_table.empty()) {
+    if (ARKStepSetTableName(
+            arkode_mem,
+            implicit_table.empty() ? "ARKODE_DIRK_NONE" : implicit_table.c_str(),
+            explicit_table.empty() ? "ARKODE_ERK_NONE" : explicit_table.c_str())
+        != ARK_SUCCESS) {
+      throw BoutException("ARKStepSetTableName failed\n");
+    }
+  }
+#endif
+
   if (ARKStepSetCFLFraction(arkode_mem, cfl_frac) != ARK_SUCCESS) {
     throw BoutException("ARKStepSetCFLFraction failed\n");
   }
 
-  if (ARKStepSetAdaptivityMethod(arkode_mem, adap_method, 1, 1, nullptr) != ARK_SUCCESS) {
+#if SUNDIALS_CONTROLLER_SUPPORT
+  switch (adap_method) {
+  case AdapMethod::PID:
+    controller = SUNAdaptController_PID(suncontext);
+    break;
+  case AdapMethod::PI:
+    controller = SUNAdaptController_PI(suncontext);
+    break;
+  case AdapMethod::I:
+    controller = SUNAdaptController_I(suncontext);
+    break;
+  case AdapMethod::Explicit_Gustafsson:
+    controller = SUNAdaptController_ExpGus(suncontext);
+    break;
+  case AdapMethod::Implicit_Gustafsson:
+    controller = SUNAdaptController_ImpGus(suncontext);
+    break;
+  case AdapMethod::ImEx_Gustafsson:
+    controller = SUNAdaptController_ImExGus(suncontext);
+    break;
+  default:
+    throw BoutException("Invalid adap_method\n");
+  }
+
+  if (ARKStepSetAdaptController(arkode_mem, controller) != ARK_SUCCESS) {
+    throw BoutException("ARKStepSetAdaptController failed\n");
+  }
+
+  if (ARKStepSetAdaptivityAdjustment(arkode_mem, 0) != ARK_SUCCESS) {
+    throw BoutException("ARKStepSetAdaptivityAdjustment failed\n");
+  }
+#else
+  int adap_method_int;
+  // Could cast to underlying integer, but this is more explicit
+  switch (adap_method) {
+  case AdapMethod::PID:
+    adap_method_int = 0;
+    break;
+  case AdapMethod::PI:
+    adap_method_int = 1;
+    break;
+  case AdapMethod::I:
+    adap_method_int = 2;
+    break;
+  case AdapMethod::Explicit_Gustafsson:
+    adap_method_int = 3;
+    break;
+  case AdapMethod::Implicit_Gustafsson:
+    adap_method_int = 4;
+    break;
+  case AdapMethod::ImEx_Gustafsson:
+    adap_method_int = 5;
+    break;
+  default:
+    throw BoutException("Invalid adap_method\n");
+  }
+
+  if (ARKStepSetAdaptivityMethod(arkode_mem, adap_method_int, 1, 1, nullptr)
+      != ARK_SUCCESS) {
     throw BoutException("ARKStepSetAdaptivityMethod failed\n");
   }
+#endif
 
   if (use_vector_abstol) {
     std::vector<BoutReal> f2dtols;
@@ -374,18 +358,18 @@ int ArkodeSolver::init() {
                      return Options::root()[f3.name]["atol"].withDefault(abstol);
                    });
 
-    N_Vector abstolvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext);
+    N_Vector abstolvec = N_VClone(uvec);
     if (abstolvec == nullptr) {
       throw BoutException("SUNDIALS memory allocation (abstol vector) failed\n");
     }
 
-    set_abstol_values(NV_DATA_P(abstolvec), f2dtols, f3dtols);
+    set_abstol_values(N_VGetArrayPointer(abstolvec), f2dtols, f3dtols);
 
     if (ARKStepSVtolerances(arkode_mem, reltol, abstolvec) != ARK_SUCCESS) {
       throw BoutException("ARKStepSVtolerances failed\n");
     }
 
-    N_VDestroy_Parallel(abstolvec);
+    N_VDestroy(abstolvec);
   } else {
     if (ARKStepSStolerances(arkode_mem, reltol, abstol) != ARK_SUCCESS) {
       throw BoutException("ARKStepSStolerances failed\n");
@@ -414,130 +398,94 @@ int ArkodeSolver::init() {
     }
   }
 
-  // ARKStepSetPredictorMethod(arkode_mem,4);
-
-#if SUNDIALS_VERSION_MAJOR < 4
-  if (fixed_point) {
-    output.write("\tUsing accelerated fixed point solver\n");
-    if (ARKodeSetFixedPoint(arkode_mem, 3.0)) {
-      throw BoutException("ARKodeSetFixedPoint failed\n");
-    }
-  } else {
-    output.write("\tUsing Newton iteration\n");
-    if (ARKodeSetNewton(arkode_mem)) {
-      throw BoutException("ARKodeSetNewton failed\n");
-    }
-  }
-#else
-  if (fixed_point) {
-    output.write("\tUsing accelerated fixed point solver\n");
-    if ((nonlinear_solver = SUNNonlinSol_FixedPoint(uvec, 3, suncontext)) == nullptr) {
-      throw BoutException("Creating SUNDIALS fixed point nonlinear solver failed\n");
-    }
-  } else {
-    output.write("\tUsing Newton iteration\n");
-    if ((nonlinear_solver = SUNNonlinSol_Newton(uvec, suncontext)) == nullptr) {
-      throw BoutException("Creating SUNDIALS Newton nonlinear solver failed\n");
-    }
-  }
-  if (ARKStepSetNonlinearSolver(arkode_mem, nonlinear_solver) != ARK_SUCCESS) {
-    throw BoutException("ARKStepSetNonlinearSolver failed\n");
-  }
-#endif
-
-  /// Set Preconditioner
-  if (use_precon) {
-    const int prectype = rightprec ? SUN_PREC_RIGHT : SUN_PREC_LEFT;
-
-#if SUNDIALS_VERSION_MAJOR >= 3
-    if ((sun_solver = SUNLinSol_SPGMR(uvec, prectype, maxl, suncontext)) == nullptr) {
-      throw BoutException("Creating SUNDIALS linear solver failed\n");
-    }
-    if (ARKStepSetLinearSolver(arkode_mem, sun_solver, nullptr) != ARK_SUCCESS) {
-      throw BoutException("ARKStepSetLinearSolver failed\n");
-    }
-#else
-    if (ARKSpgmr(arkode_mem, prectype, maxl) != ARKSPILS_SUCCESS) {
-      throw BoutException("ARKSpgmr failed\n");
-    }
-#endif
-
-    if (!hasPreconditioner()) {
-      output.write("\tUsing BBD preconditioner\n");
-
-      /// Get options
-      // Compute band_width_default from actually added fields, to allow for multiple
-      // Mesh objects
-      //
-      // Previous implementation was equivalent to:
-      //   int MXSUB = mesh->xend - mesh->xstart + 1;
-      //   int band_width_default = n3Dvars()*(MXSUB+2);
-      const int band_width_default = std::accumulate(
-          begin(f3d), end(f3d), 0, [](int a, const VarStr<Field3D>& fvar) {
-            Mesh* localmesh = fvar.var->getMesh();
-            return a + localmesh->xend - localmesh->xstart + 3;
-          });
-
-      const auto mudq = (*options)["mudq"]
-                            .doc("Upper half-bandwidth to be used in the difference "
-                                 "quotient Jacobian approximation")
-                            .withDefault(band_width_default);
-      const auto mldq = (*options)["mldq"]
-                            .doc("Lower half-bandwidth to be used in the difference "
-                                 "quotient Jacobian approximation")
-                            .withDefault(band_width_default);
-      const auto mukeep = (*options)["mukeep"]
-                              .doc("Upper half-bandwidth of the retained banded "
-                                   "approximate Jacobian block")
-                              .withDefault(n3Dvars() + n2Dvars());
-      const auto mlkeep = (*options)["mlkeep"]
-                              .doc("Lower half-bandwidth of the retained banded "
-                                   "approximate Jacobian block")
-                              .withDefault(n3Dvars() + n2Dvars());
-
-      if (ARKBBDPrecInit(arkode_mem, local_N, mudq, mldq, mukeep, mlkeep, ZERO,
-                         arkode_bbd_rhs, nullptr)
-          != ARK_SUCCESS) {
-        throw BoutException("ARKBBDPrecInit failed\n");
+  if (treatment == Treatment::ImEx or treatment == Treatment::Implicit) {
+    if (fixed_point) {
+      output.write("\tUsing accelerated fixed point solver\n");
+      nonlinear_solver = callWithSUNContext(SUNNonlinSol_FixedPoint, suncontext, uvec, 3);
+      if (nonlinear_solver == nullptr) {
+        throw BoutException("Creating SUNDIALS fixed point nonlinear solver failed\n");
+      }
+      if (ARKStepSetNonlinearSolver(arkode_mem, nonlinear_solver) != ARK_SUCCESS) {
+        throw BoutException("ARKStepSetNonlinearSolver failed\n");
       }
-
     } else {
-      output.write("\tUsing user-supplied preconditioner\n");
+      output.write("\tUsing Newton iteration\n");
 
-      if (ARKStepSetPreconditioner(arkode_mem, nullptr, arkode_pre_shim) != ARK_SUCCESS) {
-        throw BoutException("ARKStepSetPreconditioner failed\n");
+      const auto prectype =
+          use_precon ? (rightprec ? SUN_PREC_RIGHT : SUN_PREC_LEFT) : SUN_PREC_NONE;
+      sun_solver = callWithSUNContext(SUNLinSol_SPGMR, suncontext, uvec, prectype, maxl);
+      if (sun_solver == nullptr) {
+        throw BoutException("Creating SUNDIALS linear solver failed\n");
+      }
+      if (ARKStepSetLinearSolver(arkode_mem, sun_solver, nullptr) != ARKLS_SUCCESS) {
+        throw BoutException("ARKStepSetLinearSolver failed\n");
       }
-    }
-  } else {
-    // Not using preconditioning
-
-    output.write("\tNo preconditioning\n");
 
-#if SUNDIALS_VERSION_MAJOR >= 3
-    if ((sun_solver = SUNLinSol_SPGMR(uvec, SUN_PREC_NONE, maxl, suncontext))
-        == nullptr) {
-      throw BoutException("Creating SUNDIALS linear solver failed\n");
-    }
-    if (ARKStepSetLinearSolver(arkode_mem, sun_solver, nullptr) != ARK_SUCCESS) {
-      throw BoutException("ARKStepSetLinearSolver failed\n");
-    }
-#else
-    if (ARKSpgmr(arkode_mem, SUN_PREC_NONE, maxl) != ARKSPILS_SUCCESS) {
-      throw BoutException("ARKSpgmr failed\n");
+      /// Set Preconditioner
+      if (use_precon) {
+        if (hasPreconditioner()) {
+          output.write("\tUsing user-supplied preconditioner\n");
+
+          if (ARKStepSetPreconditioner(arkode_mem, nullptr, arkode_pre)
+              != ARKLS_SUCCESS) {
+            throw BoutException("ARKStepSetPreconditioner failed\n");
+          }
+        } else {
+          output.write("\tUsing BBD preconditioner\n");
+
+          /// Get options
+          // Compute band_width_default from actually added fields, to allow for multiple
+          // Mesh objects
+          //
+          // Previous implementation was equivalent to:
+          //   int MXSUB = mesh->xend - mesh->xstart + 1;
+          //   int band_width_default = n3Dvars()*(MXSUB+2);
+          const int band_width_default = std::accumulate(
+              begin(f3d), end(f3d), 0, [](int acc, const VarStr<Field3D>& fvar) {
+                Mesh* localmesh = fvar.var->getMesh();
+                return acc + localmesh->xend - localmesh->xstart + 3;
+              });
+
+          const auto mudq = (*options)["mudq"]
+                                .doc("Upper half-bandwidth to be used in the difference "
+                                     "quotient Jacobian approximation")
+                                .withDefault(band_width_default);
+          const auto mldq = (*options)["mldq"]
+                                .doc("Lower half-bandwidth to be used in the difference "
+                                     "quotient Jacobian approximation")
+                                .withDefault(band_width_default);
+          const auto mukeep = (*options)["mukeep"]
+                                  .doc("Upper half-bandwidth of the retained banded "
+                                       "approximate Jacobian block")
+                                  .withDefault(n3Dvars() + n2Dvars());
+          const auto mlkeep = (*options)["mlkeep"]
+                                  .doc("Lower half-bandwidth of the retained banded "
+                                       "approximate Jacobian block")
+                                  .withDefault(n3Dvars() + n2Dvars());
+
+          if (ARKBBDPrecInit(arkode_mem, local_N, mudq, mldq, mukeep, mlkeep, 0,
+                             arkode_bbd_rhs, nullptr)
+              != ARKLS_SUCCESS) {
+            throw BoutException("ARKBBDPrecInit failed\n");
+          }
+        }
+      } else {
+        // Not using preconditioning
+        output.write("\tNo preconditioning\n");
+      }
     }
-#endif
-  }
 
-  /// Set Jacobian-vector multiplication function
+    /// Set Jacobian-vector multiplication function
 
-  if (use_jacobian and hasJacobian()) {
-    output.write("\tUsing user-supplied Jacobian function\n");
+    if (use_jacobian and hasJacobian()) {
+      output.write("\tUsing user-supplied Jacobian function\n");
 
-    if (ARKStepSetJacTimes(arkode_mem, nullptr, arkode_jac) != ARK_SUCCESS) {
-      throw BoutException("ARKStepSetJacTimesVecFn failed\n");
+      if (ARKStepSetJacTimes(arkode_mem, nullptr, arkode_jac) != ARKLS_SUCCESS) {
+        throw BoutException("ARKStepSetJacTimes failed\n");
+      }
+    } else {
+      output.write("\tUsing difference quotient approximation for Jacobian\n");
     }
-  } else {
-    output.write("\tUsing difference quotient approximation for Jacobian\n");
   }
 
   if (optimize) {
@@ -580,24 +528,27 @@ int ArkodeSolver::run() {
     ARKStepGetNumRhsEvals(arkode_mem, &temp_long_int, &temp_long_int2);
     nfe_evals = int(temp_long_int);
     nfi_evals = int(temp_long_int2);
-    ARKStepGetNumNonlinSolvIters(arkode_mem, &temp_long_int);
-    nniters = int(temp_long_int);
-    ARKStepGetNumPrecEvals(arkode_mem, &temp_long_int);
-    npevals = int(temp_long_int);
-    ARKStepGetNumLinIters(arkode_mem, &temp_long_int);
-    nliters = int(temp_long_int);
+    if (treatment == Treatment::ImEx or treatment == Treatment::Implicit) {
+      ARKStepGetNumNonlinSolvIters(arkode_mem, &temp_long_int);
+      nniters = int(temp_long_int);
+      ARKStepGetNumPrecEvals(arkode_mem, &temp_long_int);
+      npevals = int(temp_long_int);
+      ARKStepGetNumLinIters(arkode_mem, &temp_long_int);
+      nliters = int(temp_long_int);
+    }
 
     if (diagnose) {
       output.write("\nARKODE: nsteps {:d}, nfe_evals {:d}, nfi_evals {:d}, nniters {:d}, "
                    "npevals {:d}, nliters {:d}\n",
                    nsteps, nfe_evals, nfi_evals, nniters, npevals, nliters);
-
-      output.write("    -> Newton iterations per step: {:e}\n",
-                   static_cast<BoutReal>(nniters) / static_cast<BoutReal>(nsteps));
-      output.write("    -> Linear iterations per Newton iteration: {:e}\n",
-                   static_cast<BoutReal>(nliters) / static_cast<BoutReal>(nniters));
-      output.write("    -> Preconditioner evaluations per Newton: {:e}\n",
-                   static_cast<BoutReal>(npevals) / static_cast<BoutReal>(nniters));
+      if (treatment == Treatment::ImEx or treatment == Treatment::Implicit) {
+        output.write("    -> Newton iterations per step: {:e}\n",
+                     static_cast<BoutReal>(nniters) / static_cast<BoutReal>(nsteps));
+        output.write("    -> Linear iterations per Newton iteration: {:e}\n",
+                     static_cast<BoutReal>(nliters) / static_cast<BoutReal>(nniters));
+        output.write("    -> Preconditioner evaluations per Newton: {:e}\n",
+                     static_cast<BoutReal>(npevals) / static_cast<BoutReal>(nniters));
+      }
     }
 
     if (call_monitors(simtime, i, getNumberOutputSteps())) {
@@ -645,7 +596,7 @@ BoutReal ArkodeSolver::run(BoutReal tout) {
   }
 
   // Copy variables
-  load_vars(NV_DATA_P(uvec));
+  load_vars(N_VGetArrayPointer(uvec));
   // Call rhs function to get extra variables at this time
   run_rhs(simtime);
   // run_diffusive(simtime);
@@ -718,8 +669,8 @@ void ArkodeSolver::pre(BoutReal t, BoutReal gamma, BoutReal delta, BoutReal* uda
 
   if (!hasPreconditioner()) {
     // Identity (but should never happen)
-    const int N = NV_LOCLENGTH_P(uvec);
-    std::copy(rvec, rvec + N, zvec);
+    const auto length = N_VGetLocalLength_Parallel(uvec);
+    std::copy(rvec, rvec + length, zvec);
     return;
   }
 
@@ -766,10 +717,12 @@ void ArkodeSolver::jac(BoutReal t, BoutReal* ydata, BoutReal* vdata, BoutReal* J
  * ARKODE explicit RHS functions
  **************************************************************************/
 
-static int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) {
+// NOLINTBEGIN(readability-identifier-length)
+namespace {
+int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) {
 
-  BoutReal* udata = NV_DATA_P(u);
-  BoutReal* dudata = NV_DATA_P(du);
+  BoutReal* udata = N_VGetArrayPointer(u);
+  BoutReal* dudata = N_VGetArrayPointer(du);
 
   auto* s = static_cast<ArkodeSolver*>(user_data);
 
@@ -782,10 +735,10 @@ static int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_d
   return 0;
 }
 
-static int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) {
+int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) {
 
-  BoutReal* udata = NV_DATA_P(u);
-  BoutReal* dudata = NV_DATA_P(du);
+  BoutReal* udata = N_VGetArrayPointer(u);
+  BoutReal* dudata = N_VGetArrayPointer(du);
 
   auto* s = static_cast<ArkodeSolver*>(user_data);
 
@@ -798,10 +751,10 @@ static int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_d
   return 0;
 }
 
-static int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) {
+int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) {
 
-  BoutReal* udata = NV_DATA_P(u);
-  BoutReal* dudata = NV_DATA_P(du);
+  BoutReal* udata = N_VGetArrayPointer(u);
+  BoutReal* dudata = N_VGetArrayPointer(du);
 
   auto* s = static_cast<ArkodeSolver*>(user_data);
 
@@ -815,18 +768,17 @@ static int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) {
 }
 
 /// RHS function for BBD preconditioner
-static int arkode_bbd_rhs(ARKODEINT UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du,
-                          void* user_data) {
+int arkode_bbd_rhs(sunindextype UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du,
+                   void* user_data) {
   return arkode_rhs_implicit(t, u, du, user_data);
 }
 
 /// Preconditioner function
-static int arkode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec,
-                      N_Vector zvec, BoutReal gamma, BoutReal delta, int UNUSED(lr),
-                      void* user_data) {
-  BoutReal* udata = NV_DATA_P(yy);
-  BoutReal* rdata = NV_DATA_P(rvec);
-  BoutReal* zdata = NV_DATA_P(zvec);
+int arkode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec, N_Vector zvec,
+               BoutReal gamma, BoutReal delta, int UNUSED(lr), void* user_data) {
+  BoutReal* udata = N_VGetArrayPointer(yy);
+  BoutReal* rdata = N_VGetArrayPointer(rvec);
+  BoutReal* zdata = N_VGetArrayPointer(zvec);
 
   auto* s = static_cast<ArkodeSolver*>(user_data);
 
@@ -837,11 +789,11 @@ static int arkode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rve
 }
 
 /// Jacobian-vector multiplication function
-static int arkode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y,
-                      N_Vector UNUSED(fy), void* user_data, N_Vector UNUSED(tmp)) {
-  BoutReal* ydata = NV_DATA_P(y);   ///< System state
-  BoutReal* vdata = NV_DATA_P(v);   ///< Input vector
-  BoutReal* Jvdata = NV_DATA_P(Jv); ///< Jacobian*vector output
+int arkode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector UNUSED(fy),
+               void* user_data, N_Vector UNUSED(tmp)) {
+  BoutReal* ydata = N_VGetArrayPointer(y);   ///< System state
+  BoutReal* vdata = N_VGetArrayPointer(v);   ///< Input vector
+  BoutReal* Jvdata = N_VGetArrayPointer(Jv); ///< Jacobian*vector output
 
   auto* s = static_cast<ArkodeSolver*>(user_data);
 
@@ -849,6 +801,8 @@ static int arkode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y,
 
   return 0;
 }
+} // namespace
+// NOLINTEND(readability-identifier-length)
 
 /**************************************************************************
  * vector abstol functions
diff --git a/src/solver/impls/arkode/arkode.hxx b/src/solver/impls/arkode/arkode.hxx
index 302413d8aa..4050ed377f 100644
--- a/src/solver/impls/arkode/arkode.hxx
+++ b/src/solver/impls/arkode/arkode.hxx
@@ -5,9 +5,9 @@
  * NOTE: Only one solver can currently be compiled in
  *
  **************************************************************************
- * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu
+ * Copyright 2010-2024 BOUT++ contributors
  *
- * Contact: Ben Dudson, bd512@york.ac.uk
+ * Contact: Ben Dudson, dudson2@llnl.gov
  *
  * This file is part of BOUT++.
  *
@@ -41,12 +41,17 @@ RegisterUnavailableSolver
 
 #else
 
+#include "bout/bout_enum_class.hxx"
 #include "bout/bout_types.hxx"
 #include "bout/sundials_backports.hxx"
 
 #include <nvector/nvector_parallel.h>
 #include <sundials/sundials_config.h>
 
+#if SUNDIALS_CONTROLLER_SUPPORT
+#include <sundials/sundials_adaptcontroller.h>
+#endif
+
 #include <vector>
 
 class ArkodeSolver;
@@ -56,6 +61,14 @@ namespace {
 RegisterSolver<ArkodeSolver> registersolverarkode("arkode");
 }
 
+// How ArkodeSolver treats the equations (stored in ArkodeSolver::treatment)
+// Note: Capitalized because `explicit` is a C++ reserved keyword
+BOUT_ENUM_CLASS(Treatment, ImEx, Implicit, Explicit);
+
+// Timestep adaptivity method (replaces the former integer codes 0-5 of adap_method)
+BOUT_ENUM_CLASS(AdapMethod, PID, PI, I, Explicit_Gustafsson, Implicit_Gustafsson,
+                ImEx_Gustafsson);
+
 class ArkodeSolver : public Solver {
 public:
   explicit ArkodeSolver(Options* opts = nullptr);
@@ -89,12 +102,8 @@ private:
 
   /// Maximum number of steps to take between outputs
   int mxsteps;
-  /// Use ImEx capability
-  bool imex;
-  /// Solve only explicit part
-  bool solve_explicit;
-  /// Solve only implicit part
-  bool solve_implicit;
+  /// Integrator treatment enum: ImEx, Implicit or Explicit
+  Treatment treatment;
   /// Use linear implicit solver (only evaluates jacobian inversion once)
   bool set_linear;
   /// Solve explicit portion in fixed timestep mode. NOTE: This is not recommended except
@@ -102,16 +111,14 @@ private:
   bool fixed_step;
   /// Order of internal step
   int order;
+  /// Name of the implicit Butcher table
+  std::string implicit_table;
+  /// Name of the explicit Butcher table
+  std::string explicit_table;
   /// Fraction of the estimated explicitly stable step to use
   BoutReal cfl_frac;
-  /// Set timestep adaptivity function:
-  /// - 0: PID adaptivity (default)
-  /// - 1: PI
-  /// - 2: I
-  /// - 3: explicit Gustafsson
-  /// - 4: implicit Gustafsson
-  /// - 5: ImEx Gustafsson
-  int adap_method;
+  /// Timestep adaptivity function
+  AdapMethod adap_method;
   /// Absolute tolerance
   BoutReal abstol;
   /// Relative tolerance
@@ -153,8 +160,12 @@ private:
 
   /// SPGMR solver structure
   SUNLinearSolver sun_solver{nullptr};
-  /// Solver for functional iterations for Adams-Moulton
+  /// Solver for implicit stages
   SUNNonlinearSolver nonlinear_solver{nullptr};
+#if SUNDIALS_CONTROLLER_SUPPORT
+  /// Timestep controller
+  SUNAdaptController controller{nullptr};
+#endif
   /// Context for SUNDIALS memory allocations
   sundials::Context suncontext;
 };
diff --git a/src/solver/impls/cvode/cvode.cxx b/src/solver/impls/cvode/cvode.cxx
index c17bed420c..7137ce3304 100644
--- a/src/solver/impls/cvode/cvode.cxx
+++ b/src/solver/impls/cvode/cvode.cxx
@@ -3,9 +3,9 @@
  *
  *
  **************************************************************************
- * Copyright 2010 B.D.Dudson, S.Farley, M.V.Umansky, X.Q.Xu
+ * Copyright 2010-2024 BOUT++ contributors
  *
- * Contact: Ben Dudson, bd512@york.ac.uk
+ * Contact: Ben Dudson, dudson2@llnl.gov
  *
  * This file is part of BOUT++.
  *
@@ -44,16 +44,9 @@
 #include "fmt/core.h"
 
 #include <cvode/cvode.h>
-
-#if SUNDIALS_VERSION_MAJOR >= 3
-#include <cvode/cvode_spils.h>
-#include <sunlinsol/sunlinsol_spgmr.h>
-#else
-#include <cvode/cvode_spgmr.h>
-#endif
-
 #include <cvode/cvode_bbdpre.h>
 #include <sundials/sundials_types.h>
+#include <sunlinsol/sunlinsol_spgmr.h>
 
 #include <algorithm>
 #include <numeric>
@@ -61,68 +54,23 @@
 
 class Field2D;
 
-#define ZERO RCONST(0.)
-#define ONE RCONST(1.0)
-
-#ifndef CVODEINT
-#if SUNDIALS_VERSION_MAJOR < 3
-using CVODEINT = bout::utils::function_traits<CVLocalFn>::arg_t<0>;
-#else
-using CVODEINT = sunindextype;
-#endif
-#endif
-
 BOUT_ENUM_CLASS(positivity_constraint, none, positive, non_negative, negative,
                 non_positive);
 
-static int cvode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data);
-static int cvode_bbd_rhs(CVODEINT Nlocal, BoutReal t, N_Vector u, N_Vector du,
-                         void* user_data);
-
-static int cvode_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, N_Vector zvec,
-                     BoutReal gamma, BoutReal delta, int lr, void* user_data);
+// NOLINTBEGIN(readability-identifier-length)
+namespace {
+int cvode_linear_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data);
+int cvode_nonlinear_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data);
+int cvode_bbd_rhs(sunindextype Nlocal, BoutReal t, N_Vector u, N_Vector du,
+                  void* user_data);
 
-#if SUNDIALS_VERSION_MAJOR < 3
-// Shim for earlier versions
-inline static int cvode_pre_shim(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec,
-                                 N_Vector zvec, BoutReal gamma, BoutReal delta, int lr,
-                                 void* user_data, N_Vector UNUSED(tmp)) {
-  return cvode_pre(t, yy, yp, rvec, zvec, gamma, delta, lr, user_data);
-}
-#else
-// Alias for newer versions
-constexpr auto& cvode_pre_shim = cvode_pre;
-#endif
-
-static int cvode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector fy,
-                     void* user_data, N_Vector tmp);
-
-#if SUNDIALS_VERSION_MAJOR < 3
-// Shim for earlier versions
-inline int CVSpilsSetJacTimes(void* arkode_mem, std::nullptr_t,
-                              CVSpilsJacTimesVecFn jtimes) {
-  return CVSpilsSetJacTimesVecFn(arkode_mem, jtimes);
-}
-#endif
+int cvode_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, N_Vector zvec,
+              BoutReal gamma, BoutReal delta, int lr, void* user_data);
 
-#if SUNDIALS_VERSION_MAJOR >= 4
-// Shim for newer versions
-constexpr auto CV_FUNCTIONAL = 0;
-constexpr auto CV_NEWTON = 0;
-#endif
-
-#if SUNDIALS_VERSION_MAJOR >= 3
-void* CVodeCreate(int lmm, [[maybe_unused]] int iter,
-                  [[maybe_unused]] SUNContext context) {
-#if SUNDIALS_VERSION_MAJOR == 3
-  return CVodeCreate(lmm, iter);
-#elif SUNDIALS_VERSION_MAJOR == 4 || SUNDIALS_VERSION_MAJOR == 5
-  return CVodeCreate(lmm);
-#else
-  return CVodeCreate(lmm, context);
-#endif
-}
-#endif
+int cvode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector fy,
+              void* user_data, N_Vector tmp);
+} // namespace
+// NOLINTEND(readability-identifier-length)
 
 CvodeSolver::CvodeSolver(Options* opts)
     : Solver(opts), diagnose((*options)["diagnose"]
@@ -136,7 +84,7 @@ CvodeSolver::CvodeSolver(Options* opts)
                     .doc("Use functional iteration instead of Newton")
                     .withDefault(adams_moulton)),
       max_order((*options)["cvode_max_order"]
-                    .doc("Maximum order of method to use. < 0 means no limit.")
+                    .doc("Maximum order of method to use. <= 0 means default limit.")
                     .withDefault(-1)),
       stablimdet((*options)["cvode_stability_limit_detection"].withDefault(false)),
       abstol((*options)["atol"].doc("Absolute tolerance").withDefault(1.0e-12)),
@@ -148,19 +96,18 @@ CvodeSolver::CvodeSolver(Options* opts)
                   .doc("Maximum number of internal steps between outputs.")
                   .withDefault(500)),
       max_timestep(
-          (*options)["max_timestep"].doc("Maximum time step size").withDefault(-1.0)),
+          (*options)["max_timestep"].doc("Maximum time step size").withDefault(0.0)),
       min_timestep(
-          (*options)["min_timestep"].doc("Minimum time step size").withDefault(-1.0)),
+          (*options)["min_timestep"].doc("Minimum time step size").withDefault(0.0)),
       start_timestep((*options)["start_timestep"]
-                         .doc("Starting time step. < 0 then chosen by CVODE.")
-                         .withDefault(-1.0)),
+                         .doc("Starting time step. = 0 then chosen by CVODE.")
+                         .withDefault(0.0)),
       mxorder((*options)["mxorder"].doc("Maximum order").withDefault(-1)),
       max_nonlinear_iterations(
           (*options)["max_nonlinear_iterations"]
               .doc("Maximum number of nonlinear iterations allowed by CVODE before "
-                   "reducing "
-                   "timestep. CVODE default (used if this option is negative) is 3.")
-              .withDefault(-1)),
+                   "reducing timestep.")
+              .withDefault(3)),
       apply_positivity_constraints(
           (*options)["apply_positivity_constraints"]
               .doc("Use CVODE function CVodeSetConstraints to constrain variables - the "
@@ -184,7 +131,7 @@ CvodeSolver::CvodeSolver(Options* opts)
               .doc("Factor by which the Krylov linear solver’s convergence test constant "
                    "is reduced from the nonlinear solver test constant.")
               .withDefault(0.05)),
-      suncontext(static_cast<void*>(&BoutComm::get())) {
+      suncontext(createSUNContext(BoutComm::get())) {
   has_constraints = false; // This solver doesn't have constraints
   canReset = true;
 
@@ -210,7 +157,7 @@ CvodeSolver::CvodeSolver(Options* opts)
 
 CvodeSolver::~CvodeSolver() {
   if (cvode_initialised) {
-    N_VDestroy_Parallel(uvec);
+    N_VDestroy(uvec);
     CVodeFree(&cvode_mem);
     SUNLinSolFree(sun_solver);
     SUNNonlinSolFree(nonlinear_solver);
@@ -242,12 +189,13 @@ int CvodeSolver::init() {
                     n3Dvars(), n2Dvars(), neq, local_N);
 
   // Allocate memory
-  if ((uvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) {
+  uvec = callWithSUNContext(N_VNew_Parallel, suncontext, BoutComm::get(), local_N, neq);
+  if (uvec == nullptr) {
     throw BoutException("SUNDIALS memory allocation failed\n");
   }
 
   // Put the variables into uvec
-  save_vars(NV_DATA_P(uvec));
+  save_vars(N_VGetArrayPointer(uvec));
 
   if (adams_moulton) {
     // By default use functional iteration for Adams-Moulton
@@ -258,31 +206,43 @@ int CvodeSolver::init() {
   }
 
   const auto lmm = adams_moulton ? CV_ADAMS : CV_BDF;
-  const auto iter = func_iter ? CV_FUNCTIONAL : CV_NEWTON;
 
-  if ((cvode_mem = CVodeCreate(lmm, iter, suncontext)) == nullptr) {
+  cvode_mem = callWithSUNContext(CVodeCreate, suncontext, lmm);
+  if (cvode_mem == nullptr) {
     throw BoutException("CVodeCreate failed\n");
   }
 
   // For callbacks, need pointer to solver object
-  if (CVodeSetUserData(cvode_mem, this) < 0) {
+  if (CVodeSetUserData(cvode_mem, this) != CV_SUCCESS) {
     throw BoutException("CVodeSetUserData failed\n");
   }
 
-  if (CVodeInit(cvode_mem, cvode_rhs, simtime, uvec) < 0) {
+#if SUNDIALS_VERSION_MAJOR >= 6
+  // Set the default RHS to linear, then pass nonlinear rhs to NL solver
+  if (CVodeInit(cvode_mem, cvode_linear_rhs, simtime, uvec) != CV_SUCCESS) {
+    throw BoutException("CVodeInit failed\n");
+  }
+#else
+  if (CVodeInit(cvode_mem, cvode_nonlinear_rhs, simtime, uvec) != CV_SUCCESS) {
     throw BoutException("CVodeInit failed\n");
   }
+#endif
 
+  if (mxorder > 0) {
+    output_warn << "WARNING: Option 'mxorder' is deprecated. Please use "
+                   "'cvode_max_order' instead\n";
+    if (CVodeSetMaxOrd(cvode_mem, mxorder) != CV_SUCCESS) {
+      throw BoutException("CVodeSetMaxOrder failed\n");
+    }
+  }
   if (max_order > 0) {
-    if (CVodeSetMaxOrd(cvode_mem, max_order) < 0) {
+    if (CVodeSetMaxOrd(cvode_mem, max_order) != CV_SUCCESS) {
       throw BoutException("CVodeSetMaxOrder failed\n");
     }
   }
 
-  if (stablimdet) {
-    if (CVodeSetStabLimDet(cvode_mem, stablimdet) < 0) {
-      throw BoutException("CVodeSetStabLimDet failed\n");
-    }
+  if (CVodeSetStabLimDet(cvode_mem, static_cast<int>(stablimdet)) != CV_SUCCESS) {
+    throw BoutException("CVodeSetStabLimDet failed\n");
   }
 
   if (use_vector_abstol) {
@@ -307,94 +267,97 @@ int CvodeSolver::init() {
                      return Options::root()[f3.name]["atol"].withDefault(abstol);
                    });
 
-    N_Vector abstolvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext);
+    N_Vector abstolvec = N_VClone(uvec);
     if (abstolvec == nullptr) {
       throw BoutException("SUNDIALS memory allocation (abstol vector) failed\n");
     }
 
-    set_vector_option_values(NV_DATA_P(abstolvec), f2dtols, f3dtols);
+    set_vector_option_values(N_VGetArrayPointer(abstolvec), f2dtols, f3dtols);
 
-    if (CVodeSVtolerances(cvode_mem, reltol, abstolvec) < 0) {
+    if (CVodeSVtolerances(cvode_mem, reltol, abstolvec) != CV_SUCCESS) {
       throw BoutException("CVodeSVtolerances failed\n");
     }
 
-    N_VDestroy_Parallel(abstolvec);
+    N_VDestroy(abstolvec);
   } else {
-    if (CVodeSStolerances(cvode_mem, reltol, abstol) < 0) {
+    if (CVodeSStolerances(cvode_mem, reltol, abstol) != CV_SUCCESS) {
       throw BoutException("CVodeSStolerances failed\n");
     }
   }
 
-  CVodeSetMaxNumSteps(cvode_mem, mxsteps);
-
-  if (max_timestep > 0.0) {
-    CVodeSetMaxStep(cvode_mem, max_timestep);
+  if (CVodeSetMaxNumSteps(cvode_mem, mxsteps) != CV_SUCCESS) {
+    throw BoutException("CVodeSetMaxNumSteps failed\n");
   }
 
-  if (min_timestep > 0.0) {
-    CVodeSetMinStep(cvode_mem, min_timestep);
+  if (CVodeSetMaxStep(cvode_mem, max_timestep) != CV_SUCCESS) {
+    throw BoutException("CVodeSetMaxStep failed\n");
   }
 
-  if (start_timestep > 0.0) {
-    CVodeSetInitStep(cvode_mem, start_timestep);
+  if (CVodeSetMinStep(cvode_mem, min_timestep) != CV_SUCCESS) {
+    throw BoutException("CVodeSetMinStep failed\n");
   }
 
-  if (mxorder > 0) {
-    CVodeSetMaxOrd(cvode_mem, mxorder);
+  if (CVodeSetInitStep(cvode_mem, start_timestep) != CV_SUCCESS) {
+    throw BoutException("CVodeSetInitStep failed\n");
   }
 
-  if (max_nonlinear_iterations > 0) {
-    CVodeSetMaxNonlinIters(cvode_mem, max_nonlinear_iterations);
+  if (CVodeSetMaxNonlinIters(cvode_mem, max_nonlinear_iterations) != CV_SUCCESS) {
+    throw BoutException("CVodeSetMaxNonlinIters failed\n");
   }
 
-#if not(SUNDIALS_VERSION_MAJOR >= 3 and SUNDIALS_VERSION_MINOR >= 2)
-  if (apply_positivity_constraints) {
-    throw BoutException("The apply_positivity_constraints option is only available with "
-                        "SUNDIALS>=3.2.0");
-  }
-#else
   if (apply_positivity_constraints) {
     auto f2d_constraints = create_constraints(f2d);
     auto f3d_constraints = create_constraints(f3d);
 
-    N_Vector constraints_vec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext);
+    N_Vector constraints_vec = N_VClone(uvec);
     if (constraints_vec == nullptr) {
       throw BoutException("SUNDIALS memory allocation (positivity constraints vector) "
                           "failed\n");
     }
 
-    set_vector_option_values(NV_DATA_P(constraints_vec), f2d_constraints,
+    set_vector_option_values(N_VGetArrayPointer(constraints_vec), f2d_constraints,
                              f3d_constraints);
 
-    if (CVodeSetConstraints(cvode_mem, constraints_vec) < 0) {
+    if (CVodeSetConstraints(cvode_mem, constraints_vec) != CV_SUCCESS) {
       throw BoutException("CVodeSetConstraints failed\n");
     }
 
-    N_VDestroy_Parallel(constraints_vec);
+    N_VDestroy(constraints_vec);
   }
-#endif
 
   /// Newton method can include Preconditioners and Jacobian function
-  if (!func_iter) {
+  if (func_iter) {
+    output_info.write("\tUsing Functional iteration\n");
+    nonlinear_solver = callWithSUNContext(SUNNonlinSol_FixedPoint, suncontext, uvec, 0);
+    if (nonlinear_solver == nullptr) {
+      throw BoutException("SUNNonlinSol_FixedPoint failed\n");
+    }
+
+    if (CVodeSetNonlinearSolver(cvode_mem, nonlinear_solver) != CV_SUCCESS) {
+      throw BoutException("CVodeSetNonlinearSolver failed\n");
+    }
+  } else {
     output_info.write("\tUsing Newton iteration\n");
     TRACE("Setting preconditioner");
-    if (use_precon) {
-      const int prectype = rightprec ? SUN_PREC_RIGHT : SUN_PREC_LEFT;
 
-#if SUNDIALS_VERSION_MAJOR >= 3
-      if ((sun_solver = SUNLinSol_SPGMR(uvec, prectype, maxl, suncontext)) == nullptr) {
-        throw BoutException("Creating SUNDIALS linear solver failed\n");
-      }
-      if (CVSpilsSetLinearSolver(cvode_mem, sun_solver) != CV_SUCCESS) {
-        throw BoutException("CVSpilsSetLinearSolver failed\n");
-      }
-#else
-      if (CVSpgmr(cvode_mem, prectype, maxl) != CVSPILS_SUCCESS) {
-        throw BoutException("CVSpgmr failed\n");
-      }
-#endif
+    const auto prectype =
+        use_precon ? (rightprec ? SUN_PREC_RIGHT : SUN_PREC_LEFT) : SUN_PREC_NONE;
+    sun_solver = callWithSUNContext(SUNLinSol_SPGMR, suncontext, uvec, prectype, maxl);
+    if (sun_solver == nullptr) {
+      throw BoutException("Creating SUNDIALS linear solver failed\n");
+    }
+    if (CVodeSetLinearSolver(cvode_mem, sun_solver, nullptr) != CVLS_SUCCESS) {
+      throw BoutException("CVodeSetLinearSolver failed\n");
+    }
+
+    if (use_precon) {
+      if (hasPreconditioner()) {
+        output_info.write("\tUsing user-supplied preconditioner\n");
 
-      if (!hasPreconditioner()) {
+        if (CVodeSetPreconditioner(cvode_mem, nullptr, cvode_pre) != CVLS_SUCCESS) {
+          throw BoutException("CVodeSetPreconditioner failed\n");
+        }
+      } else {
         output_info.write("\tUsing BBD preconditioner\n");
 
         /// Get options
@@ -415,62 +378,43 @@ int CvodeSolver::init() {
         const auto mukeep = (*options)["mukeep"].withDefault(n3Dvars() + n2Dvars());
         const auto mlkeep = (*options)["mlkeep"].withDefault(n3Dvars() + n2Dvars());
 
-        if (CVBBDPrecInit(cvode_mem, local_N, mudq, mldq, mukeep, mlkeep, ZERO,
-                          cvode_bbd_rhs, nullptr)) {
+        if (CVBBDPrecInit(cvode_mem, local_N, mudq, mldq, mukeep, mlkeep, 0.0,
+                          cvode_bbd_rhs, nullptr)
+            != CVLS_SUCCESS) {
           throw BoutException("CVBBDPrecInit failed\n");
         }
-
-      } else {
-        output_info.write("\tUsing user-supplied preconditioner\n");
-
-        if (CVSpilsSetPreconditioner(cvode_mem, nullptr, cvode_pre_shim)) {
-          throw BoutException("CVSpilsSetPreconditioner failed\n");
-        }
       }
     } else {
       output_info.write("\tNo preconditioning\n");
-
-#if SUNDIALS_VERSION_MAJOR >= 3
-      if ((sun_solver = SUNLinSol_SPGMR(uvec, SUN_PREC_NONE, maxl, suncontext))
-          == nullptr) {
-        throw BoutException("Creating SUNDIALS linear solver failed\n");
-      }
-      if (CVSpilsSetLinearSolver(cvode_mem, sun_solver) != CV_SUCCESS) {
-        throw BoutException("CVSpilsSetLinearSolver failed\n");
-      }
-#else
-      if (CVSpgmr(cvode_mem, SUN_PREC_NONE, maxl) != CVSPILS_SUCCESS) {
-        throw BoutException("CVSpgmr failed\n");
-      }
-#endif
     }
 
     /// Set Jacobian-vector multiplication function
     if (use_jacobian and hasJacobian()) {
       output_info.write("\tUsing user-supplied Jacobian function\n");
 
-      if (CVSpilsSetJacTimes(cvode_mem, nullptr, cvode_jac) != CV_SUCCESS) {
-        throw BoutException("CVSpilsSetJacTimesVecFn failed\n");
+      if (CVodeSetJacTimes(cvode_mem, nullptr, cvode_jac) != CVLS_SUCCESS) {
+        throw BoutException("CVodeSetJacTimes failed\n");
       }
     } else {
       output_info.write("\tUsing difference quotient approximation for Jacobian\n");
     }
-  } else {
-    output_info.write("\tUsing Functional iteration\n");
-#if SUNDIALS_VERSION_MAJOR >= 4
-    if ((nonlinear_solver = SUNNonlinSol_FixedPoint(uvec, 0, suncontext)) == nullptr) {
-      throw BoutException("SUNNonlinSol_FixedPoint failed\n");
-    }
+  }
 
-    if (CVodeSetNonlinearSolver(cvode_mem, nonlinear_solver)) {
-      throw BoutException("CVodeSetNonlinearSolver failed\n");
-    }
+#if SUNDIALS_VERSION_MAJOR >= 6
+  // Set the RHS function to be used in the nonlinear solver
+  if (CVodeSetNlsRhsFn(cvode_mem, cvode_nonlinear_rhs) != CV_SUCCESS) {
+    throw BoutException("CVodeSetNlsRhsFn failed\n");
+  }
 #endif
-  }
 
   // Set internal tolerance factors
-  CVodeSetNonlinConvCoef(cvode_mem, cvode_nonlinear_convergence_coef);
-  CVodeSetEpsLin(cvode_mem, cvode_linear_convergence_coef);
+  if (CVodeSetNonlinConvCoef(cvode_mem, cvode_nonlinear_convergence_coef) != CV_SUCCESS) {
+    throw BoutException("CVodeSetNonlinConvCoef failed\n");
+  }
+
+  if (CVodeSetEpsLin(cvode_mem, cvode_linear_convergence_coef) != CV_SUCCESS) {
+    throw BoutException("CVodeSetEpsLin failed\n");
+  }
 
   cvode_initialised = true;
 
@@ -544,9 +486,9 @@ int CvodeSolver::run() {
     nfevals = int(temp_long_int);
     CVodeGetNumNonlinSolvIters(cvode_mem, &temp_long_int);
     nniters = int(temp_long_int);
-    CVSpilsGetNumPrecSolves(cvode_mem, &temp_long_int);
+    CVodeGetNumPrecSolves(cvode_mem, &temp_long_int);
     npevals = int(temp_long_int);
-    CVSpilsGetNumLinIters(cvode_mem, &temp_long_int);
+    CVodeGetNumLinIters(cvode_mem, &temp_long_int);
     nliters = int(temp_long_int);
 
     // Last step size
@@ -634,7 +576,7 @@ BoutReal CvodeSolver::run(BoutReal tout) {
   }
 
   // Copy variables
-  load_vars(NV_DATA_P(uvec));
+  load_vars(N_VGetArrayPointer(uvec));
 
   // Call rhs function to get extra variables at this time
   run_rhs(simtime);
@@ -651,7 +593,7 @@ BoutReal CvodeSolver::run(BoutReal tout) {
  * RHS function du = F(t, u)
  **************************************************************************/
 
-void CvodeSolver::rhs(BoutReal t, BoutReal* udata, BoutReal* dudata) {
+void CvodeSolver::rhs(BoutReal t, BoutReal* udata, BoutReal* dudata, bool linear) {
   TRACE("Running RHS: CvodeSolver::res({})", t);
 
   // Load state from udata
@@ -662,7 +604,7 @@ void CvodeSolver::rhs(BoutReal t, BoutReal* udata, BoutReal* dudata) {
   CVodeGetLastStep(cvode_mem, &hcur);
 
   // Call RHS function
-  run_rhs(t);
+  run_rhs(t, linear);
 
   // Save derivatives to dudata
   save_derivs(dudata);
@@ -678,11 +620,11 @@ void CvodeSolver::pre(BoutReal t, BoutReal gamma, BoutReal delta, BoutReal* udat
 
   BoutReal tstart = bout::globals::mpi->MPI_Wtime();
 
-  int N = NV_LOCLENGTH_P(uvec);
+  const auto length = N_VGetLocalLength_Parallel(uvec);
 
   if (!hasPreconditioner()) {
     // Identity (but should never happen)
-    for (int i = 0; i < N; i++) {
+    for (sunindextype i = 0; i < length; i++) {
       zvec[i] = rvec[i];
     }
     return;
@@ -731,16 +673,34 @@ void CvodeSolver::jac(BoutReal t, BoutReal* ydata, BoutReal* vdata, BoutReal* Jv
  * CVODE RHS functions
  **************************************************************************/
 
-static int cvode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) {
+// NOLINTBEGIN(readability-identifier-length)
+namespace {
+int cvode_linear_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) {
+  // user_data is the CvodeSolver registered through CVodeSetUserData
+  auto* solver = static_cast<CvodeSolver*>(user_data);
+
+  BoutReal* state = N_VGetArrayPointer(u);
+  BoutReal* time_deriv = N_VGetArrayPointer(du);
+  try {
+    // Evaluate the RHS with the `linear` flag set
+    solver->rhs(t, state, time_deriv, true);
+  } catch (BoutRhsFail& error) {
+    // Report the failed evaluation to the caller with a non-zero status
+    return 1;
+  }
+  return 0;
+}
+
+int cvode_nonlinear_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) {
 
-  BoutReal* udata = NV_DATA_P(u);
-  BoutReal* dudata = NV_DATA_P(du);
+  BoutReal* udata = N_VGetArrayPointer(u);
+  BoutReal* dudata = N_VGetArrayPointer(du);
 
   auto* s = static_cast<CvodeSolver*>(user_data);
 
   // Calculate RHS function
   try {
-    s->rhs(t, udata, dudata);
+    s->rhs(t, udata, dudata, false);
   } catch (BoutRhsFail& error) {
     return 1;
   }
@@ -748,18 +708,17 @@ static int cvode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) {
 }
 
 /// RHS function for BBD preconditioner
-static int cvode_bbd_rhs(CVODEINT UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du,
-                         void* user_data) {
-  return cvode_rhs(t, u, du, user_data);
+int cvode_bbd_rhs(sunindextype UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du,
+                  void* user_data) {
+  return cvode_linear_rhs(t, u, du, user_data);
 }
 
 /// Preconditioner function
-static int cvode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec,
-                     N_Vector zvec, BoutReal gamma, BoutReal delta, int UNUSED(lr),
-                     void* user_data) {
-  BoutReal* udata = NV_DATA_P(yy);
-  BoutReal* rdata = NV_DATA_P(rvec);
-  BoutReal* zdata = NV_DATA_P(zvec);
+int cvode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec, N_Vector zvec,
+              BoutReal gamma, BoutReal delta, int UNUSED(lr), void* user_data) {
+  BoutReal* udata = N_VGetArrayPointer(yy);
+  BoutReal* rdata = N_VGetArrayPointer(rvec);
+  BoutReal* zdata = N_VGetArrayPointer(zvec);
 
   auto* s = static_cast<CvodeSolver*>(user_data);
 
@@ -770,11 +729,11 @@ static int cvode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec
 }
 
 /// Jacobian-vector multiplication function
-static int cvode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector UNUSED(fy),
-                     void* user_data, N_Vector UNUSED(tmp)) {
-  BoutReal* ydata = NV_DATA_P(y);   ///< System state
-  BoutReal* vdata = NV_DATA_P(v);   ///< Input vector
-  BoutReal* Jvdata = NV_DATA_P(Jv); ///< Jacobian*vector output
+int cvode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector UNUSED(fy),
+              void* user_data, N_Vector UNUSED(tmp)) {
+  BoutReal* ydata = N_VGetArrayPointer(y);   ///< System state
+  BoutReal* vdata = N_VGetArrayPointer(v);   ///< Input vector
+  BoutReal* Jvdata = N_VGetArrayPointer(Jv); ///< Jacobian*vector output
 
   auto* s = static_cast<CvodeSolver*>(user_data);
 
@@ -782,6 +741,8 @@ static int cvode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector U
 
   return 0;
 }
+} // namespace
+// NOLINTEND(readability-identifier-length)
 
 /**************************************************************************
  * CVODE vector option functions
@@ -829,9 +790,9 @@ void CvodeSolver::loop_vector_option_values_op(Ind2D UNUSED(i2d), BoutReal* opti
 
 void CvodeSolver::resetInternalFields() {
   TRACE("CvodeSolver::resetInternalFields");
-  save_vars(NV_DATA_P(uvec));
+  save_vars(N_VGetArrayPointer(uvec));
 
-  if (CVodeReInit(cvode_mem, simtime, uvec) < 0) {
+  if (CVodeReInit(cvode_mem, simtime, uvec) != CV_SUCCESS) {
     throw BoutException("CVodeReInit failed\n");
   }
 }
diff --git a/src/solver/impls/cvode/cvode.hxx b/src/solver/impls/cvode/cvode.hxx
index 89c3a613a8..d44fcf2335 100644
--- a/src/solver/impls/cvode/cvode.hxx
+++ b/src/solver/impls/cvode/cvode.hxx
@@ -68,8 +68,8 @@ public:
 
   void resetInternalFields() override;
 
-  // These functions used internally (but need to be public)
-  void rhs(BoutReal t, BoutReal* udata, BoutReal* dudata);
+  // These functions are used internally (but need to be public)
+  void rhs(BoutReal t, BoutReal* udata, BoutReal* dudata, bool linear);
   void pre(BoutReal t, BoutReal gamma, BoutReal delta, BoutReal* udata, BoutReal* rvec,
            BoutReal* zvec);
   void jac(BoutReal t, BoutReal* ydata, BoutReal* vdata, BoutReal* Jvdata);
@@ -138,7 +138,7 @@ private:
   int nonlin_fails{0};
   int stab_lims{0};
 
-  bool cvode_initialised = false;
+  bool cvode_initialised{false};
 
   void set_vector_option_values(BoutReal* option_data, std::vector<BoutReal>& f2dtols,
                                 std::vector<BoutReal>& f3dtols);
diff --git a/src/solver/impls/ida/ida.cxx b/src/solver/impls/ida/ida.cxx
index 189a103bbe..cfc978f755 100644
--- a/src/solver/impls/ida/ida.cxx
+++ b/src/solver/impls/ida/ida.cxx
@@ -40,53 +40,23 @@
 #include "bout/unused.hxx"
 
 #include <ida/ida.h>
-
-#if SUNDIALS_VERSION_MAJOR >= 3
-#include <ida/ida_spils.h>
-#include <sunlinsol/sunlinsol_spgmr.h>
-#else
-#include <ida/ida_spgmr.h>
-#endif
-
 #include <ida/ida_bbdpre.h>
 #include <nvector/nvector_parallel.h>
 #include <sundials/sundials_types.h>
+#include <sunlinsol/sunlinsol_spgmr.h>
 
 #include <numeric>
 
-#define ZERO RCONST(0.)
-#define ONE RCONST(1.0)
-
-#ifndef IDAINT
-#if SUNDIALS_VERSION_MAJOR < 3
-using IDAINT = bout::utils::function_traits<IDABBDLocalFn>::arg_t<0>;
-#else
-using IDAINT = sunindextype;
-#endif
-#endif
-
-static int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data);
-static int ida_bbd_res(IDAINT Nlocal, BoutReal t, N_Vector u, N_Vector du, N_Vector rr,
-                       void* user_data);
+// NOLINTBEGIN(readability-identifier-length)
+namespace {
+int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data);
+int ida_bbd_res(sunindextype Nlocal, BoutReal t, N_Vector u, N_Vector du, N_Vector rr,
+                void* user_data);
 
-static int ida_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rr, N_Vector rvec,
-                   N_Vector zvec, BoutReal cj, BoutReal delta, void* user_data);
-
-#if SUNDIALS_VERSION_MAJOR < 3
-// Shim for earlier versions
-inline static int ida_pre_shim(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rr,
-                               N_Vector rvec, N_Vector zvec, BoutReal cj, BoutReal delta,
-                               void* user_data, N_Vector UNUSED(tmp)) {
-  return ida_pre(t, yy, yp, rr, rvec, zvec, cj, delta, user_data);
-}
-#else
-// Alias for newer versions
-constexpr auto& ida_pre_shim = ida_pre;
-#endif
-
-#if SUNDIALS_VERSION_MAJOR < 6
-void* IDACreate([[maybe_unused]] SUNContext) { return IDACreate(); }
-#endif
+int ida_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rr, N_Vector rvec,
+            N_Vector zvec, BoutReal cj, BoutReal delta, void* user_data);
+} // namespace
+// NOLINTEND(readability-identifier-length)
 
 IdaSolver::IdaSolver(Options* opts)
     : Solver(opts),
@@ -101,15 +71,15 @@ IdaSolver::IdaSolver(Options* opts)
       correct_start((*options)["correct_start"]
                         .doc("Correct the initial values")
                         .withDefault(true)),
-      suncontext(static_cast<void*>(&BoutComm::get())) {
+      suncontext(createSUNContext(BoutComm::get())) {
   has_constraints = true; // This solver has constraints
 }
 
 IdaSolver::~IdaSolver() {
   if (initialised) {
-    N_VDestroy_Parallel(uvec);
-    N_VDestroy_Parallel(duvec);
-    N_VDestroy_Parallel(id);
+    N_VDestroy(uvec);
+    N_VDestroy(duvec);
+    N_VDestroy(id);
     IDAFree(&idamem);
     SUNLinSolFree(sun_solver);
   }
@@ -144,69 +114,75 @@ int IdaSolver::init() {
                neq, local_N);
 
   // Allocate memory
-  if ((uvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) {
+  uvec = callWithSUNContext(N_VNew_Parallel, suncontext, BoutComm::get(), local_N, neq);
+  if (uvec == nullptr) {
     throw BoutException("SUNDIALS memory allocation failed\n");
   }
-  if ((duvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) {
+  duvec = N_VClone(uvec);
+  if (duvec == nullptr) {
     throw BoutException("SUNDIALS memory allocation failed\n");
   }
-  if ((id = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) {
+  id = N_VClone(uvec);
+  if (id == nullptr) {
     throw BoutException("SUNDIALS memory allocation failed\n");
   }
 
   // Put the variables into uvec
-  save_vars(NV_DATA_P(uvec));
+  save_vars(N_VGetArrayPointer(uvec));
 
   // Get the starting time derivative
   run_rhs(simtime);
 
   // Put the time-derivatives into duvec
-  save_derivs(NV_DATA_P(duvec));
+  save_derivs(N_VGetArrayPointer(duvec));
 
   // Set the equation type in id(Differential or Algebraic. This is optional)
-  set_id(NV_DATA_P(id));
+  set_id(N_VGetArrayPointer(id));
 
   // Call IDACreate to initialise
-  if ((idamem = IDACreate(suncontext)) == nullptr) {
+  idamem = callWithSUNContext(IDACreate, suncontext);
+  if (idamem == nullptr) {
     throw BoutException("IDACreate failed\n");
   }
 
   // For callbacks, need pointer to solver object
-  if (IDASetUserData(idamem, this) < 0) {
+  if (IDASetUserData(idamem, this) != IDA_SUCCESS) {
     throw BoutException("IDASetUserData failed\n");
   }
 
-  if (IDASetId(idamem, id) < 0) {
+  if (IDASetId(idamem, id) != IDA_SUCCESS) {
     throw BoutException("IDASetID failed\n");
   }
 
-  if (IDAInit(idamem, idares, simtime, uvec, duvec) < 0) {
+  if (IDAInit(idamem, idares, simtime, uvec, duvec) != IDA_SUCCESS) {
     throw BoutException("IDAInit failed\n");
   }
 
-  if (IDASStolerances(idamem, reltol, abstol) < 0) {
+  if (IDASStolerances(idamem, reltol, abstol) != IDA_SUCCESS) {
     throw BoutException("IDASStolerances failed\n");
   }
 
-  IDASetMaxNumSteps(idamem, mxsteps);
+  if (IDASetMaxNumSteps(idamem, mxsteps) != IDA_SUCCESS) {
+    throw BoutException("IDASetMaxNumSteps failed\n");
+  }
 
   // Call IDASpgmr to specify the IDA linear solver IDASPGMR
   const auto maxl = (*options)["maxl"].withDefault(6 * n3d);
-#if SUNDIALS_VERSION_MAJOR >= 3
-  if ((sun_solver = SUNLinSol_SPGMR(uvec, SUN_PREC_NONE, maxl, suncontext)) == nullptr) {
+  sun_solver = callWithSUNContext(SUNLinSol_SPGMR, suncontext, uvec, SUN_PREC_NONE, maxl);
+  if (sun_solver == nullptr) {
     throw BoutException("Creating SUNDIALS linear solver failed\n");
   }
-  if (IDASpilsSetLinearSolver(idamem, sun_solver) != IDA_SUCCESS) {
-    throw BoutException("IDASpilsSetLinearSolver failed\n");
+  if (IDASetLinearSolver(idamem, sun_solver, nullptr) != IDALS_SUCCESS) {
+    throw BoutException("IDASetLinearSolver failed\n");
   }
-#else
-  if (IDASpgmr(idamem, maxl)) {
-    throw BoutException("IDASpgmr failed\n");
-  }
-#endif
 
   if (use_precon) {
-    if (!hasPreconditioner()) {
+    if (hasPreconditioner()) {
+      output.write("\tUsing user-supplied preconditioner\n");
+      if (IDASetPreconditioner(idamem, nullptr, ida_pre) != IDALS_SUCCESS) {
+        throw BoutException("IDASetPreconditioner failed\n");
+      }
+    } else {
       output.write("\tUsing BBD preconditioner\n");
       /// Get options
       // Compute band_width_default from actually added fields, to allow for multiple Mesh
@@ -225,21 +201,17 @@ int IdaSolver::init() {
       const auto mldq = (*options)["mldq"].withDefault(band_width_default);
       const auto mukeep = (*options)["mukeep"].withDefault(n3d);
       const auto mlkeep = (*options)["mlkeep"].withDefault(n3d);
-      if (IDABBDPrecInit(idamem, local_N, mudq, mldq, mukeep, mlkeep, ZERO, ida_bbd_res,
-                         nullptr)) {
+      if (IDABBDPrecInit(idamem, local_N, mudq, mldq, mukeep, mlkeep, 0.0, ida_bbd_res,
+                         nullptr)
+          != IDALS_SUCCESS) {
         throw BoutException("IDABBDPrecInit failed\n");
       }
-    } else {
-      output.write("\tUsing user-supplied preconditioner\n");
-      if (IDASpilsSetPreconditioner(idamem, nullptr, ida_pre_shim)) {
-        throw BoutException("IDASpilsSetPreconditioner failed\n");
-      }
     }
   }
 
   // Call IDACalcIC (with default options) to correct the initial values
   if (correct_start) {
-    if (IDACalcIC(idamem, IDA_YA_YDP_INIT, 1e-6)) {
+    if (IDACalcIC(idamem, IDA_YA_YDP_INIT, 1e-6) != IDA_SUCCESS) {
       throw BoutException("IDACalcIC failed\n");
     }
   }
@@ -291,7 +263,7 @@ BoutReal IdaSolver::run(BoutReal tout) {
   const int flag = IDASolve(idamem, tout, &simtime, uvec, duvec, IDA_NORMAL);
 
   // Copy variables
-  load_vars(NV_DATA_P(uvec));
+  load_vars(N_VGetArrayPointer(uvec));
 
   // Call rhs function to get extra variables at this time
   run_rhs(simtime);
@@ -322,9 +294,9 @@ void IdaSolver::res(BoutReal t, BoutReal* udata, BoutReal* dudata, BoutReal* rda
   save_derivs(rdata);
 
   // If a differential equation, subtract dudata
-  const int N = NV_LOCLENGTH_P(id);
-  const BoutReal* idd = NV_DATA_P(id);
-  for (int i = 0; i < N; i++) {
+  const auto length = N_VGetLocalLength_Parallel(id);
+  const BoutReal* idd = N_VGetArrayPointer(id);
+  for (int i = 0; i < length; i++) {
     if (idd[i] > 0.5) { // 1 -> differential, 0 -> algebraic
       rdata[i] -= dudata[i];
     }
@@ -343,8 +315,8 @@ void IdaSolver::pre(BoutReal t, BoutReal cj, BoutReal delta, BoutReal* udata,
 
   if (!hasPreconditioner()) {
     // Identity (but should never happen)
-    const int N = NV_LOCLENGTH_P(id);
-    std::copy(rvec, rvec + N, zvec);
+    const auto length = N_VGetLocalLength_Parallel(id);
+    std::copy(rvec, rvec + length, zvec);
     return;
   }
 
@@ -367,10 +339,12 @@ void IdaSolver::pre(BoutReal t, BoutReal cj, BoutReal delta, BoutReal* udata,
  * IDA res function
  **************************************************************************/
 
-static int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data) {
-  BoutReal* udata = NV_DATA_P(u);
-  BoutReal* dudata = NV_DATA_P(du);
-  BoutReal* rdata = NV_DATA_P(rr);
+// NOLINTBEGIN(readability-identifier-length)
+namespace {
+int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data) {
+  BoutReal* udata = N_VGetArrayPointer(u);
+  BoutReal* dudata = N_VGetArrayPointer(du);
+  BoutReal* rdata = N_VGetArrayPointer(rr);
 
   auto* s = static_cast<IdaSolver*>(user_data);
 
@@ -381,18 +355,17 @@ static int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_d
 }
 
 /// Residual function for BBD preconditioner
-static int ida_bbd_res(IDAINT UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du,
-                       N_Vector rr, void* user_data) {
+int ida_bbd_res(sunindextype UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du,
+                N_Vector rr, void* user_data) {
   return idares(t, u, du, rr, user_data);
 }
 
 // Preconditioner function
-static int ida_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector UNUSED(rr),
-                   N_Vector rvec, N_Vector zvec, BoutReal cj, BoutReal delta,
-                   void* user_data) {
-  BoutReal* udata = NV_DATA_P(yy);
-  BoutReal* rdata = NV_DATA_P(rvec);
-  BoutReal* zdata = NV_DATA_P(zvec);
+int ida_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector UNUSED(rr),
+            N_Vector rvec, N_Vector zvec, BoutReal cj, BoutReal delta, void* user_data) {
+  BoutReal* udata = N_VGetArrayPointer(yy);
+  BoutReal* rdata = N_VGetArrayPointer(rvec);
+  BoutReal* zdata = N_VGetArrayPointer(zvec);
 
   auto* s = static_cast<IdaSolver*>(user_data);
 
@@ -401,5 +374,7 @@ static int ida_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector UNUSED
 
   return 0;
 }
+} // namespace
+// NOLINTEND(readability-identifier-length)
 
 #endif
diff --git a/src/solver/solver.cxx b/src/solver/solver.cxx
index 1b7ec1fd74..8a75ff43a4 100644
--- a/src/solver/solver.cxx
+++ b/src/solver/solver.cxx
@@ -1364,6 +1364,12 @@ int Solver::run_rhs(BoutReal t, bool linear) {
 
   Timer timer("rhs");
 
+  if (first_rhs_call) {
+    // Ensure that nonlinear terms are calculated on first call
+    linear = false;
+    first_rhs_call = false;
+  }
+
   if (model->splitOperator()) {
     // Run both parts
 
diff --git a/src/sys/adios_object.cxx b/src/sys/adios_object.cxx
index c7d6dab9aa..477dae14ef 100644
--- a/src/sys/adios_object.cxx
+++ b/src/sys/adios_object.cxx
@@ -1,6 +1,6 @@
 #include "bout/build_config.hxx"
 
-#if BOUT_HAS_ADIOS
+#if BOUT_HAS_ADIOS2
 
 #include "bout/adios_object.hxx"
 #include "bout/boutexception.hxx"
@@ -95,4 +95,4 @@ void ADIOSSetParameters(const std::string& input, const char delimKeyValue,
 }
 
 } // namespace bout
-#endif //BOUT_HAS_ADIOS
+#endif //BOUT_HAS_ADIOS2
diff --git a/src/sys/options.cxx b/src/sys/options.cxx
index 49a81cfa88..e2f39542fd 100644
--- a/src/sys/options.cxx
+++ b/src/sys/options.cxx
@@ -18,6 +18,7 @@
 
 #include <fmt/core.h>
 #include <fmt/format.h>
+#include <fmt/ranges.h>
 
 #include <algorithm>
 #include <cmath>
@@ -221,6 +222,36 @@ Options::fuzzyFind(const std::string& name, std::string::size_type distance) con
   return matches;
 }
 
+Options::Options(const Options& other) { (*this) = other.copy(); }
+
+Options& Options::operator=(const Options& other) {
+  if (this == &other) {
+    return *this;
+  }
+
+  // Note: copy-and-swap can't be used here because pointers to parents are stored
+
+  value = other.value;
+
+  // Assigning the attributes.
+  // The simple assignment operator fails to compile with Apple Clang 12
+  //   attributes = other.attributes;
+  attributes.clear();
+  attributes.insert(other.attributes.begin(), other.attributes.end());
+
+  full_name = other.full_name;
+  is_section = other.is_section;
+  children = other.children;
+  value_used = other.value_used;
+
+  // Ensure that this is the parent of all children,
+  // otherwise they would still point to the original Options instance
+  for (auto& child : children) {
+    child.second.parent_instance = this;
+  }
+  return *this;
+}
+
 Options& Options::operator=(Options&& other) noexcept {
   if (this == &other) {
     return *this;
@@ -938,7 +969,7 @@ bout::details::OptionsFormatterBase::parse(fmt::format_parse_context& ctx) {
 
 fmt::format_context::iterator
 bout::details::OptionsFormatterBase::format(const Options& options,
-                                            fmt::format_context& ctx) {
+                                            fmt::format_context& ctx) const {
 
   const auto conditionally_used = [](const Options& option) -> bool {
     if (not option.hasAttribute(conditionally_used_attribute)) {
diff --git a/src/sys/options/options_adios.cxx b/src/sys/options/options_adios.cxx
index b313d7bc79..88df92df04 100644
--- a/src/sys/options/options_adios.cxx
+++ b/src/sys/options/options_adios.cxx
@@ -1,6 +1,6 @@
 #include "bout/build_config.hxx"
 
-#if BOUT_HAS_ADIOS
+#if BOUT_HAS_ADIOS2
 
 #include "options_adios.hxx"
 #include "bout/adios_object.hxx"
@@ -628,4 +628,4 @@ void OptionsADIOS::write(const Options& options, const std::string& time_dim) {
 
 } // namespace bout
 
-#endif // BOUT_HAS_ADIOS
+#endif // BOUT_HAS_ADIOS2
diff --git a/src/sys/options/options_adios.hxx b/src/sys/options/options_adios.hxx
index eddb3976ff..a942e6fed9 100644
--- a/src/sys/options/options_adios.hxx
+++ b/src/sys/options/options_adios.hxx
@@ -8,7 +8,7 @@
 #include "bout/options.hxx"
 #include "bout/options_io.hxx"
 
-#if !BOUT_HAS_ADIOS
+#if !BOUT_HAS_ADIOS2
 
 namespace {
 bout::RegisterUnavailableOptionsIO
@@ -79,5 +79,5 @@ RegisterOptionsIO<OptionsADIOS> registeroptionsadios("adios");
 
 } // namespace bout
 
-#endif // BOUT_HAS_ADIOS
+#endif // BOUT_HAS_ADIOS2
 #endif // OPTIONS_ADIOS_H
diff --git a/tests/MMS/spatial/fci/runtest b/tests/MMS/spatial/fci/runtest
index 712442a795..204a9cc271 100755
--- a/tests/MMS/spatial/fci/runtest
+++ b/tests/MMS/spatial/fci/runtest
@@ -27,7 +27,7 @@ nx = 3  # Not changed for these tests
 nlist = [8, 16, 32, 64, 128]
 
 # Number of parallel slices (in each direction)
-nslices = [1, 2]
+nslices = [1]
 
 directory = "data"
 
diff --git a/tests/gitlab/ci-tests.sh b/tests/gitlab/ci-tests.sh
new file mode 100755
index 0000000000..a237d85be9
--- /dev/null
+++ b/tests/gitlab/ci-tests.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+set -e
+
+echo "===> Building BOUT-dev CUDA minimal"
+cmake -S . -B build \
+    -DCMAKE_C_COMPILER=gcc \
+    -DCMAKE_CXX_COMPILER=g++ \
+    -DBOUT_ENABLE_RAJA=on \
+    -DBOUT_ENABLE_UMPIRE=on \
+    -DBOUT_ENABLE_CUDA=on \
+    -DCMAKE_CUDA_ARCHITECTURES=70 \
+    -DCUDA_ARCH=compute_70,code=sm_70 \
+    -DBOUT_ENABLE_WARNINGS=off \
+    -DBOUT_USE_SYSTEM_FMT=on
+
+pushd build
+make -j
+
+echo "===> Building and running blob2d-outerloop"
+pushd examples/blob2d-outerloop
+make -j
+# Check the output using Sim Time and RHS evals. Must be careful splitting the
+# regex string in multiple lines and escaping characters.
+if ./blob2d-outerloop | grep -Pzoq "(?s)Sim Time  \|  RHS evals  \| Wall Time \|  Calc    Inv   Comm    I/O   SOLVER\n.*\n"\
+"0\.000e\+00          2       .*"\
+"5\.000e\+01         53       .*"\
+"1\.000e\+02         17       .*"\
+"1\.500e\+02         27       .*"; then
+    echo "Sim Time and RHS evals match"
+else
+    echo "Sim Time and RHS evals DO NOT match"
+    exit 1
+fi
+popd
+
+echo "===> Building and running elm-pb-outerloop"
+pushd examples/elm-pb-outerloop
+make -j
+if ./elm_pb_outerloop | grep -Pzoq "(?s)Sim Time  \|  RHS evals  \| Wall Time \|  Calc    Inv   Comm    I/O   SOLVER\n.*\n"\
+"0\.000e\+00          2       .*"\
+"1\.000e\+00         44       .*"\
+"2\.000e\+00         37       .*"\
+"3\.000e\+00         37       .*"\
+"4\.000e\+00         37       .*"\
+"5\.000e\+00         30       .*"\
+"6\.000e\+00         31       .*"\
+"7\.000e\+00         31       .*"\
+"8\.000e\+00         25       .*"\
+"9\.000e\+00         21       .*"\
+"1\.000e\+01         24       .*"\
+"1\.100e\+01         19       .*"\
+"1\.200e\+01         25       .*"\
+"1\.300e\+01         25       .*"\
+"1\.400e\+01         25       .*"\
+"1\.500e\+01         25       .*"\
+"1\.600e\+01         25       .*"\
+"1\.700e\+01         25       .*"\
+"1\.800e\+01         25       .*"\
+"1\.900e\+01         20       .*"\
+"2\.000e\+01         29       .*"\
+"2\.100e\+01         29       .*"\
+"2\.200e\+01         29       .*"\
+"2\.300e\+01         29       .*"\
+"2\.400e\+01         29       .*"\
+"2\.500e\+01         29       .*"\
+"2\.600e\+01         29       .*"\
+"2\.700e\+01         22       .*"\
+"2\.800e\+01         29       .*"\
+"2\.900e\+01         29       .*"\
+"3\.000e\+01         29       .*"\
+"3\.100e\+01         29       .*"\
+"3\.200e\+01         29       .*"\
+"3\.300e\+01         32       .*"\
+"3\.400e\+01         25       .*"\
+"3\.500e\+01         33       .*"\
+"3\.600e\+01         33       .*"\
+"3\.700e\+01         39       .*"\
+"3\.800e\+01         31       .*"\
+"3\.900e\+01         31       .*"\
+"4\.000e\+01         36       .*"; then
+    echo "Sim Time and RHS evals match"
+else
+    echo "Sim Time and RHS evals DO NOT match"
+    exit 1
+fi
+popd
+
+popd
\ No newline at end of file
diff --git a/tests/integrated/CMakeLists.txt b/tests/integrated/CMakeLists.txt
index 7d3e8e81ce..ef173db7df 100644
--- a/tests/integrated/CMakeLists.txt
+++ b/tests/integrated/CMakeLists.txt
@@ -11,6 +11,7 @@ add_subdirectory(test-datafilefacade)
 add_subdirectory(test-drift-instability)
 add_subdirectory(test-drift-instability-staggered)
 add_subdirectory(test-fieldgroupComm)
+add_subdirectory(test-fci-boundary)
 add_subdirectory(test-griddata)
 add_subdirectory(test-griddata-yboundary-guards)
 add_subdirectory(test-gyro)
diff --git a/tests/integrated/test-fci-boundary/CMakeLists.txt b/tests/integrated/test-fci-boundary/CMakeLists.txt
new file mode 100644
index 0000000000..bf25cd7c57
--- /dev/null
+++ b/tests/integrated/test-fci-boundary/CMakeLists.txt
@@ -0,0 +1,22 @@
+bout_add_mms_test(test-fci-boundary
+  SOURCES get_par_bndry.cxx
+  USE_RUNTEST
+  USE_DATA_BOUT_INP
+  REQUIRES zoidberg_FOUND
+  PROCESSORS 1
+  )
+
+if (zoidberg_FOUND)
+  set(gridfile ${CMAKE_CURRENT_BINARY_DIR}/grid.fci.nc)
+  add_custom_command(OUTPUT ${gridfile}
+    COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${BOUT_PYTHONPATH}:$ENV{PYTHONPATH} ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/grid.py ${gridfile}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/../../../tools/pylib/boutconfig/__init__.py
+    DEPENDS grid.py
+    IMPLICIT_DEPENDS  ${CMAKE_CURRENT_BINARY_DIR}
+    COMMENT "Creating test-fci-boundary grid file"
+  )
+  add_custom_target(test-fci-boundary-grid DEPENDS ${gridfile})
+  add_dependencies(test-fci-boundary
+    test-fci-boundary-grid)
+endif()
diff --git a/tests/integrated/test-fci-boundary/data/BOUT.inp b/tests/integrated/test-fci-boundary/data/BOUT.inp
new file mode 100644
index 0000000000..b631f16295
--- /dev/null
+++ b/tests/integrated/test-fci-boundary/data/BOUT.inp
@@ -0,0 +1,20 @@
+grid = grid.fci.nc
+
+MXG = 1
+NXPE = 1
+MYG = 1
+
+[mesh]
+symmetricglobalx = true
+
+[mesh:ddy]
+first = C2
+second = C2
+
+[mesh:paralleltransform]
+type = fci
+y_periodic = true
+z_periodic = true
+
+[mesh:paralleltransform:xzinterpolation]
+type = lagrange4pt
diff --git a/tests/integrated/test-fci-boundary/get_par_bndry.cxx b/tests/integrated/test-fci-boundary/get_par_bndry.cxx
new file mode 100644
index 0000000000..ac0f5de2a6
--- /dev/null
+++ b/tests/integrated/test-fci-boundary/get_par_bndry.cxx
@@ -0,0 +1,34 @@
+#include "bout/bout.hxx"
+#include "bout/derivs.hxx"
+#include "bout/field_factory.hxx"
+#include "bout/parallel_boundary_region.hxx"
+
+int main(int argc, char** argv) {
+  BoutInitialise(argc, argv);
+  // Counts, per BoundaryParType, how often each cell is hit by that type's regions.
+  using bout::globals::mesh;
+
+  std::vector<Field3D> fields;
+  fields.resize(static_cast<std::size_t>(BoundaryParType::SIZE)); // one per type
+  Options dump;
+  for (std::size_t i = 0; i < fields.size(); i++) { // size_t matches fields.size()
+    const auto type = static_cast<BoundaryParType>(i);
+    const auto name = toString(type);
+    fields[i] = Field3D{0.0};
+    mesh->communicate(fields[i]);
+    for (const auto& bndry_par : mesh->getBoundariesPar(type)) {
+      output.write("{:s} region\n", name);
+      for (bndry_par->first(); !bndry_par->isDone(); bndry_par->next()) {
+        fields[i][bndry_par->ind()] += 1;
+        output.write("{:s} increment\n", name);
+      }
+    }
+    output.write("{:s} done\n", name);
+    // Stored as "field_<type>" in the default output file written below.
+    dump[fmt::format("field_{:s}", name)] = fields[i];
+  }
+
+  bout::writeDefaultOutputFile(dump);
+
+  BoutFinalise();
+}
diff --git a/tests/integrated/test-fci-boundary/grid.py b/tests/integrated/test-fci-boundary/grid.py
new file mode 100644
index 0000000000..d544f0cdf7
--- /dev/null
+++ b/tests/integrated/test-fci-boundary/grid.py
@@ -0,0 +1,55 @@
+import zoidberg as zb
+import numpy as np
+import sys
+import boutconfig as bc
+
+
+def rotating_ellipse(
+    nx=68,
+    ny=16,
+    nz=128,
+    npoints=421,
+    xcentre=5.5,
+    I_coil=0.01,
+    curvilinear=True,
+    rectangular=False,
+    fname="rotating-ellipse.fci.nc",
+    a=0.4,
+    Btor=2.5,
+):
+    yperiod = 2 * np.pi / 5.0
+    field = zb.field.RotatingEllipse(
+        xcentre=xcentre,
+        I_coil=I_coil,
+        radius=2 * a,
+        yperiod=yperiod,
+        Btor=Btor,
+    )
+    # Define the y locations
+    ycoords = np.linspace(0.0, yperiod, ny, endpoint=False)
+
+    if rectangular:
+        print("Making rectangular poloidal grid")
+        poloidal_grid = zb.poloidal_grid.RectangularPoloidalGrid(
+            nx, nz, 1.0, 1.0, Rcentre=xcentre
+        )
+    elif curvilinear:
+        print("Making curvilinear poloidal grid")
+        inner = zb.rzline.shaped_line(
+            R0=xcentre, a=a / 2.0, elong=0, triang=0.0, indent=0, n=npoints
+        )
+        outer = zb.rzline.shaped_line(
+            R0=xcentre, a=a, elong=0, triang=0.0, indent=0, n=npoints
+        )
+
+        print("creating grid...")
+        poloidal_grid = zb.poloidal_grid.grid_elliptic(inner, outer, nx, nz)
+
+    # Create the 3D grid by putting together 2D poloidal grids
+    grid = zb.grid.Grid(poloidal_grid, ycoords, yperiod, yperiodic=True)
+    maps = zb.make_maps(grid, field, quiet=True)
+    zb.write_maps(grid, field, maps, str(fname), metric2d=bc.isMetric2D())
+
+
+if __name__ == "__main__":
+    rotating_ellipse(fname=sys.argv[1])
diff --git a/tests/integrated/test-fci-boundary/runtest b/tests/integrated/test-fci-boundary/runtest
new file mode 100755
index 0000000000..16cb4ee443
--- /dev/null
+++ b/tests/integrated/test-fci-boundary/runtest
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+#
+# Python script to run and analyse MMS test
+#
+
+# Cores: 2
+# only working with cmake
+# requires: False
+from boututils.run_wrapper import launch_safe
+from boututils.datafile import DataFile
+from boutdata.collect import collect as _collect
+
+import numpy as np
+
+
+def collect(var):
+    return _collect(
+        var,
+        info=False,
+        path=directory,
+        xguards=False,
+        yguards=False,
+    )
+
+
+nprocs = [1]  # , 2, 4]
+mthread = 2
+
+directory = "data"
+
+with DataFile("grid.fci.nc") as grid:
+    xfwd = grid.read("forward_xt_prime")[1:-1]
+    xbwd = grid.read("backward_xt_prime")[1:-1]
+
+nx = xfwd.shape[0]
+
+regions = {
+    "xin_fwd": xfwd < 1,
+    "xout_fwd": xfwd > nx,
+    "xin_bwd": xbwd < 1,
+    "xout_bwd": xbwd > nx,
+}
+regions = {k: v.astype(int) for k, v in regions.items()}
+
+# for x in "xout", "xin":
+#     regions[x] = np.logical_or(regions[f"{x}_fwd"], regions[f"{x}_bwd"])
+# for x in "fwd", "bwd":
+#     regions[x] = np.logical_or(regions[f"xin_{x}"], regions[f"xout_{x}"])
+# regions["all"] = np.logical_or(regions["xin"], regions["xout"])
+for x in "xout", "xin":
+    regions[x] = regions[f"{x}_fwd"] + regions[f"{x}_bwd"]
+for x in "fwd", "bwd":
+    regions[x] = regions[f"xin_{x}"] + regions[f"xout_{x}"]
+regions["all"] = regions["xin"] + regions["xout"]
+
+for nproc in nprocs:
+    cmd = "./get_par_bndry"
+
+    # Launch using MPI
+    _, out = launch_safe(cmd, nproc=nproc, mthread=mthread, pipe=True)
+
+    for k, v in regions.items():
+        # Collect data
+        data = collect(f"field_{k}")
+        assert np.allclose(data, v), (
+            k + " does not match",
+            np.sum(data),
+            np.sum(v),
+            np.max(data),
+        )
diff --git a/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp b/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp
index 9a6ac24fa1..46d3cb55ba 100644
--- a/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp
+++ b/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp
@@ -1,7 +1,7 @@
 [f]
 #function = 0.
 function = mixmode(x, 1.)*mixmode(y, 2.)*mixmode(z, 3.)
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 
 [rhs]
 function = mixmode(x, 4.)*mixmode(y, 5.)*mixmode(z, 6.)
@@ -17,7 +17,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.)
 [C2]
 #function = 0.
 function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.)
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 
 [A]
 function = 0.
diff --git a/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp b/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp
index eb78644f0f..be0c697d80 100644
--- a/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp
+++ b/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp
@@ -16,7 +16,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.)
 [C2]
 #function = 0.
 function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.)
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 
 [A]
 function = 0.
diff --git a/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp b/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp
index da1918dcc7..bc3c47eac7 100644
--- a/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp
+++ b/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp
@@ -17,7 +17,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.)
 [C2]
 #function = 0.
 function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.)
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 
 [A]
 function = 0.0
diff --git a/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp b/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp
index 6474b2604b..601531de84 100644
--- a/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp
+++ b/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp
@@ -17,7 +17,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.)
 [C2]
 #function = 0.
 function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.)
-bndry_par_all = parallel_neumann
+bndry_par_all = parallel_neumann_o2
 
 [A]
 function = 0.0
diff --git a/tests/integrated/test-options-adios/CMakeLists.txt b/tests/integrated/test-options-adios/CMakeLists.txt
index 110773d6fd..cc61fabe57 100644
--- a/tests/integrated/test-options-adios/CMakeLists.txt
+++ b/tests/integrated/test-options-adios/CMakeLists.txt
@@ -2,5 +2,5 @@ bout_add_integrated_test(test-options-adios
   SOURCES test-options-adios.cxx
   USE_RUNTEST
   USE_DATA_BOUT_INP
-  REQUIRES BOUT_HAS_ADIOS
+  REQUIRES BOUT_HAS_ADIOS2
   )
diff --git a/tests/integrated/test-options-adios/runtest b/tests/integrated/test-options-adios/runtest
index 1621c686a3..03a83fc0ba 100755
--- a/tests/integrated/test-options-adios/runtest
+++ b/tests/integrated/test-options-adios/runtest
@@ -34,7 +34,7 @@ assert result["int"] == 42
 assert math.isclose(result["real"], 3.1415)
 assert result["string"] == "hello"
 
-print("Checking saved ADIOS test-out file -- Not implemented")
+print("Checking saved ADIOS2 test-out file -- Not implemented")
 
 # Check the output NetCDF file
 # with DataFile("test-out.nc") as f:
diff --git a/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx b/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx
index bfd394194f..1e3cdde310 100644
--- a/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx
+++ b/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx
@@ -23,15 +23,90 @@
  *
  **************************************************************************/
 
-#include <bout/bout.hxx>
-#include <bout/constants.hxx>
-// #include <bout/sys/timer.hxx>
-#include <bout/boutexception.hxx>
-#include <bout/invert_laplace.hxx>
-#include <bout/options.hxx>
+#include "bout/bout.hxx" // NOLINT
+#include "bout/bout_types.hxx"
+#include "bout/boutexception.hxx"
+#include "bout/constants.hxx"
+#include "bout/difops.hxx"
+#include "bout/field2d.hxx"
+#include "bout/field3d.hxx"
+#include "bout/invert_laplace.hxx"
+#include "bout/options.hxx"
+#include "bout/options_io.hxx"
+#include "bout/output.hxx"
+#include "bout/traits.hxx"
+
+#include "fmt/core.h"
+#include <mpi.h>
+
 #include <cmath>
+#include <string_view>
 
 BoutReal max_error_at_ystart(const Field3D& error);
+void apply_flat_boundary(Field3D& bcoef);
+
+template <class T, class U>
+void check_laplace(int test_num, std::string_view test_name, Laplacian& invert,
+                   int inner_flags, int outer_flags, const T& acoef, const T& ccoef,
+                   const T& dcoef, const U& bcoef, const Field3D& field, int ystart,
+                   Options& dump) {
+  static_assert(bout::utils::is_Field_v<T>, "check_laplace requires Field2D or Field3D");
+  static_assert(bout::utils::is_Field_v<U>, "check_laplace requires Field2D or Field3D");
+
+  invert.setInnerBoundaryFlags(inner_flags);
+  invert.setOuterBoundaryFlags(outer_flags);
+  invert.setCoefA(acoef);
+  invert.setCoefC(ccoef);
+  invert.setCoefD(dcoef);
+
+  checkData(bcoef);
+
+  Field3D sol;
+  Field3D error;
+  Field3D abs_error;
+  BoutReal max_error = -1;
+
+  try {
+    sol = invert.solve(sliceXZ(bcoef, ystart));
+    error = (field - sol) / field;
+    abs_error = field - sol;
+    max_error = max_error_at_ystart(abs(abs_error));
+  } catch (BoutException& err) {
+    output.write("BoutException occured in invert->solve(b1): {}\n", err.what());
+  }
+
+  output.write("\nTest {}: {}\n", test_num, test_name);
+  output.write("Magnitude of maximum absolute error is {}\n", max_error);
+
+  dump[fmt::format("a{}", test_num)] = acoef;
+  dump[fmt::format("b{}", test_num)] = bcoef;
+  dump[fmt::format("c{}", test_num)] = ccoef;
+  dump[fmt::format("d{}", test_num)] = dcoef;
+  dump[fmt::format("f{}", test_num)] = field;
+  dump[fmt::format("sol{}", test_num)] = sol;
+  dump[fmt::format("error{}", test_num)] = error;
+  dump[fmt::format("absolute_error{}", test_num)] = abs_error;
+  dump[fmt::format("max_error{}", test_num)] = max_error;
+}
+
+template <class T>
+Field3D forward_laplace(const Field3D& field, const T& acoef, const T& ccoef,
+                        const T& dcoef) {
+  auto bcoef =
+      dcoef * Delp2(field) + Grad_perp(ccoef) * Grad_perp(field) / ccoef + acoef * field;
+  apply_flat_boundary(bcoef);
+  return bcoef;
+}
+
+Field3D generate_f1(const Mesh& mesh);
+Field3D generate_a1(const Mesh& mesh);
+Field3D generate_c1(const Mesh& mesh);
+Field3D generate_d1(const Mesh& mesh);
+
+Field3D generate_f5(const Mesh& mesh);
+Field3D generate_a5(const Mesh& mesh);
+Field3D generate_c5(const Mesh& mesh);
+Field3D generate_d5(const Mesh& mesh);
 
 int main(int argc, char** argv) {
 
@@ -42,829 +117,553 @@ int main(int argc, char** argv) {
     options = Options::getRoot()->getSection("petsc4th");
     auto invert_4th = Laplacian::create(options);
 
-    // Solving equations of the form d*Delp2(f) + 1/c*Grad_perp(c).Grad_perp(f) + a*f = b for various f, a, c, d
-    Field3D f1, a1, b1, c1, d1, sol1;
-    BoutReal p, q; //Use to set parameters in constructing trial functions
-    Field3D error1,
-        absolute_error1; //Absolute value of relative error: abs( (f1-sol1)/f1 )
-    BoutReal max_error1; //Output of test
+    Options dump; // gathers every test's inputs and results for the output file
 
+    // Solving equations of the form d*Delp2(f) + 1/c*Grad_perp(c).Grad_perp(f) + a*f = b for various f, a, c, d
     using bout::globals::mesh;
 
     // Only Neumann x-boundary conditions are implemented so far, so test functions should be Neumann in x and periodic in z.
     // Use Field3D's, but solver only works on FieldPerp slices, so only use 1 y-point
-    BoutReal nx = mesh->GlobalNx - 2 * mesh->xstart - 1;
-    BoutReal nz = mesh->GlobalNz;
 
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    /////////////////////////////////////////////////////
     // Test 1: Gaussian x-profiles, 2nd order Krylov
-    p = 0.39503274;
-    q = 0.20974396;
-    f1.allocate();
-    for (int jx = mesh->xstart; jx <= mesh->xend; jx++) {
-      for (int jy = 0; jy < mesh->LocalNy; jy++) {
-        for (int jz = 0; jz < mesh->LocalNz; jz++) {
-          BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-          BoutReal z = BoutReal(jz) / nz;
-          f1(jx, jy, jz) =
-              0. + exp(-(100. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
-              - 50.
-                    * (2. * p * exp(-100. * pow(-p, 2)) * x
-                       + (-p * exp(-100. * pow(-p, 2))
-                          - (1 - p) * exp(-100. * pow(1 - p, 2)))
-                             * pow(x, 2))
-                    * exp(-(
-                        1.
-                        - cos(2. * PI
-                              * (z - q)))) //make the gradients zero at both x-boundaries
-              ;
-          ASSERT0(finite(f1(jx, jy, jz)));
-        }
-      }
-    }
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            f1(jx, jy, jz) =
-                0. + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
-                - 50.
-                      * (2. * p * exp(-60. * pow(-p, 2)) * x
-                         + (-p * exp(-60. * pow(-p, 2))
-                            - (1 - p) * exp(-60. * pow(1 - p, 2)))
-                               * pow(x, 2))
-                      * exp(-(
-                          1.
-                          - cos(
-                              2. * PI
-                              * (z - q)))); //make the gradients zero at both x-boundaries
-            ASSERT0(finite(f1(jx, jy, jz)));
-          }
-        }
-      }
-    }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            f1(jx, jy, jz) =
-                0. + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
-                - 50.
-                      * (2. * p * exp(-60. * pow(-p, 2)) * x
-                         + (-p * exp(-60. * pow(-p, 2))
-                            - (1 - p) * exp(-60. * pow(1 - p, 2)))
-                               * pow(x, 2))
-                      * exp(-(
-                          1.
-                          - cos(
-                              2. * PI
-                              * (z - q)))); //make the gradients zero at both x-boundaries
-            ASSERT0(finite(f1(jx, jy, jz)));
-          }
-        }
-      }
-    }
+    Field3D f_1 = generate_f1(*mesh);
+    Field3D a_1 = generate_a1(*mesh);
+    Field3D c_1 = generate_c1(*mesh);
+    Field3D d_1 = generate_d1(*mesh);
 
-    f1.applyBoundary("neumann");
-
-    p = 0.512547;
-    q = 0.30908712;
-    d1.allocate();
-    for (int jx = mesh->xstart; jx <= mesh->xend; jx++) {
-      for (int jy = 0; jy < mesh->LocalNy; jy++) {
-        for (int jz = 0; jz < mesh->LocalNz; jz++) {
-          BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-          BoutReal z = BoutReal(jz) / nz;
-          d1(jx, jy, jz) =
-              1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.);
-        }
-      }
-    }
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            d1(jx, jy, jz) =
-                1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.);
-            // 	  d1(jx, jy, jz) = d1(jx+1, jy, jz);
-          }
-        }
-      }
-    }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            d1(jx, jy, jz) =
-                1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.);
-            // 	  d1(jx, jy, jz) = d1(jx-1, jy, jz);
-          }
-        }
-      }
-    }
+    mesh->communicate(f_1, a_1, c_1, d_1);
 
-    p = 0.18439023;
-    q = 0.401089473;
-    c1.allocate();
-    for (int jx = mesh->xstart; jx <= mesh->xend; jx++) {
-      for (int jy = 0; jy < mesh->LocalNy; jy++) {
-        for (int jz = 0; jz < mesh->LocalNz; jz++) {
-          BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-          BoutReal z = BoutReal(jz) / nz;
-          c1(jx, jy, jz) =
-              1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.);
-        }
-      }
-    }
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            c1(jx, jy, jz) =
-                1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.);
-            // 	  c1(jx, jy, jz) = c1(jx+1, jy, jz);
-          }
-        }
-      }
-    }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            c1(jx, jy, jz) =
-                1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.);
-            // 	  c1(jx, jy, jz) = c1(jx-1, jy, jz);
-          }
-        }
+    const Field3D b_1 = forward_laplace(f_1, a_1, c_1, d_1); // RHS for known f_1
+
+    int test_num = 0; // pre-incremented at each call, so tests are numbered from 1
+    check_laplace(++test_num, "PETSc 2nd order", *invert, INVERT_AC_GRAD, INVERT_AC_GRAD,
+                  a_1, c_1, d_1, b_1, f_1, mesh->ystart, dump);
+
+    /////////////////////////////////////////////////
+    // Test 2: Gaussian x-profiles, 4th order Krylov
+
+    check_laplace(++test_num, "PETSc 4th order", *invert_4th, INVERT_AC_GRAD,
+                  INVERT_AC_GRAD, a_1, c_1, d_1, b_1, f_1, mesh->ystart, dump);
+
+    ////////////////////////////////////////////////////////////////////////////////////////
+    // Test 3+4: Gaussian x-profiles, z-independent coefficients and compare with SPT method
+
+    const Field2D a_3 = DC(a_1);
+    const Field2D c_3 = DC(c_1);
+    const Field2D d_3 = DC(d_1);
+    const Field3D b_3 = forward_laplace(f_1, a_3, c_3, d_3); // same f_1, new RHS
+
+    check_laplace(++test_num, "with coefficients constant in z, PETSc 2nd order", *invert,
+                  INVERT_AC_GRAD, INVERT_AC_GRAD, a_3, c_3, d_3, b_3, f_1, mesh->ystart,
+                  dump);
+
+    Options* SPT_options = Options::getRoot()->getSection("SPT");
+    auto invert_SPT = Laplacian::create(SPT_options);
+
+    check_laplace(++test_num, "with coefficients constant in z, default solver",
+                  *invert_SPT, INVERT_AC_GRAD, INVERT_AC_GRAD | INVERT_DC_GRAD, a_3, c_3,
+                  d_3, b_3, f_1, mesh->ystart, dump);
+
+    //////////////////////////////////////////////
+    // Test 5: Cosine x-profiles, 2nd order Krylov
+    Field3D f_5 = generate_f5(*mesh);
+    Field3D a_5 = generate_a5(*mesh);
+    Field3D c_5 = generate_c5(*mesh);
+    Field3D d_5 = generate_d5(*mesh);
+
+    mesh->communicate(f_5, a_5, c_5, d_5);
+
+    const Field3D b_5 = forward_laplace(f_5, a_5, c_5, d_5); // RHS for known f_5
+
+    check_laplace(++test_num, "different profiles, PETSc 2nd order", *invert,
+                  INVERT_AC_GRAD, INVERT_AC_GRAD, a_5, c_5, d_5, b_5, f_5, mesh->ystart,
+                  dump);
+
+    //////////////////////////////////////////////
+    // Test 6: Cosine x-profiles, 4th order Krylov
+
+    check_laplace(++test_num, "different profiles, PETSc 4th order", *invert_4th,
+                  INVERT_AC_GRAD, INVERT_AC_GRAD, a_5, c_5, d_5, b_5, f_5, mesh->ystart,
+                  dump);
+
+    //////////////////////////////////////////////////////////////////////////////////////
+    // Test 7+8: Cosine x-profiles, z-independent coefficients and compare with SPT method
+
+    const Field2D a_7 = DC(a_5);
+    const Field2D c_7 = DC(c_5);
+    const Field2D d_7 = DC(d_5);
+    const Field3D b_7 = forward_laplace(f_5, a_7, c_7, d_7); // same f_5, new RHS
+
+    check_laplace(++test_num,
+                  "different profiles, with coefficients constant in z, PETSc 2nd order",
+                  *invert, INVERT_AC_GRAD, INVERT_AC_GRAD, a_7, c_7, d_7, b_7, f_5,
+                  mesh->ystart, dump);
+
+    check_laplace(++test_num,
+                  "different profiles, with coefficients constant in z, default solver",
+                  *invert_SPT, INVERT_AC_GRAD, INVERT_AC_GRAD | INVERT_DC_GRAD, a_7, c_7,
+                  d_7, b_7, f_5, mesh->ystart, dump);
+
+    // Write and close the output file
+    bout::writeDefaultOutputFile(dump);
+
+    MPI_Barrier(BoutComm::get()); // Wait for all processors to write data
+  }
+
+  bout::checkForUnusedOptions();
+
+  BoutFinalise();
+  return 0;
+}
+
+BoutReal max_error_at_ystart(const Field3D& error) { // max over all ranks at y = ystart
+  const auto* mesh = error.getMesh();
+  BoutReal local_max_error = error(mesh->xstart, mesh->ystart, 0); // seed value
+
+  for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { // only xstart..xend is scanned
+    for (int jz = 0; jz < mesh->LocalNz; jz++) {
+      if (local_max_error < error(jx, mesh->ystart, jz)) {
+        local_max_error = error(jx, mesh->ystart, jz);
       }
     }
+  }
 
-    p = 0.612547;
-    q = 0.30908712;
-    a1.allocate();
-    for (int jx = mesh->xstart; jx <= mesh->xend; jx++) {
-      for (int jy = 0; jy < mesh->LocalNy; jy++) {
-        for (int jz = 0; jz < mesh->LocalNz; jz++) {
-          BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-          BoutReal z = BoutReal(jz) / nz;
-          a1(jx, jy, jz) =
-              -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.);
+  BoutReal max_error = BoutNaN; // reduced below; MPI_DOUBLE assumes BoutReal is double
+
+  MPI_Allreduce(&local_max_error, &max_error, 1, MPI_DOUBLE, MPI_MAX, BoutComm::get());
+
+  return max_error;
+}
+
+void apply_flat_boundary(Field3D& bcoef) { // in place: extend edge values into x-boundaries
+  const Mesh& mesh = *bcoef.getMesh();
+  if (mesh.firstX()) {
+    for (int jx = mesh.xstart - 1; jx >= 0; jx--) { // outwards: each cell copies jx + 1
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          bcoef(jx, jy, jz) = bcoef(jx + 1, jy, jz);
         }
       }
     }
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            a1(jx, jy, jz) =
-                -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.);
-            // 	  a1(jx, jy, jz) = a1(jx+1, jy, jz);
-          }
+  }
+  if (mesh.lastX()) {
+    for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { // outwards: copies jx - 1
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          bcoef(jx, jy, jz) = bcoef(jx - 1, jy, jz);
         }
       }
     }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            a1(jx, jy, jz) =
-                -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.);
-            // 	  a1(jx, jy, jz) = a1(jx-1, jy, jz);
-          }
-        }
+  }
+}
+
+Field3D generate_f1(const Mesh& mesh) { // trial solution f used by tests 1-4
+  const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; // normalises x below
+  const BoutReal nz = mesh.GlobalNz;                       // normalises z below
+
+  constexpr BoutReal p = 0.39503274; // NOLINT
+  constexpr BoutReal q = 0.20974396; // NOLINT
+
+  Field3D result;
+  result.allocate();
+  for (int jx = mesh.xstart; jx <= mesh.xend; jx++) {
+    const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+    for (int jy = 0; jy < mesh.LocalNy; jy++) {
+      for (int jz = 0; jz < mesh.LocalNz; jz++) {
+        const BoutReal z = BoutReal(jz) / nz;
+        // make the gradients zero at both x-boundaries
+        result(jx, jy, jz) = 0.
+                             + exp(-(100. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
+                             - 50.
+                                   * (2. * p * exp(-100. * pow(-p, 2)) * x
+                                      + (-p * exp(-100. * pow(-p, 2))
+                                         - (1 - p) * exp(-100. * pow(1 - p, 2)))
+                                            * pow(x, 2))
+                                   * exp(-(1. - cos(2. * PI * (z - q))));
       }
     }
+  }
+  if (mesh.firstX()) { // x < xstart cells: same form, but 60 replaces 100 in the exps
+    for (int jx = mesh.xstart - 1; jx >= 0; jx--) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
 
-    checkData(f1);
-    checkData(a1);
-    checkData(c1);
-    checkData(d1);
-
-    mesh->communicate(f1, a1, c1, d1);
-
-    b1 = d1 * Delp2(f1) + Grad_perp(c1) * Grad_perp(f1) / c1 + a1 * f1;
-
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            b1(jx, jy, jz) = b1(jx + 1, jy, jz);
-          }
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          // make the gradients zero at both x-boundaries
+          result(jx, jy, jz) = 0.
+                               + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
+                               - 50.
+                                     * (2. * p * exp(-60. * pow(-p, 2)) * x
+                                        + (-p * exp(-60. * pow(-p, 2))
+                                           - (1 - p) * exp(-60. * pow(1 - p, 2)))
+                                              * pow(x, 2))
+                                     * exp(-(1. - cos(2. * PI * (z - q))));
         }
       }
     }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            b1(jx, jy, jz) = b1(jx - 1, jy, jz);
-          }
+  }
+  if (mesh.lastX()) { // x > xend cells: again with 60 in place of 100
+    for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          // make the gradients zero at both x-boundaries
+          result(jx, jy, jz) = 0.
+                               + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
+                               - 50.
+                                     * (2. * p * exp(-60. * pow(-p, 2)) * x
+                                        + (-p * exp(-60. * pow(-p, 2))
+                                           - (1 - p) * exp(-60. * pow(1 - p, 2)))
+                                              * pow(x, 2))
+                                     * exp(-(1. - cos(2. * PI * (z - q))));
         }
       }
     }
+  }
 
-    invert->setInnerBoundaryFlags(INVERT_AC_GRAD);
-    invert->setOuterBoundaryFlags(INVERT_AC_GRAD);
-    invert->setCoefA(a1);
-    invert->setCoefC(c1);
-    invert->setCoefD(d1);
-
-    checkData(b1);
-
-    try {
-      sol1 = invert->solve(sliceXZ(b1, mesh->ystart));
-      error1 = (f1 - sol1) / f1;
-      absolute_error1 = f1 - sol1;
-      //     max_error1 = max_error_at_ystart(abs(error1));
-      max_error1 = max_error_at_ystart(abs(absolute_error1));
-    } catch (BoutException& err) {
-      output << "BoutException occured in invert->solve(b1): " << err.what() << endl;
-      max_error1 = -1;
-    }
+  checkData(result);
+  result.applyBoundary("neumann");
+  return result;
+}
 
-    output << endl << "Test 1: PETSc 2nd order" << endl;
-    //   output<<"Time to set up is "<<Timer::getTime("petscsetup")<<". Time to solve is "<<Timer::getTime("petscsolve")<<endl;
-    //   output<<"Magnitude of maximum relative error is "<<max_error1<<endl;
-    output << "Magnitude of maximum absolute error is " << max_error1 << endl;
-    //   Timer::resetTime("petscsetup");
-    //   Timer::resetTime("petscsolve");
+Field3D generate_d1(const Mesh& mesh) { // coefficient d of the Gaussian-profile tests
+  const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; // normalises x below
+  const BoutReal nz = mesh.GlobalNz;                       // normalises z below
 
-    Options dump;
-    dump["a1"] = a1;
-    dump["b1"] = b1;
-    dump["c1"] = c1;
-    dump["d1"] = d1;
-    dump["f1"] = f1;
-    dump["sol1"] = sol1;
-    dump["error1"] = error1;
-    dump["absolute_error1"] = absolute_error1;
-    dump["max_error1"] = max_error1;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // Test 2: Gaussian x-profiles, 4th order Krylov
-    Field3D sol2;
-    Field3D error2,
-        absolute_error2; //Absolute value of relative error: abs( (f3-sol3)/f3 )
-    BoutReal max_error2; //Output of test
-
-    invert_4th->setInnerBoundaryFlags(INVERT_AC_GRAD);
-    invert_4th->setOuterBoundaryFlags(INVERT_AC_GRAD);
-    invert_4th->setGlobalFlags(INVERT_4TH_ORDER);
-    invert_4th->setCoefA(a1);
-    invert_4th->setCoefC(c1);
-    invert_4th->setCoefD(d1);
-
-    try {
-      sol2 = invert_4th->solve(sliceXZ(b1, mesh->ystart));
-      error2 = (f1 - sol2) / f1;
-      absolute_error2 = f1 - sol2;
-      //     max_error2 = max_error_at_ystart(abs(error2));
-      max_error2 = max_error_at_ystart(abs(absolute_error2));
-    } catch (BoutException& err) {
-      output << "BoutException occured in invert->solve(b1): " << err.what() << endl;
-      max_error2 = -1;
+  constexpr BoutReal p = 0.512547;   // NOLINT
+  constexpr BoutReal q = 0.30908712; // NOLINT
+  Field3D result;
+  result.allocate();
+  for (int jx = mesh.xstart; jx <= mesh.xend; jx++) {
+    const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+    for (int jy = 0; jy < mesh.LocalNy; jy++) {
+      for (int jz = 0; jz < mesh.LocalNz; jz++) {
+        const BoutReal z = BoutReal(jz) / nz;
+        result(jx, jy, jz) =
+            1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.);
+      }
     }
-
-    output << endl << "Test 2: PETSc 4th order" << endl;
-    //   output<<"Time to set up is "<<Timer::getTime("petscsetup")<<". Time to solve is "<<Timer::getTime("petscsolve")<<endl;
-    //   output<<"Magnitude of maximum relative error is "<<max_error2<<endl;
-    output << "Magnitude of maximum absolute error is " << max_error2 << endl;
-    //   Timer::resetTime("petscsetup");
-    //   Timer::resetTime("petscsolve");
-
-    dump["a2"] = a1;
-    dump["b2"] = b1;
-    dump["c2"] = c1;
-    dump["d2"] = d1;
-    dump["f2"] = f1;
-    dump["sol2"] = sol2;
-    dump["error2"] = error2;
-    dump["absolute_error2"] = absolute_error2;
-    dump["max_error2"] = max_error2;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // Test 3+4: Gaussian x-profiles, z-independent coefficients and compare with SPT method
-    Field2D a3, c3, d3;
-    Field3D b3;
-    Field3D sol3, sol4;
-    Field3D error3, absolute_error3, error4, absolute_error4;
-    BoutReal max_error3, max_error4;
-
-    a3 = DC(a1);
-    c3 = DC(c1);
-    d3 = DC(d1);
-    b3 = d3 * Delp2(f1) + Grad_perp(c3) * Grad_perp(f1) / c3 + a3 * f1;
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            b3(jx, jy, jz) = b3(jx + 1, jy, jz);
-          }
+  }
+  if (mesh.firstX()) { // x < xstart cells get the same expression as the interior
+    for (int jx = mesh.xstart - 1; jx >= 0; jx--) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) =
+              1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.);
         }
       }
     }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            b3(jx, jy, jz) = b3(jx - 1, jy, jz);
-          }
+  }
+  if (mesh.lastX()) { // likewise for the x > xend cells
+    for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) =
+              1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.);
         }
       }
     }
+  }
+  checkData(result);
+  return result;
+}
 
-    invert->setInnerBoundaryFlags(INVERT_AC_GRAD);
-    invert->setOuterBoundaryFlags(INVERT_AC_GRAD);
-    invert->setCoefA(a3);
-    invert->setCoefC(c3);
-    invert->setCoefD(d3);
-
-    try {
-      sol3 = invert->solve(sliceXZ(b3, mesh->ystart));
-      error3 = (f1 - sol3) / f1;
-      absolute_error3 = f1 - sol3;
-      //     max_error3 = max_error_at_ystart(abs(error3));
-      max_error3 = max_error_at_ystart(abs(absolute_error3));
-    } catch (BoutException& err) {
-      output << "BoutException occured in invert->solve(b3): " << err.what() << endl;
-      max_error3 = -1;
-    }
+Field3D generate_c1(const Mesh& mesh) { // coefficient c of the Gaussian-profile tests
+  const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; // normalises x below
+  const BoutReal nz = mesh.GlobalNz;                       // normalises z below
 
-    output << endl << "Test 3: with coefficients constant in z, PETSc 2nd order" << endl;
-    //   output<<"Time to set up is "<<Timer::getTime("petscsetup")<<". Time to solve is "<<Timer::getTime("petscsolve")<<endl;
-    //   output<<"Magnitude of maximum relative error is "<<max_error3<<endl;
-    output << "Magnitude of maximum absolute error is " << max_error3 << endl;
-    //   Timer::resetTime("petscsetup");
-    //   Timer::resetTime("petscsolve");
-
-    dump["a3"] = a3;
-    dump["b3"] = b3;
-    dump["c3"] = c3;
-    dump["d3"] = d3;
-    dump["f3"] = f1;
-    dump["sol3"] = sol3;
-    dump["error3"] = error3;
-    dump["absolute_error3"] = absolute_error3;
-    dump["max_error3"] = max_error3;
-
-    Options* SPT_options;
-    SPT_options = Options::getRoot()->getSection("SPT");
-    auto invert_SPT = Laplacian::create(SPT_options);
-    invert_SPT->setInnerBoundaryFlags(INVERT_AC_GRAD);
-    invert_SPT->setOuterBoundaryFlags(INVERT_AC_GRAD | INVERT_DC_GRAD);
-    invert_SPT->setCoefA(a3);
-    invert_SPT->setCoefC(c3);
-    invert_SPT->setCoefD(d3);
-
-    sol4 = invert_SPT->solve(sliceXZ(b3, mesh->ystart));
-    error4 = (f1 - sol4) / f1;
-    absolute_error4 = f1 - sol4;
-    //   max_error4 = max_error_at_ystart(abs(error4));
-    max_error4 = max_error_at_ystart(abs(absolute_error4));
-
-    output << endl << "Test 4: with coefficients constant in z, default solver" << endl;
-    //   output<<"Time to set up is "<<Timer::getTime("petscsetup")<<". Time to solve is "<<Timer::getTime("petscsolve")<<endl;
-    //   output<<"Magnitude of maximum relative error is "<<max_error4<<endl;
-    output << "Magnitude of maximum absolute error is " << max_error4 << endl;
-    //   Timer::resetTime("petscsetup");
-    //   Timer::resetTime("petscsolve");
-
-    dump["a4"] = a3;
-    dump["b4"] = b3;
-    dump["c4"] = c3;
-    dump["d4"] = d3;
-    dump["f4"] = f1;
-    dump["sol4"] = sol4;
-    dump["error4"] = error4;
-    dump["absolute_error4"] = absolute_error4;
-    dump["max_error4"] = max_error4;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // Test 5: Cosine x-profiles, 2nd order Krylov
-    Field3D f5, a5, b5, c5, d5, sol5;
-    Field3D error5,
-        absolute_error5; //Absolute value of relative error: abs( (f5-sol5)/f5 )
-    BoutReal max_error5; //Output of test
-
-    p = 0.623901;
-    q = 0.01209489;
-    f5.allocate();
-    for (int jx = mesh->xstart; jx <= mesh->xend; jx++) {
-      for (int jy = 0; jy < mesh->LocalNy; jy++) {
-        for (int jz = 0; jz < mesh->LocalNz; jz++) {
-          BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-          BoutReal z = BoutReal(jz) / nz;
-          f5(jx, jy, jz) =
-              0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
-              - 50.
-                    * (2. * p * exp(-50. * pow(-p, 2)) * x
-                       + (-p * exp(-50. * pow(-p, 2))
-                          - (1 - p) * exp(-50. * pow(1 - p, 2)))
-                             * pow(x, 2))
-                    * exp(-(
-                        1.
-                        - cos(2. * PI
-                              * (z - q)))) //make the gradients zero at both x-boundaries
-              ;
-        }
+  constexpr BoutReal p = 0.18439023;  // NOLINT
+  constexpr BoutReal q = 0.401089473; // NOLINT
+  Field3D result;
+  result.allocate();
+  for (int jx = mesh.xstart; jx <= mesh.xend; jx++) {
+    const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+    for (int jy = 0; jy < mesh.LocalNy; jy++) {
+      for (int jz = 0; jz < mesh.LocalNz; jz++) {
+        const BoutReal z = BoutReal(jz) / nz;
+        result(jx, jy, jz) =
+            1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.);
       }
     }
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            f5(jx, jy, jz) =
-                0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
-                - 50.
-                      * (2. * p * exp(-50. * pow(-p, 2)) * x
-                         + (-p * exp(-50. * pow(-p, 2))
-                            - (1 - p) * exp(-50. * pow(1 - p, 2)))
-                               * pow(x, 2))
-                      * exp(-(
-                          1.
-                          - cos(
-                              2. * PI
-                              * (z - q)))); //make the gradients zero at both x-boundaries
-          }
+  }
+  if (mesh.firstX()) { // x < xstart cells get the same expression as the interior
+    for (int jx = mesh.xstart - 1; jx >= 0; jx--) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) =
+              1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.);
         }
       }
     }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            f5(jx, jy, jz) =
-                0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
-                - 50.
-                      * (2. * p * exp(-50. * pow(-p, 2)) * x
-                         + (-p * exp(-50. * pow(-p, 2))
-                            - (1 - p) * exp(-50. * pow(1 - p, 2)))
-                               * pow(x, 2))
-                      * exp(-(
-                          1.
-                          - cos(
-                              2. * PI
-                              * (z - q)))); //make the gradients zero at both x-boundaries
-          }
+  }
+  if (mesh.lastX()) { // likewise for the x > xend cells
+    for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) =
+              1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.);
         }
       }
     }
+  }
 
-    p = 0.63298589;
-    q = 0.889237890;
-    d5.allocate();
-    for (int jx = mesh->xstart; jx <= mesh->xend; jx++) {
-      for (int jy = 0; jy < mesh->LocalNy; jy++) {
-        for (int jz = 0; jz < mesh->LocalNz; jz++) {
-          BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-          BoutReal z = BoutReal(jz) / nz;
-          d5(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.);
-        }
+  checkData(result);
+  return result;
+}
+
+Field3D generate_a1(const Mesh& mesh) { // -1 + 0.1 * Gaussian in x * sine in z
+  const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; // divisor for x below
+  const BoutReal nz = mesh.GlobalNz; // divisor for z below
+
+  constexpr BoutReal p = 0.612547;   // NOLINT
+  constexpr BoutReal q = 0.30908712; // NOLINT
+  Field3D result;
+  result.allocate();
+  for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { // x indices xstart..xend
+    const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+    for (int jy = 0; jy < mesh.LocalNy; jy++) {
+      for (int jz = 0; jz < mesh.LocalNz; jz++) {
+        const BoutReal z = BoutReal(jz) / nz;
+        result(jx, jy, jz) =
+            -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.);
       }
     }
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            d5(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.);
-          }
+  }
+  if (mesh.firstX()) { // additionally fill x indices xstart-1 down to 0
+    for (int jx = mesh.xstart - 1; jx >= 0; jx--) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) =
+              -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.);
         }
       }
     }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            d5(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.);
-          }
+  }
+  if (mesh.lastX()) { // additionally fill x indices xend+1 up to LocalNx-1
+    for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) =
+              -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.);
         }
       }
     }
+  }
 
-    p = 0.160983834;
-    q = 0.73050121087;
-    c5.allocate();
-    for (int jx = mesh->xstart; jx <= mesh->xend; jx++) {
-      for (int jy = 0; jy < mesh->LocalNy; jy++) {
-        for (int jz = 0; jz < mesh->LocalNz; jz++) {
-          BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-          BoutReal z = BoutReal(jz) / nz;
-          c5(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.);
-        }
+  checkData(result);
+  return result; // every filled x range used the same expression
+}
+
+Field3D generate_f5(const Mesh& mesh) { // Gaussian-like profile minus a quadratic-in-x term
+  const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; // divisor for x below
+  const BoutReal nz = mesh.GlobalNz; // divisor for z below
+  constexpr BoutReal p = 0.623901;   // NOLINT
+  constexpr BoutReal q = 0.01209489; // NOLINT
+  Field3D result;
+  result.allocate();
+  for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { // x indices xstart..xend
+    const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+    for (int jy = 0; jy < mesh.LocalNy; jy++) {
+      for (int jz = 0; jz < mesh.LocalNz; jz++) {
+        const BoutReal z = BoutReal(jz) / nz;
+        // the subtracted 50 * (...) term is there to zero the gradients at both x-boundaries
+        result(jx, jy, jz) =
+            0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
+            - 50.
+                  * (2. * p * exp(-50. * pow(-p, 2)) * x
+                     + (-p * exp(-50. * pow(-p, 2)) - (1 - p) * exp(-50. * pow(1 - p, 2)))
+                           * pow(x, 2))
+                  * exp(-(1. - cos(2. * PI * (z - q))));
       }
     }
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            c5(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.);
-          }
+  }
+  if (mesh.firstX()) { // additionally fill x indices xstart-1 down to 0
+    for (int jx = mesh.xstart - 1; jx >= 0; jx--) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          // same expression as the xstart..xend loop (gradients zero at both x-boundaries)
+          result(jx, jy, jz) = 0.
+                               + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
+                               - 50.
+                                     * (2. * p * exp(-50. * pow(-p, 2)) * x
+                                        + (-p * exp(-50. * pow(-p, 2))
+                                           - (1 - p) * exp(-50. * pow(1 - p, 2)))
+                                              * pow(x, 2))
+                                     * exp(-(1. - cos(2. * PI * (z - q))));
         }
       }
     }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            c5(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.);
-          }
+  }
+  if (mesh.lastX()) { // additionally fill x indices xend+1 up to LocalNx-1
+    for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          // same expression as the xstart..xend loop (gradients zero at both x-boundaries)
+          result(jx, jy, jz) = 0.
+                               + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q))))
+                               - 50.
+                                     * (2. * p * exp(-50. * pow(-p, 2)) * x
+                                        + (-p * exp(-50. * pow(-p, 2))
+                                           - (1 - p) * exp(-50. * pow(1 - p, 2)))
+                                              * pow(x, 2))
+                                     * exp(-(1. - cos(2. * PI * (z - q))));
         }
       }
     }
+  }
+  result.applyBoundary("neumann"); // applied after all x ranges are filled
+  checkData(result);
+  return result;
+}
 
-    p = 0.5378950;
-    q = 0.2805870;
-    a5.allocate();
-    for (int jx = mesh->xstart; jx <= mesh->xend; jx++) {
-      for (int jy = 0; jy < mesh->LocalNy; jy++) {
-        for (int jz = 0; jz < mesh->LocalNz; jz++) {
-          BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-          BoutReal z = BoutReal(jz) / nz;
-          a5(jx, jy, jz) = -1. + p * cos(2. * PI * x * 2.) * sin(2. * PI * (z - q) * 7.);
-        }
+Field3D generate_d5(const Mesh& mesh) { // 1 + p * cosine in x * sine in z
+  const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; // divisor for x below
+  const BoutReal nz = mesh.GlobalNz; // divisor for z below
+  constexpr BoutReal p = 0.63298589;  // NOLINT
+  constexpr BoutReal q = 0.889237890; // NOLINT
+  Field3D result;
+  result.allocate();
+  for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { // x indices xstart..xend
+    const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+    for (int jy = 0; jy < mesh.LocalNy; jy++) {
+      for (int jz = 0; jz < mesh.LocalNz; jz++) {
+        const BoutReal z = BoutReal(jz) / nz;
+        result(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.);
       }
     }
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            a5(jx, jy, jz) =
-                -1. + p * cos(2. * PI * x * 2.) * sin(2. * PI * (z - q) * 7.);
-          }
+  }
+  if (mesh.firstX()) { // additionally fill x indices xstart-1 down to 0
+    for (int jx = mesh.xstart - 1; jx >= 0; jx--) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.);
         }
       }
     }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx;
-            BoutReal z = BoutReal(jz) / nz;
-            a5(jx, jy, jz) =
-                -1. + p * cos(2. * PI * x * 2.) * sin(2. * PI * (z - q) * 7.);
-          }
+  }
+  if (mesh.lastX()) { // additionally fill x indices xend+1 up to LocalNx-1
+    for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.);
         }
       }
     }
+  }
+  checkData(result);
+  return result; // every filled x range used the same expression
+}
 
-    f5.applyBoundary("neumann");
-    mesh->communicate(f5, a5, c5, d5);
+Field3D generate_c5(const Mesh& mesh) { // 1 + p * cosine in x * sine in z
+  const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; // divisor for x below
+  const BoutReal nz = mesh.GlobalNz; // divisor for z below
+  constexpr BoutReal p = 0.160983834;   // NOLINT
+  constexpr BoutReal q = 0.73050121087; // NOLINT
 
-    b5 = d5 * Delp2(f5) + Grad_perp(c5) * Grad_perp(f5) / c5 + a5 * f5;
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            b5(jx, jy, jz) = b5(jx + 1, jy, jz);
-          }
-        }
+  Field3D result;
+
+  result.allocate();
+  for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { // x indices xstart..xend
+    const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+    for (int jy = 0; jy < mesh.LocalNy; jy++) {
+      for (int jz = 0; jz < mesh.LocalNz; jz++) {
+        const BoutReal z = BoutReal(jz) / nz;
+        result(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.);
       }
     }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            b5(jx, jy, jz) = b5(jx - 1, jy, jz);
-          }
+  }
+  if (mesh.firstX()) { // additionally fill x indices xstart-1 down to 0
+    for (int jx = mesh.xstart - 1; jx >= 0; jx--) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) =
+              1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.);
         }
       }
     }
-
-    invert->setInnerBoundaryFlags(INVERT_AC_GRAD);
-    invert->setOuterBoundaryFlags(INVERT_AC_GRAD);
-    invert->setCoefA(a5);
-    invert->setCoefC(c5);
-    invert->setCoefD(d5);
-
-    try {
-      sol5 = invert->solve(sliceXZ(b5, mesh->ystart));
-      error5 = (f5 - sol5) / f5;
-      absolute_error5 = f5 - sol5;
-      //     max_error5 = max_error_at_ystart(abs(error5));
-      max_error5 = max_error_at_ystart(abs(absolute_error5));
-    } catch (BoutException& err) {
-      output << "BoutException occured in invert->solve(b5): " << err.what() << endl;
-      max_error5 = -1;
-    }
-
-    output << endl << "Test 5: different profiles, PETSc 2nd order" << endl;
-    //   output<<"Time to set up is "<<Timer::getTime("petscsetup")<<". Time to solve is "<<Timer::getTime("petscsolve")<<endl;
-    //   output<<"Magnitude of maximum relative error is "<<max_error5<<endl;
-    output << "Magnitude of maximum absolute error is " << max_error5 << endl;
-    //   Timer::resetTime("petscsetup");
-    //   Timer::resetTime("petscsolve");
-
-    dump["a5"] = a5;
-    dump["b5"] = b5;
-    dump["c5"] = c5;
-    dump["d5"] = d5;
-    dump["f5"] = f5;
-    dump["sol5"] = sol5;
-    dump["error5"] = error5;
-    dump["absolute_error5"] = absolute_error5;
-    dump["max_error5"] = max_error5;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // Test 6: Cosine x-profiles, 4th order Krylov
-    Field3D sol6;
-    Field3D error6,
-        absolute_error6; //Absolute value of relative error: abs( (f5-sol5)/f5 )
-    BoutReal max_error6; //Output of test
-    invert_4th->setInnerBoundaryFlags(INVERT_AC_GRAD);
-    invert_4th->setOuterBoundaryFlags(INVERT_AC_GRAD);
-    invert_4th->setGlobalFlags(INVERT_4TH_ORDER);
-    invert_4th->setCoefA(a5);
-    invert_4th->setCoefC(c5);
-    invert_4th->setCoefD(d5);
-
-    try {
-      sol6 = invert_4th->solve(sliceXZ(b5, mesh->ystart));
-      error6 = (f5 - sol6) / f5;
-      absolute_error6 = f5 - sol6;
-      //     max_error6 = max_error_at_ystart(abs(error6));
-      max_error6 = max_error_at_ystart(abs(absolute_error6));
-    } catch (BoutException& err) {
-      output
-          << "BoutException occured in invert->solve(b6): Laplacian inversion failed to "
-             "converge (probably)"
-          << endl;
-      max_error6 = -1;
+  }
+  if (mesh.lastX()) { // additionally fill x indices xend+1 up to LocalNx-1
+    for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) =
+              1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.);
+        }
+      }
     }
+  }
+  checkData(result);
+  return result; // every filled x range used the same expression
+}
 
-    output << endl << "Test 6: different profiles, PETSc 4th order" << endl;
-    //   output<<"Time to set up is "<<Timer::getTime("petscsetup")<<". Time to solve is "<<Timer::getTime("petscsolve")<<endl;
-    //   output<<"Magnitude of maximum relative error is "<<max_error6<<endl;
-    output << "Magnitude of maximum absolute error is " << max_error6 << endl;
-    //   Timer::resetTime("petscsetup");
-    //   Timer::resetTime("petscsolve");
-
-    dump["a6"] = a5;
-    dump["b6"] = b5;
-    dump["c6"] = c5;
-    dump["d6"] = d5;
-    dump["f6"] = f5;
-    dump["sol6"] = sol6;
-    dump["error6"] = error6;
-    dump["absolute_error6"] = absolute_error6;
-    dump["max_error6"] = max_error6;
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // Test 7+8: Cosine x-profiles, z-independent coefficients and compare with SPT method
-    Field2D a7, c7, d7;
-    Field3D b7;
-    Field3D sol7, sol8;
-    Field3D error7, absolute_error7, error8, absolute_error8;
-    BoutReal max_error7, max_error8;
-
-    a7 = DC(a5);
-    c7 = DC(c5);
-    d7 = DC(d5);
-    b7 = d7 * Delp2(f5) + Grad_perp(c7) * Grad_perp(f5) / c7 + a7 * f5;
-    if (mesh->firstX()) {
-      for (int jx = mesh->xstart - 1; jx >= 0; jx--) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            b7(jx, jy, jz) = b7(jx + 1, jy, jz);
-          }
-        }
+Field3D generate_a5(const Mesh& mesh) { // -1 + p * cosine in x * sine in z
+  const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; // divisor for x below
+  const BoutReal nz = mesh.GlobalNz; // divisor for z below
+  constexpr BoutReal p = 0.5378950; // NOLINT
+  constexpr BoutReal q = 0.2805870; // NOLINT
+  Field3D result;
+  result.allocate();
+  for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { // x indices xstart..xend
+    const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+    for (int jy = 0; jy < mesh.LocalNy; jy++) {
+      for (int jz = 0; jz < mesh.LocalNz; jz++) {
+        const BoutReal z = BoutReal(jz) / nz;
+        result(jx, jy, jz) =
+            -1. + p * cos(2. * PI * x * 2.) * sin(2. * PI * (z - q) * 7.);
       }
     }
-    if (mesh->lastX()) {
-      for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) {
-        for (int jy = 0; jy < mesh->LocalNy; jy++) {
-          for (int jz = 0; jz < mesh->LocalNz; jz++) {
-            b7(jx, jy, jz) = b7(jx - 1, jy, jz);
-          }
+  }
+  if (mesh.firstX()) { // additionally fill x indices xstart-1 down to 0
+    for (int jx = mesh.xstart - 1; jx >= 0; jx--) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) =
+              -1. + p * cos(2. * PI * x * 2.) * sin(2. * PI * (z - q) * 7.);
         }
       }
     }
-
-    invert->setInnerBoundaryFlags(INVERT_AC_GRAD);
-    invert->setOuterBoundaryFlags(INVERT_AC_GRAD);
-    invert->setCoefA(a7);
-    invert->setCoefC(c7);
-    invert->setCoefD(d7);
-
-    try {
-      sol7 = invert->solve(sliceXZ(b7, mesh->ystart));
-      error7 = (f5 - sol7) / f5;
-      absolute_error7 = f5 - sol7;
-      //     max_error7 = max_error_at_ystart(abs(error7));
-      max_error7 = max_error_at_ystart(abs(absolute_error7));
-    } catch (BoutException& err) {
-      output << "BoutException occured in invert->solve(b7): " << err.what() << endl;
-      max_error7 = -1;
-    }
-
-    output
-        << endl
-        << "Test 7: different profiles, with coefficients constant in z, PETSc 2nd order"
-        << endl;
-    //   output<<"Time to set up is "<<Timer::getTime("petscsetup")<<". Time to solve is "<<Timer::getTime("petscsolve")<<endl;
-    //   output<<"Magnitude of maximum relative error is "<<max_error7<<endl;
-    output << "Magnitude of maximum absolute error is " << max_error7 << endl;
-    //   Timer::resetTime("petscsetup");
-    //   Timer::resetTime("petscsolve");
-
-    dump["a7"] = a7;
-    dump["b7"] = b7;
-    dump["c7"] = c7;
-    dump["d7"] = d7;
-    dump["f7"] = f5;
-    dump["sol7"] = sol7;
-    dump["error7"] = error7;
-    dump["absolute_error7"] = absolute_error7;
-    dump["max_error7"] = max_error7;
-
-    invert_SPT->setInnerBoundaryFlags(INVERT_AC_GRAD);
-    invert_SPT->setOuterBoundaryFlags(INVERT_AC_GRAD | INVERT_DC_GRAD);
-    invert_SPT->setCoefA(a7);
-    invert_SPT->setCoefC(c7);
-    invert_SPT->setCoefD(d7);
-
-    sol8 = invert_SPT->solve(sliceXZ(b7, mesh->ystart));
-    error8 = (f5 - sol8) / f5;
-    absolute_error8 = f5 - sol8;
-    //   max_error8 = max_error_at_ystart(abs(error8));
-    max_error8 = max_error_at_ystart(abs(absolute_error8));
-
-    output
-        << endl
-        << "Test 8: different profiles, with coefficients constant in z, default solver"
-        << endl;
-    //   output<<"Time to set up is "<<Timer::getTime("petscsetup")<<". Time to solve is "<<Timer::getTime("petscsolve")<<endl;
-    //   output<<"Magnitude of maximum relative error is "<<max_error8<<endl;
-    output << "Magnitude of maximum absolute error is " << max_error8 << endl;
-    //   Timer::resetTime("petscsetup");
-    //   Timer::resetTime("petscsolve");
-
-    dump["a8"] = a7;
-    dump["b8"] = b7;
-    dump["c8"] = c7;
-    dump["d8"] = d7;
-    dump["f8"] = f5;
-    dump["sol8"] = sol8;
-    dump["error8"] = error8;
-    dump["absolute_error8"] = absolute_error8;
-    dump["max_error8"] = max_error8;
-
-    // Write and close the output file
-    bout::writeDefaultOutputFile(dump);
-
-    MPI_Barrier(BoutComm::get()); // Wait for all processors to write data
   }
-
-  bout::checkForUnusedOptions();
-
-  BoutFinalise();
-  return 0;
-}
-
-BoutReal max_error_at_ystart(const Field3D& error) {
-  const auto* mesh = error.getMesh();
-  BoutReal local_max_error = error(mesh->xstart, mesh->ystart, 0);
-
-  for (int jx = mesh->xstart; jx <= mesh->xend; jx++) {
-    for (int jz = 0; jz < mesh->LocalNz; jz++) {
-      if (local_max_error < error(jx, mesh->ystart, jz)) {
-        local_max_error = error(jx, mesh->ystart, jz);
+  if (mesh.lastX()) { // additionally fill x indices xend+1 up to LocalNx-1
+    for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) {
+      const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx;
+      for (int jy = 0; jy < mesh.LocalNy; jy++) {
+        for (int jz = 0; jz < mesh.LocalNz; jz++) {
+          const BoutReal z = BoutReal(jz) / nz;
+          result(jx, jy, jz) =
+              -1. + p * cos(2. * PI * x * 2.) * sin(2. * PI * (z - q) * 7.);
+        }
       }
     }
   }
-
-  BoutReal max_error;
-
-  MPI_Allreduce(&local_max_error, &max_error, 1, MPI_DOUBLE, MPI_MAX, BoutComm::get());
-
-  return max_error;
+  checkData(result);
+  return result; // every filled x range used the same expression
 }
diff --git a/tests/unit/fake_parallel_mesh.hxx b/tests/unit/fake_parallel_mesh.hxx
index c648bbab9c..805dcb2a0a 100644
--- a/tests/unit/fake_parallel_mesh.hxx
+++ b/tests/unit/fake_parallel_mesh.hxx
@@ -8,6 +8,8 @@
 #include <memory>
 
 #include "../../src/mesh/impls/bout/boutmesh.hxx"
+#include "bout/boundary_op.hxx"
+#include "bout/boundary_region.hxx"
 #include "bout/boutcomm.hxx"
 #include "bout/coordinates.hxx"
 #include "bout/field2d.hxx"
diff --git a/tests/unit/mesh/test_boundary_factory.cxx b/tests/unit/mesh/test_boundary_factory.cxx
index 6637e73711..b552f7629e 100644
--- a/tests/unit/mesh/test_boundary_factory.cxx
+++ b/tests/unit/mesh/test_boundary_factory.cxx
@@ -1,6 +1,7 @@
 #include "gtest/gtest.h"
 
 #include "bout/boundary_factory.hxx"
+#include "bout/boundary_op.hxx"
 #include "bout/boundary_region.hxx"
 
 #include "test_extras.hxx"
diff --git a/tests/unit/test_extras.hxx b/tests/unit/test_extras.hxx
index 6f78e99fd3..700b977ac8 100644
--- a/tests/unit/test_extras.hxx
+++ b/tests/unit/test_extras.hxx
@@ -8,6 +8,7 @@
 #include <numeric>
 #include <vector>
 
+#include "bout/boundary_region.hxx"
 #include "bout/boutcomm.hxx"
 #include "bout/coordinates.hxx"
 #include "bout/field3d.hxx"
@@ -232,8 +233,9 @@ public:
   RangeIterator iterateBndryUpperInnerY() const override { return RangeIterator(); }
   void addBoundary(BoundaryRegion* region) override { boundaries.push_back(region); }
   std::vector<BoundaryRegion*> getBoundaries() override { return boundaries; }
-  std::vector<BoundaryRegionPar*> getBoundariesPar() override {
-    return std::vector<BoundaryRegionPar*>();
+  std::vector<std::shared_ptr<BoundaryRegionPar>>
+  getBoundariesPar(BoundaryParType UNUSED(type)) override {
+    return std::vector<std::shared_ptr<BoundaryRegionPar>>();
   }
   BoutReal GlobalX(int jx) const override { return jx; }
   BoutReal GlobalY(int jy) const override { return jy; }
diff --git a/tools/archiving/sdctools/sdclib/sdclib.c b/tools/archiving/sdctools/sdclib/sdclib.c
index f7db255a47..7294cc0791 100644
--- a/tools/archiving/sdctools/sdclib/sdclib.c
+++ b/tools/archiving/sdctools/sdclib/sdclib.c
@@ -34,8 +34,6 @@
 
 #include "sdclib.h"
 
-//#define DEBUG
-
 #define DEFAULT_IFRAME 10
 #define DEFAULT_ORDER 4
 
diff --git a/tools/pylib/_boutpp_build/CMakeLists.txt b/tools/pylib/_boutpp_build/CMakeLists.txt
index 6b88986a28..3be2a5d2aa 100644
--- a/tools/pylib/_boutpp_build/CMakeLists.txt
+++ b/tools/pylib/_boutpp_build/CMakeLists.txt
@@ -25,7 +25,7 @@ bout_python_maybe_error(${Cython_FOUND} Cython)
 find_package(Bash)
 bout_python_maybe_error(${Bash_FOUND} Bash)
 
-execute_process(COMMAND ${Python_EXECUTABLE} -c "import jinja2"
+execute_process(COMMAND ${Python3_EXECUTABLE} -c "import jinja2"
   RESULT_VARIABLE jinja2_FOUND)
 if (jinja2_FOUND EQUAL 0)
   # We have jinja2 - all good
@@ -33,7 +33,7 @@ else()
   bout_python_maybe_error(OFF jinja2)
 endif()
 
-execute_process(COMMAND ${Python_EXECUTABLE} -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX')[:-3])"
+execute_process(COMMAND ${Python3_EXECUTABLE} -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX')[:-3])"
   RESULT_VARIABLE PYTHON_WORKING
   OUTPUT_VARIABLE PYTHON_EXT_SUFFIX
   OUTPUT_STRIP_TRAILING_WHITESPACE
@@ -73,7 +73,7 @@ foreach(file IN LISTS files)
   #message(FATAL_ERROR "${gen} ${src}/${file}.jinja")
   add_custom_command(OUTPUT ${gen}
 	COMMAND ${CMAKE_COMMAND} -E make_directory ${tar}
-	COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${tar}/..:\${PYTHONPATH} ${Python_EXECUTABLE} generate.py ${file}.jinja ${gen}
+	COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${tar}/..:\${PYTHONPATH} ${Python3_EXECUTABLE} generate.py ${file}.jinja ${gen}
 	DEPENDS ${src}/${file}.jinja
 	DEPENDS ${src}/helper.py
 	DEPENDS ${src}/resolve_enum_inv.pyx.jinja
@@ -93,8 +93,7 @@ endforeach()
 
 add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libboutpp.cpp
   COMMAND ${CMAKE_COMMAND} -E copy boutpp.pyx libboutpp.pyx
-  COMMAND ${Python_EXECUTABLE} -m cython libboutpp.pyx --cplus -3  -X binding=True -X embedsignature=True
-  COMMENT "Cythonizing python interface"
+  COMMAND ${Python3_EXECUTABLE} -m cython libboutpp.pyx --cplus -3  -X binding=True -X embedsignature=True
   WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
   DEPENDS ${boutpp_depends}
   )
@@ -120,5 +119,6 @@ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/boutpp.py
   DESTINATION ${CMAKE_INSTALL_PYTHON_SITEARCH}/boutpp/
   RENAME __init__.py
   )
+
 target_link_libraries(boutpp${PYTHON_EXT_SUFFIX} bout++)
-target_include_directories(boutpp${PYTHON_EXT_SUFFIX} PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> ${Numpy_INCLUDE_DIRS} ${Python_INCLUDE_DIRS})
+target_include_directories(boutpp${PYTHON_EXT_SUFFIX} PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> ${Numpy_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS})
diff --git a/tools/pylib/_boutpp_build/bout_options.pxd b/tools/pylib/_boutpp_build/bout_options.pxd
index be17608cea..365e08bcc7 100644
--- a/tools/pylib/_boutpp_build/bout_options.pxd
+++ b/tools/pylib/_boutpp_build/bout_options.pxd
@@ -43,6 +43,7 @@ cdef extern from "bout/options.hxx":
         void get(string, double&, double)
         void get(string, bool&, bool)
         void cleanCache()
+        void setConditionallyUsed()
 
 
 cdef extern from "bout/optionsreader.hxx":
diff --git a/tools/pylib/_boutpp_build/boutcpp.pxd.jinja b/tools/pylib/_boutpp_build/boutcpp.pxd.jinja
index 12e210a5b5..8f838b864c 100644
--- a/tools/pylib/_boutpp_build/boutcpp.pxd.jinja
+++ b/tools/pylib/_boutpp_build/boutcpp.pxd.jinja
@@ -148,10 +148,10 @@ cdef extern from "bout/physicsmodel.hxx":
 ctypedef void (*Method)(void *param, void *user_data)
 cdef extern from "helper.h":
     cppclass PythonModel(PhysicsModel):
-        int rhs(double t)
+        int rhs(double t) except +raise_bout_py_error
         void pyinit()
         void free()
-        void solve()
+        void solve() except +raise_bout_py_error
         Solver * getSolver()
         void set_rhs_func(PythonModelCallback*)
         void set_init_func(PythonModelCallback*)
diff --git a/tools/pylib/_boutpp_build/boutpp.pyx.jinja b/tools/pylib/_boutpp_build/boutpp.pyx.jinja
index 3aeb1428eb..9aedbb291a 100644
--- a/tools/pylib/_boutpp_build/boutpp.pyx.jinja
+++ b/tools/pylib/_boutpp_build/boutpp.pyx.jinja
@@ -583,9 +583,9 @@ cdef class {{ field.field_type }}:
 
 {% endfor %}
     def __dealloc__(self):
-        self.__boutpp_dealloc()
+        self._boutpp_dealloc()
 
-    def __boutpp_dealloc(self):
+    def _boutpp_dealloc(self):
         if self.isSelfOwned and self.cobj != NULL:
             del self.cobj
             self.cobj = NULL
@@ -645,9 +645,9 @@ cdef class {{ vec }}:
 
 
     def __dealloc__(self):
-        self.__boutpp_dealloc()
+        self._boutpp_dealloc()
 
-    def __boutpp_dealloc(self):
+    def _boutpp_dealloc(self):
         if self.isSelfOwned and self.cobj != NULL:
             del self.cobj
             self.cobj=NULL
@@ -742,9 +742,9 @@ cdef class Mesh:
         return msh
 
     def __dealloc__(self):
-        self.__boutpp_dealloc()
+        self._boutpp_dealloc()
 
-    def __boutpp_dealloc(self):
+    def _boutpp_dealloc(self):
         if self.cobj and self.isSelfOwned:
             del self.cobj
             self.cobj = NULL
@@ -850,9 +850,9 @@ cdef class Coordinates:
 {% endfor %}
 
     def __dealloc__(self):
-        self.__boutpp_dealloc()
+        self._boutpp_dealloc()
 
-    def __boutpp_dealloc(self):
+    def _boutpp_dealloc(self):
         if self.cobj and self.isSelfOwned:
             del self.cobj
             self.cobj = NULL
@@ -931,9 +931,9 @@ cdef class FieldFactory:
         checkInit()
         cobj=< c.FieldFactory*>0
     def __dealloc__(self):
-        self.__boutpp_dealloc()
+        self._boutpp_dealloc()
 
-    def __boutpp_dealloc(self):
+    def _boutpp_dealloc(self):
         if self.cobj != NULL:
             del self.cobj
             self.cobj = NULL
@@ -965,9 +965,9 @@ cdef class PythonModelCallback:
         self.cobj = new c.PythonModelCallback(callback, <void*>method)
 
     def __dealloc__(self):
-        self.__boutpp_dealloc()
+        self._boutpp_dealloc()
 
-    def __boutpp_dealloc(self):
+    def _boutpp_dealloc(self):
         if self.cobj:
             del self.cobj
             self.cobj = NULL
@@ -1037,12 +1037,12 @@ cdef class PhysicsModelBase(object):
         self.cmodel.set_init_func(self.callbackinit)
 
     def __dealloc__(self):
-        if hasattr(self, "__boutpp_dealloc"):
-            self.__boutpp_dealloc()
+        if hasattr(self, "_boutpp_dealloc"):
+            self._boutpp_dealloc()
         else:
-            PhysicsModelBase.__boutpp_dealloc(self)
+            PhysicsModelBase._boutpp_dealloc(self)
 
-    def __boutpp_dealloc(self):
+    def _boutpp_dealloc(self):
         if self.cmodel != <c.PythonModel *> 0:
             self.cmodel.free()
             del self.cmodel
@@ -1123,8 +1123,8 @@ class PhysicsModel(PhysicsModelBase):
     def __dealloc__(self):
         super(PhysicsModel,self).__dealloc__()
 
-    def __boutpp_dealloc(self):
-        super(PhysicsModel,self).__boutpp_dealloc()
+    def _boutpp_dealloc(self):
+        super(PhysicsModel,self)._boutpp_dealloc()
 
 cdef extern from "bout/bout.hxx":
     int BoutInitialise(int&, char **&) except +raise_bout_py_error
@@ -1204,13 +1204,14 @@ def finalise():
                   PythonModelCallback)
     for obj in objects:
         if isinstance(obj, ourClasses):
-            if hasattr(obj, "__boutpp_dealloc"):
-                obj.__boutpp_dealloc()
+            if hasattr(obj, "_boutpp_dealloc"):
+                obj._boutpp_dealloc()
             else:
                 for ourClass in ourClasses:
                     if isinstance(obj, ourClass):
-                         ourClass.__boutpp_dealloc(obj)
-                         break
+                        if hasattr(ourClass, "_boutpp_dealloc"):
+                            ourClass._boutpp_dealloc(obj)
+                            break
     del objects
     # Actually finalise
     if wasInit:
@@ -1715,10 +1716,19 @@ cdef class Options:
             opt.get(key, ret_str, default_)
             return ret_str.decode()
 
+    def setConditionallyUsed(self):
+        """Set the attribute "conditionally used" to be true for this section
+        and all its children/sections, causing `Options::getUnused` to
+        assume those options have been used. This is useful to ignore
+        options when checking for typos etc.
+        """
+        cdef c.Options* opt = self.cobj
+        opt.setConditionallyUsed()
+
     def __dealloc__(self):
-        self.__boutpp_dealloc()
+        self._boutpp_dealloc()
 
-    def __boutpp_dealloc(self):
+    def _boutpp_dealloc(self):
         if self.isSelfOwned and self.cobj != NULL:
             del self.cobj
             self.cobj = NULL