From 6ee9f7452468a8520a6671345279f95115b73b16 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Sat, 3 Feb 2024 11:14:39 -0800 Subject: [PATCH 01/13] Simplify XZInterpolation class Reduce duplication by moving some overloads to the base class. --- include/bout/interpolation_xz.hxx | 77 ++++++++------------ src/mesh/interpolation/bilinear_xz.cxx | 19 ----- src/mesh/interpolation/hermite_spline_xz.cxx | 19 ----- src/mesh/interpolation/lagrange_4pt_xz.cxx | 19 ----- src/mesh/interpolation_xz.cxx | 6 +- 5 files changed, 35 insertions(+), 105 deletions(-) diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index 3f8e37d3fd..8a815dfb6b 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -1,8 +1,7 @@ /************************************************************************** - * Copyright 2010-2020 B.D.Dudson, S.Farley, P. Hill, J.T. Omotani, J.T. Parker, - * M.V.Umansky, X.Q.Xu + * Copyright 2010-2024 BOUT++ contributors * - * Contact: Ben Dudson, bd512@york.ac.uk + * Contact: Ben Dudson, dudson2@llnl.gov * * This file is part of BOUT++. * @@ -21,8 +20,8 @@ * **************************************************************************/ -#ifndef __INTERP_XZ_H__ -#define __INTERP_XZ_H__ +#ifndef INTERP_XZ_H +#define INTERP_XZ_H #include "bout/mask.hxx" @@ -95,20 +94,30 @@ public: ASSERT1(has_region); return getRegion(); } + /// Calculate weights using given offsets in X and Z virtual void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const std::string& region = "RGN_NOBNDRY") = 0; - virtual void calcWeights(const Field3D& delta_x, const Field3D& delta_z, - const BoutMask& mask, - const std::string& region = "RGN_NOBNDRY") = 0; + void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const BoutMask& mask, + const std::string& region = "RGN_NOBNDRY") { + setMask(mask); + calcWeights(delta_x, delta_z, region); + } + /// Use pre-calculated weights virtual Field3D interpolate(const Field3D& f, const std::string& region = "RGN_NOBNDRY") const = 0; - virtual Field3D interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, - const std::string& region = "RGN_NOBNDRY") = 0; - virtual Field3D interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const BoutMask& mask, - const std::string& region = "RGN_NOBNDRY") = 0; + + /// Calculate weights then interpolate + Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z, + const std::string& region = "RGN_NOBNDRY") { + calcWeights(delta_x, delta_z, region); + return interpolate(f, region); + } + Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z, + const BoutMask& mask, const std::string& region = "RGN_NOBNDRY") { + calcWeights(delta_x, delta_z, mask, region); + return interpolate(f, region); + } // Interpolate using the field at (x,y+y_offset,z), rather than (x,y,z) void setYOffset(int offset) { y_offset = offset; } @@ -162,18 +171,10 @@ public: void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const std::string& region = "RGN_NOBNDRY") override; - void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const BoutMask& mask, - const std::string& region = "RGN_NOBNDRY") override; - // Use precalculated weights Field3D interpolate(const Field3D& f, const std::string& region = "RGN_NOBNDRY") const override; - // Calculate weights and interpolate - Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z, - const std::string& region = "RGN_NOBNDRY") override; - Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z, - const BoutMask& mask, - const std::string& region = "RGN_NOBNDRY") override; + std::vector getWeightsForYApproximation(int i, int j, int k, int yoffset) override; }; @@ -208,6 +209,10 @@ class XZLagrange4pt : public XZInterpolation { Field3D t_x, t_z; + BoutReal lagrange_4pt(BoutReal v2m, BoutReal vm, BoutReal vp, BoutReal v2p, + BoutReal offset) const; + BoutReal lagrange_4pt(const BoutReal v[], BoutReal offset) const; + public: XZLagrange4pt(Mesh* mesh = nullptr) : XZLagrange4pt(0, mesh) {} XZLagrange4pt(int y_offset = 0, Mesh* mesh = nullptr); @@ -218,21 +223,10 @@ public: void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const std::string& region = "RGN_NOBNDRY") override; - void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const BoutMask& mask, - const std::string& region = "RGN_NOBNDRY") override; // Use precalculated weights Field3D interpolate(const Field3D& f, const std::string& region = "RGN_NOBNDRY") const override; - // Calculate weights and interpolate - Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z, - const std::string& region = "RGN_NOBNDRY") override; - Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z, - const BoutMask& mask, - const std::string& region = "RGN_NOBNDRY") override; - BoutReal lagrange_4pt(BoutReal v2m, BoutReal vm, BoutReal vp, BoutReal v2p, - BoutReal offset) const; - BoutReal lagrange_4pt(const BoutReal v[], BoutReal offset) const; }; class XZBilinear : public XZInterpolation { @@ -251,18 +245,10 @@ public: void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const std::string& region = "RGN_NOBNDRY") override; - void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const BoutMask& mask, - const std::string& region = "RGN_NOBNDRY") override; // Use precalculated weights Field3D interpolate(const Field3D& f, const std::string& region = "RGN_NOBNDRY") const override; - // Calculate weights and interpolate - Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z, - const std::string& region = "RGN_NOBNDRY") override; - Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z, - const BoutMask& mask, - const std::string& region = "RGN_NOBNDRY") override; }; class XZInterpolationFactory @@ -279,11 +265,12 @@ public: ReturnType create(const std::string& type, [[maybe_unused]] Options* options) const { return Factory::create(type, nullptr); } - - static void ensureRegistered(); }; template using RegisterXZInterpolation = XZInterpolationFactory::RegisterInFactory; -#endif // __INTERP_XZ_H__ +using RegisterUnavailableXZInterpolation = + XZInterpolationFactory::RegisterUnavailableInFactory; + +#endif // INTERP_XZ_H diff --git a/src/mesh/interpolation/bilinear_xz.cxx b/src/mesh/interpolation/bilinear_xz.cxx index 8445764a8f..4c0603f983 100644 --- a/src/mesh/interpolation/bilinear_xz.cxx +++ b/src/mesh/interpolation/bilinear_xz.cxx @@ -83,12 +83,6 @@ void XZBilinear::calcWeights(const Field3D& delta_x, const Field3D& delta_z, } } -void XZBilinear::calcWeights(const Field3D& delta_x, const Field3D& delta_z, - const BoutMask& mask, const std::string& region) { - setMask(mask); - calcWeights(delta_x, delta_z, region); -} - Field3D XZBilinear::interpolate(const Field3D& f, const std::string& region) const { ASSERT1(f.getMesh() == localmesh); Field3D f_interp{emptyFrom(f)}; @@ -113,16 +107,3 @@ Field3D XZBilinear::interpolate(const Field3D& f, const std::string& region) con } return f_interp; } - -Field3D XZBilinear::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const std::string& region) { - calcWeights(delta_x, delta_z, region); - return interpolate(f, region); -} - -Field3D XZBilinear::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const BoutMask& mask, - const std::string& region) { - calcWeights(delta_x, delta_z, mask, region); - return interpolate(f, region); -} diff --git a/src/mesh/interpolation/hermite_spline_xz.cxx b/src/mesh/interpolation/hermite_spline_xz.cxx index a8e5df7cdf..e2ea540a1f 100644 --- a/src/mesh/interpolation/hermite_spline_xz.cxx +++ b/src/mesh/interpolation/hermite_spline_xz.cxx @@ -111,12 +111,6 @@ void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z } } -void XZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, - const BoutMask& mask, const std::string& region) { - setMask(mask); - calcWeights(delta_x, delta_z, region); -} - /*! * Return position and weight of points needed to approximate the function value at the * point that the field line through (i,j,k) meets the (j+1)-plane. For the case where @@ -223,16 +217,3 @@ Field3D XZHermiteSpline::interpolate(const Field3D& f, const std::string& region } return f_interp; } - -Field3D XZHermiteSpline::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const std::string& region) { - calcWeights(delta_x, delta_z, region); - return interpolate(f, region); -} - -Field3D XZHermiteSpline::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const BoutMask& mask, - const std::string& region) { - calcWeights(delta_x, delta_z, mask, region); - return interpolate(f, region); -} diff --git a/src/mesh/interpolation/lagrange_4pt_xz.cxx b/src/mesh/interpolation/lagrange_4pt_xz.cxx index 92c14ecfd5..903e64767b 100644 --- a/src/mesh/interpolation/lagrange_4pt_xz.cxx +++ b/src/mesh/interpolation/lagrange_4pt_xz.cxx @@ -75,12 +75,6 @@ void XZLagrange4pt::calcWeights(const Field3D& delta_x, const Field3D& delta_z, } } -void XZLagrange4pt::calcWeights(const Field3D& delta_x, const Field3D& delta_z, - const BoutMask& mask, const std::string& region) { - setMask(mask); - calcWeights(delta_x, delta_z, region); -} - Field3D XZLagrange4pt::interpolate(const Field3D& f, const std::string& region) const { ASSERT1(f.getMesh() == localmesh); @@ -132,19 +126,6 @@ Field3D XZLagrange4pt::interpolate(const Field3D& f, const std::string& region) return f_interp; } -Field3D XZLagrange4pt::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const std::string& region) { - calcWeights(delta_x, delta_z, region); - return interpolate(f, region); -} - -Field3D XZLagrange4pt::interpolate(const Field3D& f, const Field3D& delta_x, - const Field3D& delta_z, const BoutMask& mask, - const std::string& region) { - calcWeights(delta_x, delta_z, mask, region); - return interpolate(f, region); -} - // 4-point Lagrangian interpolation // offset must be between 0 and 1 BoutReal XZLagrange4pt::lagrange_4pt(const BoutReal v2m, const BoutReal vm, diff --git a/src/mesh/interpolation_xz.cxx b/src/mesh/interpolation_xz.cxx index f7f0b457f2..7dc48eca90 100644 --- a/src/mesh/interpolation_xz.cxx +++ b/src/mesh/interpolation_xz.cxx @@ -38,7 +38,9 @@ const Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z) { TRACE("Interpolating 3D field"); XZLagrange4pt interpolateMethod{f.getMesh()}; - return interpolateMethod.interpolate(f, delta_x, delta_z); + // Cast to base pointer so virtual function overload is resolved + return static_cast(&interpolateMethod) + ->interpolate(f, delta_x, delta_z); } const Field3D interpolate(const Field2D& f, const Field3D& delta_x, @@ -84,8 +86,6 @@ const Field3D interpolate(const Field2D& f, const Field3D& delta_x) { return result; } -void XZInterpolationFactory::ensureRegistered() {} - namespace { RegisterXZInterpolation registerinterphermitespline{"hermitespline"}; RegisterXZInterpolation registerinterpmonotonichermitespline{ From 936cd55ad7468b8a971a1b1e53ca60092b7e297a Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Sun, 4 Feb 2024 10:25:04 -0800 Subject: [PATCH 02/13] Moving registration to header Optimised build was omitting XZInterpolation factory registrations. --- include/bout/interpolation_xz.hxx | 8 ++++++++ src/mesh/interpolation_xz.cxx | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index 8a815dfb6b..d8dbeebc7d 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -273,4 +273,12 @@ using RegisterXZInterpolation = XZInterpolationFactory::RegisterInFactory registerinterphermitespline{"hermitespline"}; +RegisterXZInterpolation registerinterpmonotonichermitespline{ + "monotonichermitespline"}; +RegisterXZInterpolation registerinterplagrange4pt{"lagrange4pt"}; +RegisterXZInterpolation registerinterpbilinear{"bilinear"}; +} // namespace + #endif // INTERP_XZ_H diff --git a/src/mesh/interpolation_xz.cxx b/src/mesh/interpolation_xz.cxx index 7dc48eca90..d151c35613 100644 --- a/src/mesh/interpolation_xz.cxx +++ b/src/mesh/interpolation_xz.cxx @@ -85,11 +85,3 @@ const Field3D interpolate(const Field2D& f, const Field3D& delta_x) { } return result; } - -namespace { -RegisterXZInterpolation registerinterphermitespline{"hermitespline"}; -RegisterXZInterpolation registerinterpmonotonichermitespline{ - "monotonichermitespline"}; -RegisterXZInterpolation registerinterplagrange4pt{"lagrange4pt"}; -RegisterXZInterpolation registerinterpbilinear{"bilinear"}; -} // namespace From 0cdb962c2e9431c29fdd0cd62c8fdbe1e74298a2 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Fri, 24 May 2024 10:16:59 +0100 Subject: [PATCH 03/13] Make interpolation registration variables `const inline` --- include/bout/interpolation_xz.hxx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index a3b722f824..5d97ff004f 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -274,11 +274,12 @@ using RegisterUnavailableXZInterpolation = XZInterpolationFactory::RegisterUnavailableInFactory; namespace { -RegisterXZInterpolation registerinterphermitespline{"hermitespline"}; -RegisterXZInterpolation registerinterpmonotonichermitespline{ +const inline RegisterXZInterpolation registerinterphermitespline{"hermitespline"}; +const inline RegisterXZInterpolation registerinterpmonotonichermitespline{ "monotonichermitespline"}; -RegisterXZInterpolation registerinterplagrange4pt{"lagrange4pt"}; -RegisterXZInterpolation registerinterpbilinear{"bilinear"}; +const inline RegisterXZInterpolation registerinterplagrange4pt{ + "lagrange4pt"}; +const inline RegisterXZInterpolation registerinterpbilinear{"bilinear"}; } // namespace #endif // BOUT_INTERP_XZ_H From 86ee8be68c982c3d2989aa0f14bb2d094543605a Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Fri, 24 May 2024 10:25:22 +0100 Subject: [PATCH 04/13] Fix issue with overload hiding base virtual functions --- include/bout/interpolation_xz.hxx | 4 ++++ src/mesh/interpolation_xz.cxx | 4 +--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index 5d97ff004f..d97c4a4776 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -224,6 +224,8 @@ public: void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const std::string& region = "RGN_NOBNDRY") override; + using XZInterpolation::interpolate; + // Use precalculated weights Field3D interpolate(const Field3D& f, const std::string& region = "RGN_NOBNDRY") const override; @@ -246,6 +248,8 @@ public: void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const std::string& region = "RGN_NOBNDRY") override; + using XZInterpolation::interpolate; + // Use precalculated weights Field3D interpolate(const Field3D& f, const std::string& region = "RGN_NOBNDRY") const override; diff --git a/src/mesh/interpolation_xz.cxx b/src/mesh/interpolation_xz.cxx index d151c35613..52c348921c 100644 --- a/src/mesh/interpolation_xz.cxx +++ b/src/mesh/interpolation_xz.cxx @@ -38,9 +38,7 @@ const Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z) { TRACE("Interpolating 3D field"); XZLagrange4pt interpolateMethod{f.getMesh()}; - // Cast to base pointer so virtual function overload is resolved - return static_cast(&interpolateMethod) - ->interpolate(f, delta_x, delta_z); + return interpolateMethod.interpolate(f, delta_x, delta_z); } const Field3D interpolate(const Field2D& f, const Field3D& delta_x, From cb8b5501395ce9fa84ac2fe87c79a29fe73289c3 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Fri, 24 May 2024 10:37:28 +0100 Subject: [PATCH 05/13] Make `lagrange_4pt` interpolation function a static free function --- include/bout/interpolation_xz.hxx | 4 -- src/mesh/interpolation/lagrange_4pt_xz.cxx | 59 +++++++++++----------- 2 files changed, 30 insertions(+), 33 deletions(-) diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index d97c4a4776..3c26978132 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -209,10 +209,6 @@ class XZLagrange4pt : public XZInterpolation { Field3D t_x, t_z; - BoutReal lagrange_4pt(BoutReal v2m, BoutReal vm, BoutReal vp, BoutReal v2p, - BoutReal offset) const; - BoutReal lagrange_4pt(const BoutReal v[], BoutReal offset) const; - public: XZLagrange4pt(Mesh* mesh = nullptr) : XZLagrange4pt(0, mesh) {} XZLagrange4pt(int y_offset = 0, Mesh* mesh = nullptr); diff --git a/src/mesh/interpolation/lagrange_4pt_xz.cxx b/src/mesh/interpolation/lagrange_4pt_xz.cxx index 903e64767b..9d37bf501b 100644 --- a/src/mesh/interpolation/lagrange_4pt_xz.cxx +++ b/src/mesh/interpolation/lagrange_4pt_xz.cxx @@ -20,11 +20,29 @@ * **************************************************************************/ +#include "bout/bout_types.hxx" #include "bout/globals.hxx" #include "bout/interpolation_xz.hxx" #include "bout/mesh.hxx" -#include +#include + +namespace { +// 4-point Lagrangian interpolation +// offset must be between 0 and 1 +BoutReal lagrange_4pt(const BoutReal v2m, const BoutReal v1m, const BoutReal v1p, + const BoutReal v2p, const BoutReal offset) { + return -offset * (offset - 1.0) * (offset - 2.0) * v2m / 6.0 + + 0.5 * (offset * offset - 1.0) * (offset - 2.0) * v1m + - 0.5 * offset * (offset + 1.0) * (offset - 2.0) * v1p + + offset * (offset * offset - 1.0) * v2p / 6.0; +} +// Convenience helper +BoutReal lagrange_4pt(const std::array& x, const BoutReal offset) { + return lagrange_4pt(x[0], x[1], x[2], x[3], offset); +} + +} // namespace XZLagrange4pt::XZLagrange4pt(int y_offset, Mesh* mesh) : XZInterpolation(y_offset, mesh), t_x(localmesh), t_z(localmesh) { @@ -101,42 +119,25 @@ Field3D XZLagrange4pt::interpolate(const Field3D& f, const std::string& region) const int jz2mnew = (jz - 1 + ncz) % ncz; // Interpolate in Z first - BoutReal xvals[4]; - const int y_next = y + y_offset; - xvals[0] = lagrange_4pt(f(jx2mnew, y_next, jz2mnew), f(jx2mnew, y_next, jz), - f(jx2mnew, y_next, jzpnew), f(jx2mnew, y_next, jz2pnew), - t_z(x, y, z)); + const std::array xvals{ + lagrange_4pt(f(jx2mnew, y_next, jz2mnew), f(jx2mnew, y_next, jz), + f(jx2mnew, y_next, jzpnew), f(jx2mnew, y_next, jz2pnew), + t_z(x, y, z)), - xvals[1] = lagrange_4pt(f(jx, y_next, jz2mnew), f(jx, y_next, jz), - f(jx, y_next, jzpnew), f(jx, y_next, jz2pnew), t_z(x, y, z)); + lagrange_4pt(f(jx, y_next, jz2mnew), f(jx, y_next, jz), f(jx, y_next, jzpnew), + f(jx, y_next, jz2pnew), t_z(x, y, z)), - xvals[2] = lagrange_4pt(f(jxpnew, y_next, jz2mnew), f(jxpnew, y_next, jz), - f(jxpnew, y_next, jzpnew), f(jxpnew, y_next, jz2pnew), t_z(x, y, z)); - - xvals[3] = lagrange_4pt(f(jx2pnew, y_next, jz2mnew), f(jx2pnew, y_next, jz), - f(jx2pnew, y_next, jzpnew), f(jx2pnew, y_next, jz2pnew), - t_z(x, y, z)); + f(jxpnew, y_next, jzpnew), f(jxpnew, y_next, jz2pnew), t_z(x, y, z)), + lagrange_4pt(f(jx2pnew, y_next, jz2mnew), f(jx2pnew, y_next, jz), + f(jx2pnew, y_next, jzpnew), f(jx2pnew, y_next, jz2pnew), + t_z(x, y, z)), + }; // Then in X f_interp(x, y_next, z) = lagrange_4pt(xvals, t_x(x, y, z)); } return f_interp; } - -// 4-point Lagrangian interpolation -// offset must be between 0 and 1 -BoutReal XZLagrange4pt::lagrange_4pt(const BoutReal v2m, const BoutReal vm, - const BoutReal vp, const BoutReal v2p, - const BoutReal offset) const { - return -offset * (offset - 1.0) * (offset - 2.0) * v2m / 6.0 - + 0.5 * (offset * offset - 1.0) * (offset - 2.0) * vm - - 0.5 * offset * (offset + 1.0) * (offset - 2.0) * vp - + offset * (offset * offset - 1.0) * v2p / 6.0; -} - -BoutReal XZLagrange4pt::lagrange_4pt(const BoutReal v[], const BoutReal offset) const { - return lagrange_4pt(v[0], v[1], v[2], v[3], offset); -} From b9cbb525a171e04f44381823d3391348a58fb867 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Fri, 24 May 2024 13:02:01 +0100 Subject: [PATCH 06/13] Add `PetscXZHermiteSpline` that can be split in x direction Based on @dschwoerer's implementation in #2651 --- CMakeLists.txt | 1 + include/bout/interpolation_xz.hxx | 75 +++- .../interpolation/petsc_hermite_spline_xz.cxx | 367 ++++++++++++++++++ .../integrated/test-interpolate/data/BOUT.inp | 1 - tests/integrated/test-interpolate/runtest | 23 +- .../test-interpolate/test_interpolate.cxx | 148 +++---- 6 files changed, 524 insertions(+), 91 deletions(-) create mode 100644 src/mesh/interpolation/petsc_hermite_spline_xz.cxx diff --git a/CMakeLists.txt b/CMakeLists.txt index f57a78a14a..83dde0f1b6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -270,6 +270,7 @@ set(BOUT_SOURCES ./src/mesh/interpolation/interpolation_z.cxx ./src/mesh/interpolation/lagrange_4pt_xz.cxx ./src/mesh/interpolation/monotonic_hermite_spline_xz.cxx + ./src/mesh/interpolation/petsc_hermite_spline_xz.cxx ./src/mesh/mesh.cxx ./src/mesh/parallel/fci.cxx ./src/mesh/parallel/fci.hxx diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index 3c26978132..ddb891ad19 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -23,7 +23,9 @@ #ifndef BOUT_INTERP_XZ_H #define BOUT_INTERP_XZ_H +#include "bout/build_defines.hxx" #include "bout/mask.hxx" +#include "bout/petsclib.hxx" class Options; @@ -251,6 +253,64 @@ public: const std::string& region = "RGN_NOBNDRY") const override; }; +#if BOUT_HAS_PETSC +class PetscXZHermiteSpline : public XZInterpolation { + PetscLib petsclib; + bool isInit{false}; + Mat petscWeights{nullptr}; + Vec rhs{nullptr}; + Vec result{nullptr}; + std::vector newWeights; + + Tensor> i_corner; // x-index of bottom-left grid point + Tensor k_corner; // z-index of bottom-left grid point + + // Basis functions for cubic Hermite spline interpolation + // see http://en.wikipedia.org/wiki/Cubic_Hermite_spline + // The h00 and h01 basis functions are applied to the function itself + // and the h10 and h11 basis functions are applied to its derivative + // along the interpolation direction. + Field3D h00_x; + Field3D h01_x; + Field3D h10_x; + Field3D h11_x; + Field3D h00_z; + Field3D h01_z; + Field3D h10_z; + Field3D h11_z; + +public: + PetscXZHermiteSpline(int y_offset = 0, Mesh* mesh = nullptr); + PetscXZHermiteSpline(Mesh* mesh = nullptr) : PetscXZHermiteSpline(0, mesh) {} + PetscXZHermiteSpline(const BoutMask& mask, int y_offset = 0, Mesh* mesh = nullptr) + : PetscXZHermiteSpline(y_offset, mesh) { + region = regionFromMask(mask, localmesh); + } + PetscXZHermiteSpline(const PetscXZHermiteSpline& other) = delete; + PetscXZHermiteSpline(PetscXZHermiteSpline&& other) = default; + PetscXZHermiteSpline& operator=(const PetscXZHermiteSpline& other) = delete; + PetscXZHermiteSpline& operator=(PetscXZHermiteSpline&& other) = delete; + ~PetscXZHermiteSpline() override; + + void calcWeights(const Field3D& delta_x, const Field3D& delta_z, + const std::string& region = "RGN_NOBNDRY") override; + void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const BoutMask& mask, + const std::string& region = "RGN_NOBNDRY"); + + // Use precalculated weights + Field3D interpolate(const Field3D& f, + const std::string& region = "RGN_NOBNDRY") const override; + // Calculate weights and interpolate + Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z, + const std::string& region = "RGN_NOBNDRY"); + Field3D interpolate(const Field3D& f, const Field3D& delta_x, const Field3D& delta_z, + const BoutMask& mask, const std::string& region = "RGN_NOBNDRY"); + + std::vector + getWeightsForYApproximation(int i, int j, int k, int yoffset) override; +}; +#endif // BOUT_HAS_PETSC + class XZInterpolationFactory : public Factory { public: @@ -274,12 +334,21 @@ using RegisterUnavailableXZInterpolation = XZInterpolationFactory::RegisterUnavailableInFactory; namespace { -const inline RegisterXZInterpolation registerinterphermitespline{"hermitespline"}; -const inline RegisterXZInterpolation registerinterpmonotonichermitespline{ - "monotonichermitespline"}; +const inline RegisterXZInterpolation registerinterphermitespline{ + "hermitespline"}; +const inline RegisterXZInterpolation + registerinterpmonotonichermitespline{"monotonichermitespline"}; const inline RegisterXZInterpolation registerinterplagrange4pt{ "lagrange4pt"}; const inline RegisterXZInterpolation registerinterpbilinear{"bilinear"}; + +#if BOUT_HAS_PETSC +const inline RegisterXZInterpolation registerpetschermitespline{ + "petschermitespline"}; +#else +const inline RegisterUnavailableXZInterpolation register_unavailable_petsc_hermite_spline{ + "petschermitespline", "BOUT++ was not configured with PETSc"}; +#endif } // namespace #endif // BOUT_INTERP_XZ_H diff --git a/src/mesh/interpolation/petsc_hermite_spline_xz.cxx b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx new file mode 100644 index 0000000000..df5ad46261 --- /dev/null +++ b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx @@ -0,0 +1,367 @@ +/************************************************************************** + * Copyright 2024 The BOUT++ Team + * + * Contact: Ben Dudson, bd512@york.ac.uk + * + * This file is part of BOUT++. + * + * BOUT++ is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * BOUT++ is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with BOUT++. If not, see . + * + **************************************************************************/ + +#include "../impls/bout/boutmesh.hxx" +#include "bout/globals.hxx" +#include "bout/index_derivs_interface.hxx" +#include "bout/interpolation_xz.hxx" + +#include +#include +#include +#include + +class IndConverter { +public: + IndConverter(Mesh* mesh) + : mesh(dynamic_cast(mesh)), nxpe(mesh->getNXPE()), + nype(mesh->getNYPE()) {} + // ix and iy are global indices + // iy is local + int fromMeshToGlobal(int ix, int iy, int iz) { + const int xstart = mesh->xstart; + const int lnx = mesh->LocalNx - xstart * 2; + // x-proc-id + int pex = divToNeg(ix - xstart, lnx); + if (pex < 0) { + pex = 0; + } + if (pex >= nxpe) { + pex = nxpe - 1; + } + const int zstart = 0; + const int lnz = mesh->LocalNz - zstart * 2; + // z-proc-id + // pez only for wrapping around ; later needs similar treatment than pey + const int pez = divToNeg(iz - zstart, lnz); + // y proc-id - y is already local + const int ystart = mesh->ystart; + const int lny = mesh->LocalNy - ystart * 2; + const int pey_offset = divToNeg(iy - ystart, lny); + int pey = pey_offset + mesh->getYProcIndex(); + while (pey < 0) { + pey += nype; + } + while (pey >= nype) { + pey -= nype; + } + ASSERT2(pex >= 0); + ASSERT2(pex < nxpe); + ASSERT2(pey >= 0); + ASSERT2(pey < nype); + return fromLocalToGlobal(ix - pex * lnx, iy - pey_offset * lny, iz - pez * lnz, pex, + pey, 0); + } + int fromLocalToGlobal(const int ilocalx, const int ilocaly, const int ilocalz) { + return fromLocalToGlobal(ilocalx, ilocaly, ilocalz, mesh->getXProcIndex(), + mesh->getYProcIndex(), 0); + } + int fromLocalToGlobal(const int ilocalx, const int ilocaly, const int ilocalz, + const int pex, const int pey, const int pez) { + ASSERT3(ilocalx >= 0); + ASSERT3(ilocaly >= 0); + ASSERT3(ilocalz >= 0); + const int ilocal = ((ilocalx * mesh->LocalNy) + ilocaly) * mesh->LocalNz + ilocalz; + const int ret = ilocal + + mesh->LocalNx * mesh->LocalNy * mesh->LocalNz + * ((pey * nxpe + pex) * nzpe + pez); + ASSERT3(ret >= 0); + ASSERT3(ret < nxpe * nype * mesh->LocalNx * mesh->LocalNy * mesh->LocalNz); + return ret; + } + +private: + // number of procs + BoutMesh* mesh; + int nxpe; + int nype; + int nzpe{1}; + static int divToNeg(const int n, const int d) { + return (n < 0) ? ((n - d + 1) / d) : (n / d); + } +}; + +#if BOUT_HAS_PETSC + +PetscXZHermiteSpline::PetscXZHermiteSpline(int y_offset, Mesh* mesh) + : XZInterpolation(y_offset, mesh), + petsclib(&Options::root()["mesh:paralleltransform:xzinterpolation:hermitespline"]), h00_x(localmesh), h01_x(localmesh), + h10_x(localmesh), h11_x(localmesh), h00_z(localmesh), h01_z(localmesh), + h10_z(localmesh), h11_z(localmesh) { + + // Index arrays contain guard cells in order to get subscripts right + i_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); + k_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); + + // Initialise in order to avoid 'uninitialized value' errors from Valgrind when using + // guard-cell values + k_corner = -1; + + // Allocate Field3D members + h00_x.allocate(); + h01_x.allocate(); + h10_x.allocate(); + h11_x.allocate(); + h00_z.allocate(); + h01_z.allocate(); + h10_z.allocate(); + h11_z.allocate(); + + newWeights.reserve(16); + for (int w = 0; w < 16; ++w) { + newWeights.emplace_back(localmesh); + newWeights[w].allocate(); + } + + const int grid_size = localmesh->LocalNx * localmesh->LocalNy * localmesh->LocalNz; + const int proc_size = grid_size * localmesh->getNXPE() * localmesh->getNYPE(); + MatCreateAIJ(MPI_COMM_WORLD, grid_size, grid_size, proc_size, proc_size, 16, nullptr, + 16, nullptr, &petscWeights); +} + +PetscXZHermiteSpline::~PetscXZHermiteSpline() { + if (!isInit) { + return; + } + MatDestroy(&petscWeights); + VecDestroy(&rhs); + VecDestroy(&result); +} + +void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, + const std::string& region) { + + const int ny = localmesh->LocalNy; + const int nz = localmesh->LocalNz; + const int xend = (localmesh->xend - localmesh->xstart + 1) * localmesh->getNXPE() + + localmesh->xstart - 1; + + IndConverter conv{localmesh}; + + const auto actual_region = getRegion(region); + + BOUT_FOR(i, actual_region) { + const int x = i.x(); + const int y = i.y(); + const int z = i.z(); + + // The integer part of xt_prime, zt_prime are the indices of the cell + // containing the field line end-point + int i_corn = static_cast(floor(delta_x(x, y, z))); + k_corner(x, y, z) = static_cast(floor(delta_z(x, y, z))); + + // t_x, t_z are the normalised coordinates \in [0,1) within the cell + // calculated by taking the remainder of the floating point index + BoutReal t_x = delta_x(x, y, z) - static_cast(i_corn); + BoutReal t_z = delta_z(x, y, z) - static_cast(k_corner(x, y, z)); + + // NOTE: A (small) hack to avoid one-sided differences + if (i_corn >= xend) { + i_corn = xend - 1; + t_x = 1.0; + } + if (i_corn < localmesh->xstart) { + i_corn = localmesh->xstart; + t_x = 0.0; + } + + k_corner(x, y, z) = ((k_corner(x, y, z) % nz) + nz) % nz; + + // Check that t_x and t_z are in range + if ((t_x < 0.0) || (t_x > 1.0)) { + throw BoutException( + "t_x={:e} out of range at ({:d},{:d},{:d}) (delta_x={:e}, i_corn={:d})", t_x, x, + y, z, delta_x(x, y, z), i_corn); + } + + if ((t_z < 0.0) || (t_z > 1.0)) { + throw BoutException( + "t_z={:e} out of range at ({:d},{:d},{:d}) (delta_z={:e}, k_corner={:d})", t_z, + x, y, z, delta_z(x, y, z), k_corner(x, y, z)); + } + + i_corner[i] = SpecificInd( + (((i_corn * ny) + (y + y_offset)) * nz + k_corner(x, y, z)), ny, nz); + + h00_x[i] = (2. * t_x * t_x * t_x) - (3. * t_x * t_x) + 1.; + h00_z[i] = (2. * t_z * t_z * t_z) - (3. * t_z * t_z) + 1.; + + h01_x[i] = (-2. * t_x * t_x * t_x) + (3. * t_x * t_x); + h01_z[i] = (-2. * t_z * t_z * t_z) + (3. * t_z * t_z); + + h10_x[i] = t_x * (1. - t_x) * (1. - t_x); + h10_z[i] = t_z * (1. - t_z) * (1. - t_z); + + h11_x[i] = (t_x * t_x * t_x) - (t_x * t_x); + h11_z[i] = (t_z * t_z * t_z) - (t_z * t_z); + + for (int w = 0; w < 16; ++w) { + newWeights[w][i] = 0; + } + // The distribution of our weights: + // 0 4 8 12 + // 1 5 9 13 + // 2 6 10 14 + // 3 7 11 15 + // e.g. 1 == ic.xm(); 4 == ic.zm(); 5 == ic; 7 == ic.zp(2); + + // f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; + newWeights[5][i] += h00_x[i] * h00_z[i]; + newWeights[9][i] += h01_x[i] * h00_z[i]; + newWeights[9][i] += h10_x[i] * h00_z[i] / 2; + newWeights[1][i] -= h10_x[i] * h00_z[i] / 2; + newWeights[13][i] += h11_x[i] * h00_z[i] / 2; + newWeights[5][i] -= h11_x[i] * h00_z[i] / 2; + + // f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + + // fx[iczp] * h10_x[i] + fx[icxpzp] * h11_x[i]; + newWeights[6][i] += h00_x[i] * h01_z[i]; + newWeights[10][i] += h01_x[i] * h01_z[i]; + newWeights[10][i] += h10_x[i] * h01_z[i] / 2; + newWeights[2][i] -= h10_x[i] * h01_z[i] / 2; + newWeights[14][i] += h11_x[i] * h01_z[i] / 2; + newWeights[6][i] -= h11_x[i] * h01_z[i] / 2; + + // fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + + // fxz[ic] * h10_x[i]+ fxz[icxp] * h11_x[i]; + newWeights[6][i] += h00_x[i] * h10_z[i] / 2; + newWeights[4][i] -= h00_x[i] * h10_z[i] / 2; + newWeights[10][i] += h01_x[i] * h10_z[i] / 2; + newWeights[8][i] -= h01_x[i] * h10_z[i] / 2; + newWeights[10][i] += h10_x[i] * h10_z[i] / 4; + newWeights[8][i] -= h10_x[i] * h10_z[i] / 4; + newWeights[2][i] -= h10_x[i] * h10_z[i] / 4; + newWeights[0][i] += h10_x[i] * h10_z[i] / 4; + newWeights[14][i] += h11_x[i] * h10_z[i] / 4; + newWeights[12][i] -= h11_x[i] * h10_z[i] / 4; + newWeights[6][i] -= h11_x[i] * h10_z[i] / 4; + newWeights[4][i] += h11_x[i] * h10_z[i] / 4; + + // fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] + + // fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; + newWeights[7][i] += h00_x[i] * h11_z[i] / 2; + newWeights[5][i] -= h00_x[i] * h11_z[i] / 2; + newWeights[11][i] += h01_x[i] * h11_z[i] / 2; + newWeights[9][i] -= h01_x[i] * h11_z[i] / 2; + newWeights[11][i] += h10_x[i] * h11_z[i] / 4; + newWeights[9][i] -= h10_x[i] * h11_z[i] / 4; + newWeights[3][i] -= h10_x[i] * h11_z[i] / 4; + newWeights[1][i] += h10_x[i] * h11_z[i] / 4; + newWeights[15][i] += h11_x[i] * h11_z[i] / 4; + newWeights[13][i] -= h11_x[i] * h11_z[i] / 4; + newWeights[7][i] -= h11_x[i] * h11_z[i] / 4; + newWeights[5][i] += h11_x[i] * h11_z[i] / 4; + + std::array idxn = {conv.fromLocalToGlobal(x, y + y_offset, z)}; + for (int j = 0; j < 4; ++j) { + std::array idxm{}; + std::array vals{}; + for (std::size_t k = 0; k < 4; ++k) { + idxm[k] = conv.fromMeshToGlobal(i_corn - 1 + j, y + y_offset, + k_corner(x, y, z) - 1 + k); + vals[k] = newWeights[j * 4 + k][i]; + } + MatSetValues(petscWeights, 1, idxn.data(), 4, idxm.data(), vals.data(), INSERT_VALUES); + } + } + + MatAssemblyBegin(petscWeights, MAT_FINAL_ASSEMBLY); + MatAssemblyEnd(petscWeights, MAT_FINAL_ASSEMBLY); + if (!isInit) { + MatCreateVecs(petscWeights, &rhs, &result); + } + isInit = true; +} + +void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, + const BoutMask& mask, const std::string& region) { + setMask(mask); + calcWeights(delta_x, delta_z, region); +} + +/*! + * Return position and weight of points needed to approximate the function value at the + * point that the field line through (i,j,k) meets the (j+1)-plane. For the case where + * only the z-direction is not aligned to grid points, the approximation is: f(i,j+1,k*) = + * h00_z * f(i,j+1,k) + h01_z * f(i,j+1,k+1) + * + h10_z * dfdz(i,j+1,k) + h11_z * dfdz(i,j+1,k+1) + * = h00_z * f(i,j+1,k) + h01_z * f(i,j+1,k+1) + * + h10_z * ( f(i,j+1,k+1) - f(i,j+1,k-1) ) / 2 + * + h11_z * ( f(i,j+1,k+2) - f(i,j+1,k) ) / 2 + * for k* a point between k and k+1. Therefore, this function returns + * position weight + * (i, j+1, k-1) - h10_z / 2 + * (i, j+1, k) h00_z - h11_z / 2 + * (i, j+1, k+1) h01_z + h10_z / 2 + * (i, j+1, k+2) h11_z / 2 + */ +std::vector +PetscXZHermiteSpline::getWeightsForYApproximation(int i, int j, int k, int yoffset) { + const int nz = localmesh->LocalNz; + const int k_mod = k_corner(i, j, k); + const int k_mod_m1 = (k_mod > 0) ? (k_mod - 1) : (nz - 1); + const int k_mod_p1 = (k_mod == nz) ? 0 : k_mod + 1; + const int k_mod_p2 = (k_mod_p1 == nz) ? 0 : k_mod_p1 + 1; + + return {{i, j + yoffset, k_mod_m1, -0.5 * h10_z(i, j, k)}, + {i, j + yoffset, k_mod, h00_z(i, j, k) - 0.5 * h11_z(i, j, k)}, + {i, j + yoffset, k_mod_p1, h01_z(i, j, k) + 0.5 * h10_z(i, j, k)}, + {i, j + yoffset, k_mod_p2, 0.5 * h11_z(i, j, k)}}; +} + +Field3D PetscXZHermiteSpline::interpolate(const Field3D& f, + const std::string& region) const { + + ASSERT1(f.getMesh() == localmesh); + Field3D f_interp{emptyFrom(f)}; + + BoutReal* ptr = nullptr; + const BoutReal* cptr = nullptr; + VecGetArray(rhs, &ptr); + BOUT_FOR(i, f.getRegion("RGN_NOY")) { ptr[int(i)] = f[i]; } + VecRestoreArray(rhs, &ptr); + MatMult(petscWeights, rhs, result); + VecGetArrayRead(result, &cptr); + const auto region2 = y_offset == 0 ? region : fmt::format("RGN_YPAR_{:+d}", y_offset); + BOUT_FOR(i, f.getRegion(region2)) { + f_interp[i] = cptr[int(i)]; + ASSERT2(std::isfinite(cptr[int(i)])); + } + VecRestoreArrayRead(result, &cptr); + return f_interp; +} + +Field3D PetscXZHermiteSpline::interpolate(const Field3D& f, const Field3D& delta_x, + const Field3D& delta_z, + const std::string& region) { + calcWeights(delta_x, delta_z, region); + return interpolate(f, region); +} + +Field3D PetscXZHermiteSpline::interpolate(const Field3D& f, const Field3D& delta_x, + const Field3D& delta_z, const BoutMask& mask, + const std::string& region) { + calcWeights(delta_x, delta_z, mask, region); + return interpolate(f, region); +} + +#endif diff --git a/tests/integrated/test-interpolate/data/BOUT.inp b/tests/integrated/test-interpolate/data/BOUT.inp index 101c63f3c7..804e780bbe 100644 --- a/tests/integrated/test-interpolate/data/BOUT.inp +++ b/tests/integrated/test-interpolate/data/BOUT.inp @@ -4,7 +4,6 @@ # MZ = 4 # Z size -NXPE = 1 ZMAX = 1 MXG = 2 diff --git a/tests/integrated/test-interpolate/runtest b/tests/integrated/test-interpolate/runtest index 08975cfd33..77e8d0b99b 100755 --- a/tests/integrated/test-interpolate/runtest +++ b/tests/integrated/test-interpolate/runtest @@ -6,6 +6,7 @@ from boututils.run_wrapper import build_and_log, shell, launch_safe from boutdata import collect +import boutconfig from numpy import sqrt, max, abs, mean, array, log, polyfit from sys import stdout, exit @@ -15,9 +16,6 @@ show_plot = False # List of NX values to use nxlist = [16, 32, 64, 128] -# Only testing 2D (x, z) slices, so only need one processor -nproc = 1 - # Variables to compare varlist = ["a", "b", "c"] markers = ["bo", "r^", "kx"] @@ -29,6 +27,8 @@ methods = { "bilinear": 2, } +if boutconfig.has["petsc"]: + methods["petschermitespline"] = 3 build_and_log("Interpolation test") @@ -37,7 +37,7 @@ success = True for method in methods: print("------------------------------") - print("Using {} interpolation".format(method)) + print(f"Using {method} interpolation") error_2 = {} error_inf = {} @@ -48,25 +48,20 @@ for method in methods: for nx in nxlist: dx = 1.0 / (nx) - args = ( - " mesh:nx={nx4} mesh:dx={dx} MZ={nx} xzinterpolation:type={method}".format( - nx4=nx + 4, dx=dx, nx=nx, method=method - ) - ) - - cmd = "./test_interpolate" + args + nproc = 2 if method == "petschermitespline" else 1 + cmd = f"./test_interpolate mesh:nx={nx + 4} mesh:dx={dx} MZ={nx} xzinterpolation:type={method} NXPE={nproc}" shell("rm data/BOUT.dmp.*.nc") s, out = launch_safe(cmd, nproc=nproc, pipe=True) - with open("run.log.{}.{}".format(method, nx), "w") as f: + with open(f"run.log.{method}.{nx}", "w") as f: f.write(out) # Collect output data for var in varlist: - interp = collect(var + "_interp", path="data", xguards=False, info=False) + interp = collect(f"{var}_interp", path="data", xguards=False, info=False) solution = collect( - var + "_solution", path="data", xguards=False, info=False + f"{var}_solution", path="data", xguards=False, info=False ) E = interp - solution diff --git a/tests/integrated/test-interpolate/test_interpolate.cxx b/tests/integrated/test-interpolate/test_interpolate.cxx index a090877552..d71228933f 100644 --- a/tests/integrated/test-interpolate/test_interpolate.cxx +++ b/tests/integrated/test-interpolate/test_interpolate.cxx @@ -30,88 +30,90 @@ std::shared_ptr getGeneratorFromOptions(const std::string& varna int main(int argc, char** argv) { BoutInitialise(argc, argv); - - // Random number generator - std::default_random_engine generator; - // Uniform distribution of BoutReals from 0 to 1 - std::uniform_real_distribution distribution{0.0, 1.0}; - - using bout::globals::mesh; - - FieldFactory f(mesh); - - // Set up generators and solutions for three different analtyic functions - std::string a_func; - auto a_gen = getGeneratorFromOptions("a", a_func); - Field3D a = f.create3D(a_func); - Field3D a_solution = 0.0; - Field3D a_interp = 0.0; - - std::string b_func; - auto b_gen = getGeneratorFromOptions("b", b_func); - Field3D b = f.create3D(b_func); - Field3D b_solution = 0.0; - Field3D b_interp = 0.0; - - std::string c_func; - auto c_gen = getGeneratorFromOptions("c", c_func); - Field3D c = f.create3D(c_func); - Field3D c_solution = 0.0; - Field3D c_interp = 0.0; - - // x and z displacements - Field3D deltax = 0.0; - Field3D deltaz = 0.0; - - // Bind the random number generator and distribution into a single function - auto dice = std::bind(distribution, generator); - - for (const auto& index : deltax) { - // Get some random displacements - BoutReal dx = index.x() + dice(); - BoutReal dz = index.z() + dice(); - // For the last point, put the displacement inwards - // Otherwise we try to interpolate in the guard cells, which doesn't work so well - if (index.x() >= mesh->xend) { - dx = index.x() - dice(); + { + // Random number generator + const std::default_random_engine generator; + // Uniform distribution of BoutReals from 0 to 1 + const std::uniform_real_distribution distribution{0.0, 1.0}; + + using bout::globals::mesh; + + const FieldFactory fieldfact(mesh); + + // Set up generators and solutions for three different analtyic functions + std::string a_func; + auto a_gen = getGeneratorFromOptions("a", a_func); + const Field3D a_field = fieldfact.create3D(a_func); + Field3D a_solution = 0.0; + Field3D a_interp = 0.0; + + std::string b_func; + auto b_gen = getGeneratorFromOptions("b", b_func); + const Field3D b_field = fieldfact.create3D(b_func); + Field3D b_solution = 0.0; + Field3D b_interp = 0.0; + + std::string c_func; + auto c_gen = getGeneratorFromOptions("c", c_func); + const Field3D c_field = fieldfact.create3D(c_func); + Field3D c_solution = 0.0; + Field3D c_interp = 0.0; + + // x and z displacements + Field3D deltax = 0.0; + Field3D deltaz = 0.0; + + // Bind the random number generator and distribution into a single function + auto dice = std::bind(distribution, generator); + + for (const auto& index : deltax) { + // Get some random displacements + BoutReal dx = index.x() + dice(); + BoutReal dz = index.z() + dice(); + // For the last point, put the displacement inwards + // Otherwise we try to interpolate in the guard cells, which doesn't work so well + if (index.x() >= mesh->xend && mesh->getNXPE() - 1 == mesh->getXProcIndex()) { + dx = index.x() - dice(); + } + deltax[index] = dx; + deltaz[index] = dz; + // Get the global indices + bout::generator::Context pos{index, CELL_CENTRE, deltax.getMesh(), 0.0}; + pos.set("x", mesh->GlobalX(dx), "z", + TWOPI * static_cast(dz) / static_cast(mesh->LocalNz)); + // Generate the analytic solution at the displacements + a_solution[index] = a_gen->generate(pos); + b_solution[index] = b_gen->generate(pos); + c_solution[index] = c_gen->generate(pos); } - deltax[index] = dx; - deltaz[index] = dz; - // Get the global indices - bout::generator::Context pos{index, CELL_CENTRE, deltax.getMesh(), 0.0}; - pos.set("x", mesh->GlobalX(dx), "z", - TWOPI * static_cast(dz) / static_cast(mesh->LocalNz)); - // Generate the analytic solution at the displacements - a_solution[index] = a_gen->generate(pos); - b_solution[index] = b_gen->generate(pos); - c_solution[index] = c_gen->generate(pos); - } - // Create the interpolation object from the input options - auto interp = XZInterpolationFactory::getInstance().create(); + deltax += (mesh->LocalNx - mesh->xstart * 2) * mesh->getXProcIndex(); + // Create the interpolation object from the input options + auto interp = XZInterpolationFactory::getInstance().create(); - // Interpolate the analytic functions at the displacements - a_interp = interp->interpolate(a, deltax, deltaz); - b_interp = interp->interpolate(b, deltax, deltaz); - c_interp = interp->interpolate(c, deltax, deltaz); + // Interpolate the analytic functions at the displacements + a_interp = interp->interpolate(a_field, deltax, deltaz); + b_interp = interp->interpolate(b_field, deltax, deltaz); + c_interp = interp->interpolate(c_field, deltax, deltaz); - Options dump; + Options dump; - dump["a"] = a; - dump["a_interp"] = a_interp; - dump["a_solution"] = a_solution; + dump["a"] = a_field; + dump["a_interp"] = a_interp; + dump["a_solution"] = a_solution; - dump["b"] = b; - dump["b_interp"] = b_interp; - dump["b_solution"] = b_solution; + dump["b"] = b_field; + dump["b_interp"] = b_interp; + dump["b_solution"] = b_solution; - dump["c"] = c; - dump["c_interp"] = c_interp; - dump["c_solution"] = c_solution; + dump["c"] = c_field; + dump["c_interp"] = c_interp; + dump["c_solution"] = c_solution; - bout::writeDefaultOutputFile(dump); + bout::writeDefaultOutputFile(dump); - bout::checkForUnusedOptions(); + bout::checkForUnusedOptions(); + } BoutFinalise(); return 0; From 183c7a2da9440eb69a2bf21732f7de10872d196f Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Fri, 24 May 2024 16:52:15 +0100 Subject: [PATCH 07/13] WIP: Switch `PetscXZHermiteSpline` to use `PetscMatrix` for weights Currently gives pretty bad results, not sure why --- include/bout/interpolation_xz.hxx | 10 +- .../interpolation/petsc_hermite_spline_xz.cxx | 91 ++++++++++++++++++- 2 files changed, 96 insertions(+), 5 deletions(-) diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index ddb891ad19..ab3b94aeef 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -23,8 +23,12 @@ #ifndef BOUT_INTERP_XZ_H #define BOUT_INTERP_XZ_H +#include "bout/bout_types.hxx" #include "bout/build_defines.hxx" +#include "bout/field3d.hxx" +#include "bout/globalindexer.hxx" #include "bout/mask.hxx" +#include "bout/petsc_interface.hxx" #include "bout/petsclib.hxx" class Options; @@ -257,13 +261,15 @@ public: class PetscXZHermiteSpline : public XZInterpolation { PetscLib petsclib; bool isInit{false}; + IndexerPtr indexer; + PetscMatrix weights; Mat petscWeights{nullptr}; Vec rhs{nullptr}; Vec result{nullptr}; std::vector newWeights; - Tensor> i_corner; // x-index of bottom-left grid point - Tensor k_corner; // z-index of bottom-left grid point + Tensor i_corner; // x-index of bottom-left grid point + Tensor k_corner; // z-index of bottom-left grid point // Basis functions for cubic Hermite spline interpolation // see http://en.wikipedia.org/wiki/Cubic_Hermite_spline diff --git a/src/mesh/interpolation/petsc_hermite_spline_xz.cxx b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx index df5ad46261..6c57c0686b 100644 --- a/src/mesh/interpolation/petsc_hermite_spline_xz.cxx +++ b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx @@ -21,9 +21,13 @@ **************************************************************************/ #include "../impls/bout/boutmesh.hxx" +#include "bout/field3d.hxx" +#include "bout/globalindexer.hxx" #include "bout/globals.hxx" #include "bout/index_derivs_interface.hxx" #include "bout/interpolation_xz.hxx" +#include "bout/operatorstencil.hxx" +#include "bout/petsc_interface.hxx" #include #include @@ -102,11 +106,47 @@ class IndConverter { #if BOUT_HAS_PETSC +namespace { +// A 4x4 stencil in x-z, covering (x-1, z-1) to (x+2, z+2) +auto XZstencil(const Mesh& localmesh) { + using ind_type = Field3D::ind_type; + OperatorStencil stencil; + IndexOffset zero; + + // Stencil pattern + // x --> + // 0 4 8 12 + // z 1 5 9 13 + // | 2 6 10 14 + // V 3 7 11 15 + + const std::vector> offsets = { + zero.xm().zm(), zero.xm(), zero.xm().zp(), zero.xm().zp().zp(), + zero.zm(), zero, zero.zp(), zero.zp().zp(), + zero.xp().zm(), zero.xp(), zero.xp().zp(), zero.xp().zp().zp(), + zero.xp().xp().zm(), zero.xp().xp(), zero.xp().xp().zp(), zero.xp().xp().zp().zp(), + }; + + stencil.add( + [&localmesh](ind_type ind) -> bool { + return (localmesh.xstart <= ind.x() and ind.x() <= localmesh.xend) + and (localmesh.ystart <= ind.y() and ind.y() <= localmesh.yend) + and (localmesh.zstart <= ind.z() and ind.z() <= localmesh.zend); + }, + offsets); + stencil.add([]([[maybe_unused]] auto ind) -> bool { return true; }, {zero}); + + return stencil; +} +} + PetscXZHermiteSpline::PetscXZHermiteSpline(int y_offset, Mesh* mesh) : XZInterpolation(y_offset, mesh), - petsclib(&Options::root()["mesh:paralleltransform:xzinterpolation:hermitespline"]), h00_x(localmesh), h01_x(localmesh), - h10_x(localmesh), h11_x(localmesh), h00_z(localmesh), h01_z(localmesh), - h10_z(localmesh), h11_z(localmesh) { + petsclib(&Options::root()["mesh:paralleltransform:xzinterpolation:hermitespline"]), + indexer(std::make_shared>(localmesh, XZstencil(*localmesh))), + weights(indexer), h00_x(localmesh), h01_x(localmesh), h10_x(localmesh), + h11_x(localmesh), h00_z(localmesh), h01_z(localmesh), h10_z(localmesh), + h11_z(localmesh) { // Index arrays contain guard cells in order to get subscripts right i_corner.reallocate(localmesh->LocalNx, localmesh->LocalNy, localmesh->LocalNz); @@ -159,6 +199,14 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de const auto actual_region = getRegion(region); + // PETSc doesn't like mixing `INSERT` and `ADD`, so first make sure matrix is zeroed + // TODO: PetscMatrix could buffer and insert all at once + weights.partialAssemble(); + BOUT_FOR(i, actual_region) { + weights(i, i) = 0.0; + } + weights.partialAssemble(); + BOUT_FOR(i, actual_region) { const int x = i.x(); const int y = i.y(); @@ -225,6 +273,11 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de // e.g. 1 == ic.xm(); 4 == ic.zm(); 5 == ic; 7 == ic.zp(2); // f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; + weights(i, i) += (h00_x[i] * h00_z[i]) - (h11_x[i] * h00_z[i] / 2); + weights(i, i.xp()) += (h01_x[i] * h00_z[i]) + (h10_x[i] * h00_z[i] / 2); + weights(i, i.xm()) += -(h10_x[i] * h00_z[i] / 2); + weights(i, i.xp(2)) += (h11_x[i] * h00_z[i] / 2); + newWeights[5][i] += h00_x[i] * h00_z[i]; newWeights[9][i] += h01_x[i] * h00_z[i]; newWeights[9][i] += h10_x[i] * h00_z[i] / 2; @@ -234,6 +287,11 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de // f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + // fx[iczp] * h10_x[i] + fx[icxpzp] * h11_x[i]; + weights(i, i.zp()) += (h00_x[i] * h01_z[i]) - (h11_x[i] * h01_z[i] / 2); + weights(i, i.zp().xp()) += (h01_x[i] * h01_z[i]) + (h10_x[i] * h01_z[i] / 2); + weights(i, i.zp().xm()) += -(h10_x[i] * h01_z[i] / 2); + weights(i, i.zp().xp(2)) += h11_x[i] * h01_z[i] / 2; + newWeights[6][i] += h00_x[i] * h01_z[i]; newWeights[10][i] += h01_x[i] * h01_z[i]; newWeights[10][i] += h10_x[i] * h01_z[i] / 2; @@ -243,6 +301,15 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de // fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + // fxz[ic] * h10_x[i]+ fxz[icxp] * h11_x[i]; + weights(i, i.zp()) += (h00_x[i] * h10_z[i] / 2) - (h11_x[i] * h10_z[i] / 4); + weights(i, i.zm()) += -(h00_x[i] * h10_z[i] / 2) + (h11_x[i] * h10_z[i] / 4); + weights(i, i.xp().zp()) += (h01_x[i] * h10_z[i] / 2) + (h10_x[i] * h10_z[i] / 4); + weights(i, i.xp().zm()) += -(h01_x[i] * h10_z[i] / 2) - (h10_x[i] * h10_z[i] / 4); + weights(i, i.xm().zp()) += -(h10_x[i] * h10_z[i] / 4); + weights(i, i.xm().zm()) += (h10_x[i] * h10_z[i] / 4); + weights(i, i.xp(2).zp()) += h11_x[i] * h10_z[i] / 4; + weights(i, i.xp(2).zm()) += -(h11_x[i] * h10_z[i] / 4); + newWeights[6][i] += h00_x[i] * h10_z[i] / 2; newWeights[4][i] -= h00_x[i] * h10_z[i] / 2; newWeights[10][i] += h01_x[i] * h10_z[i] / 2; @@ -258,6 +325,16 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de // fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] + // fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; + weights(i, i.zp(2)) += (h00_x[i] * h11_z[i] / 2) - (h11_x[i] * h11_z[i] / 4); + weights(i, i) += -(h00_x[i] * h11_z[i] / 2) + (h11_x[i] * h11_z[i] / 4); + weights(i, i.xp().zp(2)) += (h01_x[i] * h11_z[i] / 2) + (h10_x[i] * h11_z[i] / 4); + weights(i, i.xp()) += -(h01_x[i] * h11_z[i] / 2) - (h10_x[i] * h11_z[i] / 4); + weights(i, i.xm().zp(2)) += -(h10_x[i] * h11_z[i] / 4); + weights(i, i.xm()) += (h10_x[i] * h11_z[i] / 4); + weights(i, i.xp(2).zp(2)) += h11_x[i] * h11_z[i] / 4; + weights(i, i.xp(2)) += -(h11_x[i] * h11_z[i] / 4); + weights(i, i.zp(2)) += -(h11_x[i] * h11_z[i] / 4); + newWeights[7][i] += h00_x[i] * h11_z[i] / 2; newWeights[5][i] -= h00_x[i] * h11_z[i] / 2; newWeights[11][i] += h01_x[i] * h11_z[i] / 2; @@ -284,6 +361,8 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de } } + weights.assemble(); + MatAssemblyBegin(petscWeights, MAT_FINAL_ASSEMBLY); MatAssemblyEnd(petscWeights, MAT_FINAL_ASSEMBLY); if (!isInit) { @@ -347,6 +426,12 @@ Field3D PetscXZHermiteSpline::interpolate(const Field3D& f, ASSERT2(std::isfinite(cptr[int(i)])); } VecRestoreArrayRead(result, &cptr); + + PetscVector f_petsc(f, indexer); + PetscVector f_result = weights * f_petsc; + auto new_result = f_result.toField(); + // Using the Petsc interface classes gives pretty shoddy results + // return new_result; return f_interp; } From 39bef1ca44c4e8ad6daaf726d0b08de6f823222b Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Fri, 24 May 2024 16:58:26 +0100 Subject: [PATCH 08/13] Fix headers when building without PETSc --- src/mesh/interpolation/petsc_hermite_spline_xz.cxx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mesh/interpolation/petsc_hermite_spline_xz.cxx b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx index 6c57c0686b..4cd675980d 100644 --- a/src/mesh/interpolation/petsc_hermite_spline_xz.cxx +++ b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx @@ -20,7 +20,12 @@ * **************************************************************************/ +#include "bout/build_defines.hxx" + +#if BOUT_HAS_PETSC + #include "../impls/bout/boutmesh.hxx" +#include "bout/boutexception.hxx" #include "bout/field3d.hxx" #include "bout/globalindexer.hxx" #include "bout/globals.hxx" @@ -31,7 +36,6 @@ #include #include -#include #include class IndConverter { @@ -104,8 +108,6 @@ class IndConverter { } }; -#if BOUT_HAS_PETSC - namespace { // A 4x4 stencil in x-z, covering (x-1, z-1) to (x+2, z+2) auto XZstencil(const Mesh& localmesh) { From 654f6057c8c5aada7b9d5d5cb276b6d4997937f9 Mon Sep 17 00:00:00 2001 From: ZedThree Date: Fri, 24 May 2024 15:59:19 +0000 Subject: [PATCH 09/13] Apply clang-format changes --- src/mesh/interpolation/petsc_hermite_spline_xz.cxx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/mesh/interpolation/petsc_hermite_spline_xz.cxx b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx index 4cd675980d..e6695b9c19 100644 --- a/src/mesh/interpolation/petsc_hermite_spline_xz.cxx +++ b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx @@ -140,7 +140,7 @@ auto XZstencil(const Mesh& localmesh) { return stencil; } -} +} // namespace PetscXZHermiteSpline::PetscXZHermiteSpline(int y_offset, Mesh* mesh) : XZInterpolation(y_offset, mesh), @@ -204,9 +204,7 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de // PETSc doesn't like mixing `INSERT` and `ADD`, so first make sure matrix is zeroed // TODO: PetscMatrix could buffer and insert all at once weights.partialAssemble(); - BOUT_FOR(i, actual_region) { - weights(i, i) = 0.0; - } + BOUT_FOR(i, actual_region) { weights(i, i) = 0.0; } weights.partialAssemble(); BOUT_FOR(i, actual_region) { @@ -359,7 +357,8 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de k_corner(x, y, z) - 1 + k); vals[k] = newWeights[j * 4 + k][i]; } - MatSetValues(petscWeights, 1, idxn.data(), 4, idxm.data(), vals.data(), INSERT_VALUES); + MatSetValues(petscWeights, 1, idxn.data(), 4, idxm.data(), vals.data(), + INSERT_VALUES); } } From c453dee87d4a41ee5edcc152a336ad6a0ef6ffca Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Thu, 30 May 2024 16:47:19 +0100 Subject: [PATCH 10/13] Add a simplified `GlobalIndexer` needed for `PetscXZHermiteSpline` --- include/bout/globalindexer.hxx | 87 +++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/include/bout/globalindexer.hxx b/include/bout/globalindexer.hxx index e756ead3b2..3da3da8606 100644 --- a/include/bout/globalindexer.hxx +++ b/include/bout/globalindexer.hxx @@ -17,21 +17,25 @@ using IndexerPtr = std::shared_ptr>; using InterpolationWeights = std::vector; -/*! - * An object which accepts index objects produced by iterating over - * fields and returns a global index. This index can be used when - * constructing PETSc arrays. Guard regions used for communication - * between processes will have the indices of the part of the interior - * region they are mirroring. Boundaries required by the stencil will - * have unique indices. If no stencil is provided then only the - * interior region will be assigned global indices. By default, the - * indexer is fully initialised so that guard cells are communicated - * (ensuring they hold the appropriate global indices). However, by - * passing `autoInitialise = false` this behaviour can be turned off - * and the user can then manually call the `initialise()` method - * later. This can be useful for mocking/faking the class when - * testing. - */ +/// Convert local indices on current process to consistent, unique +/// global flat indices across all processes. +/// +/// Note that this can only convert indices both local to *and owned by* the +/// current process! +/// +/// An object which accepts index objects produced by iterating over +/// fields and returns a global index. This index can be used when +/// constructing PETSc arrays. Guard regions used for communication +/// between processes will have the indices of the part of the interior +/// region they are mirroring. Boundaries required by the stencil will +/// have unique indices. If no stencil is provided then only the +/// interior region will be assigned global indices. By default, the +/// indexer is fully initialised so that guard cells are communicated +/// (ensuring they hold the appropriate global indices). However, by +/// passing `autoInitialise = false` this behaviour can be turned off +/// and the user can then manually call the `initialise()` method +/// later. This can be useful for mocking/faking the class when +/// testing. template class GlobalIndexer { public: @@ -98,7 +102,7 @@ public: } } - virtual ~GlobalIndexer() {} + virtual ~GlobalIndexer() = default; /// Call this immediately after construction when running unit tests. void initialiseTest() {} @@ -123,7 +127,8 @@ public: /// Convert the local index object to a global index which can be /// used in PETSc vectors and matrices. - int getGlobal(const ind_type& ind) const { + virtual int getGlobal(const ind_type& ind) const { + ASSERT1(isLocal(ind)); return static_cast(std::round(indices[ind])); } @@ -224,7 +229,8 @@ private: Array int_indices; /// The first and last global index on this processor (inclusive in /// both cases) - int globalStart, globalEnd; + int globalStart = 0; + int globalEnd = 0; /// Stencil for which this indexer has been configured OperatorStencil stencils; @@ -237,4 +243,49 @@ private: mutable std::vector numDiagonal, numOffDiagonal; }; +namespace bout { +/// A simplified local-to-global index converter: includes all boundaries, but +/// not guard cells, and assumes a simple rectangular division of processes. +/// +/// The conversion indices are not precalculated or cached, so this may be less +/// efficient than `GlobalIndexer`. However, it can convert "local" indices +/// outside of the range owned by the current process, for example a local index +/// with `x = -1` which would be owned by the process to the left. +class SimpleGlobalIndexer : public GlobalIndexer { + // Stencil to ensure x-boundaries (to a depth of 2) are included in the base + // indexer + // + // Actually, I'm not 100% sure what this does, but this appears to the minimum + // necessary to get things to work + static auto XZstencil(const Mesh& mesh) { + using ind_type = Field3D::ind_type; + const IndexOffset zero; + OperatorStencil stencil; + + stencil.add( + [&mesh](ind_type ind) -> bool { + return (mesh.xstart <= ind.x() and ind.x() <= mesh.xend) + and (mesh.ystart <= ind.y() and ind.y() <= mesh.yend) + and (mesh.zstart <= ind.z() and ind.z() <= mesh.zend); + }, + {zero.xm(2), zero.xp(2), zero.zm(2), zero.zp(2)}); + + return stencil; + } + +public: + SimpleGlobalIndexer() = default; + explicit SimpleGlobalIndexer(Mesh* localmesh, bool autoInitialise = true) + : GlobalIndexer(localmesh, XZstencil(*localmesh), autoInitialise) {} + + int getGlobal(const ind_type& ind) const override { + const auto& mesh = *getMesh(); + return ((mesh.getGlobalXIndex(ind.x()) * mesh.GlobalNy) + + mesh.getGlobalYIndex(ind.y())) + * mesh.GlobalNz + + mesh.getGlobalZIndex(ind.z()); + } +}; +} // namespace bout + #endif // BOUT_GLOBALINDEXER_H From 05e4d8cb00a410a1313ddaa47d9b3cf551064942 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Thu, 30 May 2024 17:03:12 +0100 Subject: [PATCH 11/13] Use `PetscMatrix` interface for `PetscXZHermiteSpline` --- include/bout/interpolation_xz.hxx | 8 +- .../interpolation/petsc_hermite_spline_xz.cxx | 313 +++--------------- 2 files changed, 47 insertions(+), 274 deletions(-) diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index ab3b94aeef..0ea5efa7ac 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -23,7 +23,6 @@ #ifndef BOUT_INTERP_XZ_H #define BOUT_INTERP_XZ_H -#include "bout/bout_types.hxx" #include "bout/build_defines.hxx" #include "bout/field3d.hxx" #include "bout/globalindexer.hxx" @@ -260,13 +259,8 @@ public: #if BOUT_HAS_PETSC class PetscXZHermiteSpline : public XZInterpolation { PetscLib petsclib; - bool isInit{false}; IndexerPtr indexer; PetscMatrix weights; - Mat petscWeights{nullptr}; - Vec rhs{nullptr}; - Vec result{nullptr}; - std::vector newWeights; Tensor i_corner; // x-index of bottom-left grid point Tensor k_corner; // z-index of bottom-left grid point @@ -296,7 +290,7 @@ public: PetscXZHermiteSpline(PetscXZHermiteSpline&& other) = default; PetscXZHermiteSpline& operator=(const PetscXZHermiteSpline& other) = delete; PetscXZHermiteSpline& operator=(PetscXZHermiteSpline&& other) = delete; - ~PetscXZHermiteSpline() override; + ~PetscXZHermiteSpline() override = default; void calcWeights(const Field3D& delta_x, const Field3D& delta_z, const std::string& region = "RGN_NOBNDRY") override; diff --git a/src/mesh/interpolation/petsc_hermite_spline_xz.cxx b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx index e6695b9c19..2cfef68b43 100644 --- a/src/mesh/interpolation/petsc_hermite_spline_xz.cxx +++ b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx @@ -24,7 +24,6 @@ #if BOUT_HAS_PETSC -#include "../impls/bout/boutmesh.hxx" #include "bout/boutexception.hxx" #include "bout/field3d.hxx" #include "bout/globalindexer.hxx" @@ -34,119 +33,14 @@ #include "bout/operatorstencil.hxx" #include "bout/petsc_interface.hxx" -#include -#include #include -class IndConverter { -public: - IndConverter(Mesh* mesh) - : mesh(dynamic_cast(mesh)), nxpe(mesh->getNXPE()), - nype(mesh->getNYPE()) {} - // ix and iy are global indices - // iy is local - int fromMeshToGlobal(int ix, int iy, int iz) { - const int xstart = mesh->xstart; - const int lnx = mesh->LocalNx - xstart * 2; - // x-proc-id - int pex = divToNeg(ix - xstart, lnx); - if (pex < 0) { - pex = 0; - } - if (pex >= nxpe) { - pex = nxpe - 1; - } - const int zstart = 0; - const int lnz = mesh->LocalNz - zstart * 2; - // z-proc-id - // pez only for wrapping around ; later needs similar treatment than pey - const int pez = divToNeg(iz - zstart, lnz); - // y proc-id - y is already local - const int ystart = mesh->ystart; - const int lny = mesh->LocalNy - ystart * 2; - const int pey_offset = divToNeg(iy - ystart, lny); - int pey = pey_offset + mesh->getYProcIndex(); - while (pey < 0) { - pey += nype; - } - while (pey >= nype) { - pey -= nype; - } - ASSERT2(pex >= 0); - ASSERT2(pex < nxpe); - ASSERT2(pey >= 0); - ASSERT2(pey < nype); - return fromLocalToGlobal(ix - pex * lnx, iy - pey_offset * lny, iz - pez * lnz, pex, - pey, 0); - } - int fromLocalToGlobal(const int ilocalx, const int ilocaly, const int ilocalz) { - return fromLocalToGlobal(ilocalx, ilocaly, ilocalz, mesh->getXProcIndex(), - mesh->getYProcIndex(), 0); - } - int fromLocalToGlobal(const int ilocalx, const int ilocaly, const int ilocalz, - const int pex, const int pey, const int pez) { - ASSERT3(ilocalx >= 0); - ASSERT3(ilocaly >= 0); - ASSERT3(ilocalz >= 0); - const int ilocal = ((ilocalx * mesh->LocalNy) + ilocaly) * mesh->LocalNz + ilocalz; - const int ret = ilocal - + mesh->LocalNx * mesh->LocalNy * mesh->LocalNz - * ((pey * nxpe + pex) * nzpe + pez); - ASSERT3(ret >= 0); - ASSERT3(ret < nxpe * nype * mesh->LocalNx * mesh->LocalNy * mesh->LocalNz); - return ret; - } - -private: - // number of procs - BoutMesh* mesh; - int nxpe; - int nype; - int nzpe{1}; - static int divToNeg(const int n, const int d) { - return (n < 0) ? ((n - d + 1) / d) : (n / d); - } -}; - -namespace { -// A 4x4 stencil in x-z, covering (x-1, z-1) to (x+2, z+2) -auto XZstencil(const Mesh& localmesh) { - using ind_type = Field3D::ind_type; - OperatorStencil stencil; - IndexOffset zero; - - // Stencil pattern - // x --> - // 0 4 8 12 - // z 1 5 9 13 - // | 2 6 10 14 - // V 3 7 11 15 - - const std::vector> offsets = { - zero.xm().zm(), zero.xm(), zero.xm().zp(), zero.xm().zp().zp(), - zero.zm(), zero, zero.zp(), zero.zp().zp(), - zero.xp().zm(), zero.xp(), zero.xp().zp(), zero.xp().zp().zp(), - zero.xp().xp().zm(), zero.xp().xp(), zero.xp().xp().zp(), zero.xp().xp().zp().zp(), - }; - - stencil.add( - [&localmesh](ind_type ind) -> bool { - return (localmesh.xstart <= ind.x() and ind.x() <= localmesh.xend) - and (localmesh.ystart <= ind.y() and ind.y() <= localmesh.yend) - and (localmesh.zstart <= ind.z() and ind.z() <= localmesh.zend); - }, - offsets); - stencil.add([]([[maybe_unused]] auto ind) -> bool { return true; }, {zero}); - - return stencil; -} -} // namespace PetscXZHermiteSpline::PetscXZHermiteSpline(int y_offset, Mesh* mesh) : XZInterpolation(y_offset, mesh), petsclib(&Options::root()["mesh:paralleltransform:xzinterpolation:hermitespline"]), - indexer(std::make_shared>(localmesh, XZstencil(*localmesh))), - weights(indexer), h00_x(localmesh), h01_x(localmesh), h10_x(localmesh), + indexer(std::make_shared(localmesh)), + weights(indexer, false), h00_x(localmesh), h01_x(localmesh), h10_x(localmesh), h11_x(localmesh), h00_z(localmesh), h01_z(localmesh), h10_z(localmesh), h11_z(localmesh) { @@ -168,25 +62,9 @@ PetscXZHermiteSpline::PetscXZHermiteSpline(int y_offset, Mesh* mesh) h10_z.allocate(); h11_z.allocate(); - newWeights.reserve(16); - for (int w = 0; w < 16; ++w) { - newWeights.emplace_back(localmesh); - newWeights[w].allocate(); - } - - const int grid_size = localmesh->LocalNx * localmesh->LocalNy * localmesh->LocalNz; - const int proc_size = grid_size * localmesh->getNXPE() * localmesh->getNYPE(); - MatCreateAIJ(MPI_COMM_WORLD, grid_size, grid_size, proc_size, proc_size, 16, nullptr, - 16, nullptr, &petscWeights); -} - -PetscXZHermiteSpline::~PetscXZHermiteSpline() { - if (!isInit) { - return; - } - MatDestroy(&petscWeights); - VecDestroy(&rhs); - VecDestroy(&result); + // The stencil has 16 elements, so maximum number of columns in any + // given row (whether on local or remote process) is 16 + MatMPIAIJSetPreallocation(*weights.get(), 16, nullptr, 16, nullptr); } void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, @@ -197,16 +75,9 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de const int xend = (localmesh->xend - localmesh->xstart + 1) * localmesh->getNXPE() + localmesh->xstart - 1; - IndConverter conv{localmesh}; - + // TODO: work out why using `getRegion(region)` directly causes + // issues on more than one core const auto actual_region = getRegion(region); - - // PETSc doesn't like mixing `INSERT` and `ADD`, so first make sure matrix is zeroed - // TODO: PetscMatrix could buffer and insert all at once - weights.partialAssemble(); - BOUT_FOR(i, actual_region) { weights(i, i) = 0.0; } - weights.partialAssemble(); - BOUT_FOR(i, actual_region) { const int x = i.x(); const int y = i.y(); @@ -220,7 +91,7 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de // t_x, t_z are the normalised coordinates \in [0,1) within the cell // calculated by taking the remainder of the floating point index BoutReal t_x = delta_x(x, y, z) - static_cast(i_corn); - BoutReal t_z = delta_z(x, y, z) - static_cast(k_corner(x, y, z)); + const BoutReal t_z = delta_z(x, y, z) - static_cast(k_corner(x, y, z)); // NOTE: A (small) hack to avoid one-sided differences if (i_corn >= xend) { @@ -247,9 +118,6 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de x, y, z, delta_z(x, y, z), k_corner(x, y, z)); } - i_corner[i] = SpecificInd( - (((i_corn * ny) + (y + y_offset)) * nz + k_corner(x, y, z)), ny, nz); - h00_x[i] = (2. * t_x * t_x * t_x) - (3. * t_x * t_x) + 1.; h00_z[i] = (2. * t_z * t_z * t_z) - (3. * t_z * t_z) + 1.; @@ -262,114 +130,41 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de h11_x[i] = (t_x * t_x * t_x) - (t_x * t_x); h11_z[i] = (t_z * t_z * t_z) - (t_z * t_z); - for (int w = 0; w < 16; ++w) { - newWeights[w][i] = 0; - } - // The distribution of our weights: - // 0 4 8 12 - // 1 5 9 13 - // 2 6 10 14 - // 3 7 11 15 - // e.g. 1 == ic.xm(); 4 == ic.zm(); 5 == ic; 7 == ic.zp(2); + // Need to convert from global indices to local + const auto i_c = Field3D::ind_type( + (((localmesh->getLocalXIndex(i_corn) * ny) + (y + y_offset)) * nz + + localmesh->getLocalZIndex(k_corner(x, y, z))), + ny, nz); // f[ic] * h00_x[i] + f[icxp] * h01_x[i] + fx[ic] * h10_x[i] + fx[icxp] * h11_x[i]; - weights(i, i) += (h00_x[i] * h00_z[i]) - (h11_x[i] * h00_z[i] / 2); - weights(i, i.xp()) += (h01_x[i] * h00_z[i]) + (h10_x[i] * h00_z[i] / 2); - weights(i, i.xm()) += -(h10_x[i] * h00_z[i] / 2); - weights(i, i.xp(2)) += (h11_x[i] * h00_z[i] / 2); - - newWeights[5][i] += h00_x[i] * h00_z[i]; - newWeights[9][i] += h01_x[i] * h00_z[i]; - newWeights[9][i] += h10_x[i] * h00_z[i] / 2; - newWeights[1][i] -= h10_x[i] * h00_z[i] / 2; - newWeights[13][i] += h11_x[i] * h00_z[i] / 2; - newWeights[5][i] -= h11_x[i] * h00_z[i] / 2; - - // f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + - // fx[iczp] * h10_x[i] + fx[icxpzp] * h11_x[i]; - weights(i, i.zp()) += (h00_x[i] * h01_z[i]) - (h11_x[i] * h01_z[i] / 2); - weights(i, i.zp().xp()) += (h01_x[i] * h01_z[i]) + (h10_x[i] * h01_z[i] / 2); - weights(i, i.zp().xm()) += -(h10_x[i] * h01_z[i] / 2); - weights(i, i.zp().xp(2)) += h11_x[i] * h01_z[i] / 2; - - newWeights[6][i] += h00_x[i] * h01_z[i]; - newWeights[10][i] += h01_x[i] * h01_z[i]; - newWeights[10][i] += h10_x[i] * h01_z[i] / 2; - newWeights[2][i] -= h10_x[i] * h01_z[i] / 2; - newWeights[14][i] += h11_x[i] * h01_z[i] / 2; - newWeights[6][i] -= h11_x[i] * h01_z[i] / 2; - - // fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + - // fxz[ic] * h10_x[i]+ fxz[icxp] * h11_x[i]; - weights(i, i.zp()) += (h00_x[i] * h10_z[i] / 2) - (h11_x[i] * h10_z[i] / 4); - weights(i, i.zm()) += -(h00_x[i] * h10_z[i] / 2) + (h11_x[i] * h10_z[i] / 4); - weights(i, i.xp().zp()) += (h01_x[i] * h10_z[i] / 2) + (h10_x[i] * h10_z[i] / 4); - weights(i, i.xp().zm()) += -(h01_x[i] * h10_z[i] / 2) - (h10_x[i] * h10_z[i] / 4); - weights(i, i.xm().zp()) += -(h10_x[i] * h10_z[i] / 4); - weights(i, i.xm().zm()) += (h10_x[i] * h10_z[i] / 4); - weights(i, i.xp(2).zp()) += h11_x[i] * h10_z[i] / 4; - weights(i, i.xp(2).zm()) += -(h11_x[i] * h10_z[i] / 4); - - newWeights[6][i] += h00_x[i] * h10_z[i] / 2; - newWeights[4][i] -= h00_x[i] * h10_z[i] / 2; - newWeights[10][i] += h01_x[i] * h10_z[i] / 2; - newWeights[8][i] -= h01_x[i] * h10_z[i] / 2; - newWeights[10][i] += h10_x[i] * h10_z[i] / 4; - newWeights[8][i] -= h10_x[i] * h10_z[i] / 4; - newWeights[2][i] -= h10_x[i] * h10_z[i] / 4; - newWeights[0][i] += h10_x[i] * h10_z[i] / 4; - newWeights[14][i] += h11_x[i] * h10_z[i] / 4; - newWeights[12][i] -= h11_x[i] * h10_z[i] / 4; - newWeights[6][i] -= h11_x[i] * h10_z[i] / 4; - newWeights[4][i] += h11_x[i] * h10_z[i] / 4; - - // fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] + - // fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; - weights(i, i.zp(2)) += (h00_x[i] * h11_z[i] / 2) - (h11_x[i] * h11_z[i] / 4); - weights(i, i) += -(h00_x[i] * h11_z[i] / 2) + (h11_x[i] * h11_z[i] / 4); - weights(i, i.xp().zp(2)) += (h01_x[i] * h11_z[i] / 2) + (h10_x[i] * h11_z[i] / 4); - weights(i, i.xp()) += -(h01_x[i] * h11_z[i] / 2) - (h10_x[i] * h11_z[i] / 4); - weights(i, i.xm().zp(2)) += -(h10_x[i] * h11_z[i] / 4); - weights(i, i.xm()) += (h10_x[i] * h11_z[i] / 4); - weights(i, i.xp(2).zp(2)) += h11_x[i] * h11_z[i] / 4; - weights(i, i.xp(2)) += -(h11_x[i] * h11_z[i] / 4); - weights(i, i.zp(2)) += -(h11_x[i] * h11_z[i] / 4); - - newWeights[7][i] += h00_x[i] * h11_z[i] / 2; - newWeights[5][i] -= h00_x[i] * h11_z[i] / 2; - newWeights[11][i] += h01_x[i] * h11_z[i] / 2; - newWeights[9][i] -= h01_x[i] * h11_z[i] / 2; - newWeights[11][i] += h10_x[i] * h11_z[i] / 4; - newWeights[9][i] -= h10_x[i] * h11_z[i] / 4; - newWeights[3][i] -= h10_x[i] * h11_z[i] / 4; - newWeights[1][i] += h10_x[i] * h11_z[i] / 4; - newWeights[15][i] += h11_x[i] * h11_z[i] / 4; - newWeights[13][i] -= h11_x[i] * h11_z[i] / 4; - newWeights[7][i] -= h11_x[i] * h11_z[i] / 4; - newWeights[5][i] += h11_x[i] * h11_z[i] / 4; - - std::array idxn = {conv.fromLocalToGlobal(x, y + y_offset, z)}; - for (int j = 0; j < 4; ++j) { - std::array idxm{}; - std::array vals{}; - for (std::size_t k = 0; k < 4; ++k) { - idxm[k] = conv.fromMeshToGlobal(i_corn - 1 + j, y + y_offset, - k_corner(x, y, z) - 1 + k); - vals[k] = newWeights[j * 4 + k][i]; - } - MatSetValues(petscWeights, 1, idxn.data(), 4, idxm.data(), vals.data(), - INSERT_VALUES); - } + // f[iczp] * h00_x[i] + f[icxpzp] * h01_x[i] + fx[iczp] * h10_x[i] + fx[icxpzp] * h11_x[i]; + // fz[ic] * h00_x[i] + fz[icxp] * h01_x[i] + fxz[ic] * h10_x[i]+ fxz[icxp] * h11_x[i]; + // fz[iczp] * h00_x[i] + fz[icxpzp] * h01_x[i] + fxz[iczp] * h10_x[i] + fxz[icxpzp] * h11_x[i]; + + // Quite a few duplicated terms, could pre-calculate + weights(i, i_c.xm()) = (h10_x[i] * h11_z[i] / 4) - (h10_x[i] * h00_z[i] / 2); + weights(i, i_c.xm().zm()) = (h10_x[i] * h10_z[i] / 4); + weights(i, i_c.xm().zp()) = -(h10_x[i] * h01_z[i] / 2) - (h10_x[i] * h10_z[i] / 4); + weights(i, i_c.xm().zp(2)) = -(h10_x[i] * h11_z[i] / 4); + weights(i, i_c.xp()) = (h01_x[i] * h00_z[i]) + (h10_x[i] * h00_z[i] / 2) + - (h01_x[i] * h11_z[i] / 2) - (h10_x[i] * h11_z[i] / 4); + weights(i, i_c.xp().zm()) = -(h01_x[i] * h10_z[i] / 2) - (h10_x[i] * h10_z[i] / 4); + weights(i, i_c.xp().zp()) = (h01_x[i] * h01_z[i]) + (h01_x[i] * h10_z[i] / 2) + + (h10_x[i] * h01_z[i] / 2) + (h10_x[i] * h10_z[i] / 4); + weights(i, i_c.xp().zp(2)) = (h01_x[i] * h11_z[i] / 2) + (h10_x[i] * h11_z[i] / 4); + weights(i, i_c.xp(2)) = (h11_x[i] * h00_z[i] / 2) - (h11_x[i] * h11_z[i] / 4); + weights(i, i_c.xp(2).zm()) = -(h11_x[i] * h10_z[i] / 4); + weights(i, i_c.xp(2).zp()) = (h11_x[i] * h01_z[i] / 2) + (h11_x[i] * h10_z[i] / 4); + weights(i, i_c.xp(2).zp(2)) = (h11_x[i] * h11_z[i] / 4); + weights(i, i_c) = (h00_x[i] * h00_z[i]) + (h11_x[i] * h11_z[i] / 4) + - (h00_x[i] * h11_z[i] / 2) - (h11_x[i] * h00_z[i] / 2); + weights(i, i_c.zm()) = (h11_x[i] * h10_z[i] / 4) - (h00_x[i] * h10_z[i] / 2); + weights(i, i_c.zp()) = (h00_x[i] * h01_z[i]) + (h00_x[i] * h10_z[i] / 2) + - (h11_x[i] * h01_z[i] / 2) - (h11_x[i] * h10_z[i] / 4); + weights(i, i_c.zp(2)) = (h00_x[i] * h11_z[i] / 2) - (h11_x[i] * h11_z[i] / 4); } weights.assemble(); - - MatAssemblyBegin(petscWeights, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(petscWeights, MAT_FINAL_ASSEMBLY); - if (!isInit) { - MatCreateVecs(petscWeights, &rhs, &result); - } - isInit = true; } void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& delta_z, @@ -408,32 +203,16 @@ PetscXZHermiteSpline::getWeightsForYApproximation(int i, int j, int k, int yoffs {i, j + yoffset, k_mod_p2, 0.5 * h11_z(i, j, k)}}; } -Field3D PetscXZHermiteSpline::interpolate(const Field3D& f, - const std::string& region) const { +Field3D +PetscXZHermiteSpline::interpolate(const Field3D& f, + [[maybe_unused]] const std::string& region) const { ASSERT1(f.getMesh() == localmesh); - Field3D f_interp{emptyFrom(f)}; - - BoutReal* ptr = nullptr; - const BoutReal* cptr = nullptr; - VecGetArray(rhs, &ptr); - BOUT_FOR(i, f.getRegion("RGN_NOY")) { ptr[int(i)] = f[i]; } - VecRestoreArray(rhs, &ptr); - MatMult(petscWeights, rhs, result); - VecGetArrayRead(result, &cptr); - const auto region2 = y_offset == 0 ? region : fmt::format("RGN_YPAR_{:+d}", y_offset); - BOUT_FOR(i, f.getRegion(region2)) { - f_interp[i] = cptr[int(i)]; - ASSERT2(std::isfinite(cptr[int(i)])); - } - VecRestoreArrayRead(result, &cptr); - - PetscVector f_petsc(f, indexer); - PetscVector f_result = weights * f_petsc; - auto new_result = f_result.toField(); - // Using the Petsc interface classes gives pretty shoddy results - // return new_result; - return f_interp; + const PetscVector f_petsc(f, indexer); + const PetscVector f_result = weights * f_petsc; + // TODO: we should only consider the passed-in region + // const auto region2 = y_offset == 0 ? region : fmt::format("RGN_YPAR_{:+d}", y_offset); + return f_result.toField(); } Field3D PetscXZHermiteSpline::interpolate(const Field3D& f, const Field3D& delta_x, From bf90b1d5b009ad6336a4f7cb7a7f063aaf55304a Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Thu, 30 May 2024 17:53:59 +0100 Subject: [PATCH 12/13] Fix a bunch of clang-tidy warnings --- src/mesh/interpolation/petsc_hermite_spline_xz.cxx | 14 +++++++++++--- .../test-interpolate/test_interpolate.cxx | 14 +++++++++----- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/mesh/interpolation/petsc_hermite_spline_xz.cxx b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx index 2cfef68b43..7f5323a39d 100644 --- a/src/mesh/interpolation/petsc_hermite_spline_xz.cxx +++ b/src/mesh/interpolation/petsc_hermite_spline_xz.cxx @@ -24,6 +24,8 @@ #if BOUT_HAS_PETSC +#include "bout/assert.hxx" +#include "bout/bout_types.hxx" #include "bout/boutexception.hxx" #include "bout/field3d.hxx" #include "bout/globalindexer.hxx" @@ -31,11 +33,16 @@ #include "bout/index_derivs_interface.hxx" #include "bout/interpolation_xz.hxx" #include "bout/operatorstencil.hxx" +#include "bout/paralleltransform.hxx" #include "bout/petsc_interface.hxx" +#include "bout/petsclib.hxx" +#include "bout/region.hxx" +#include +#include +#include #include - PetscXZHermiteSpline::PetscXZHermiteSpline(int y_offset, Mesh* mesh) : XZInterpolation(y_offset, mesh), petsclib(&Options::root()["mesh:paralleltransform:xzinterpolation:hermitespline"]), @@ -64,6 +71,7 @@ PetscXZHermiteSpline::PetscXZHermiteSpline(int y_offset, Mesh* mesh) // The stencil has 16 elements, so maximum number of columns in any // given row (whether on local or remote process) is 16 + // NOLINTNEXTLINE(misc-include-cleaner) MatMPIAIJSetPreallocation(*weights.get(), 16, nullptr, 16, nullptr); } @@ -85,8 +93,8 @@ void PetscXZHermiteSpline::calcWeights(const Field3D& delta_x, const Field3D& de // The integer part of xt_prime, zt_prime are the indices of the cell // containing the field line end-point - int i_corn = static_cast(floor(delta_x(x, y, z))); - k_corner(x, y, z) = static_cast(floor(delta_z(x, y, z))); + int i_corn = static_cast(std::floor(delta_x(x, y, z))); + k_corner(x, y, z) = static_cast(std::floor(delta_z(x, y, z))); // t_x, t_z are the normalised coordinates \in [0,1) within the cell // calculated by taking the remainder of the floating point index diff --git a/tests/integrated/test-interpolate/test_interpolate.cxx b/tests/integrated/test-interpolate/test_interpolate.cxx index d71228933f..30b0899316 100644 --- a/tests/integrated/test-interpolate/test_interpolate.cxx +++ b/tests/integrated/test-interpolate/test_interpolate.cxx @@ -11,14 +11,19 @@ #include #include "bout/bout.hxx" +#include "bout/bout_types.hxx" +#include "bout/boutexception.hxx" #include "bout/constants.hxx" +#include "bout/field3d.hxx" #include "bout/field_factory.hxx" +#include "bout/globals.hxx" #include "bout/interpolation_xz.hxx" +#include "bout/options.hxx" +#include "bout/options_io.hxx" #include "bout/sys/generator_context.hxx" /// Get a FieldGenerator from the options for a variable -std::shared_ptr getGeneratorFromOptions(const std::string& varname, - std::string& func) { +auto getGeneratorFromOptions(const std::string& varname, std::string& func) { Options* options = Options::getRoot()->getSection(varname); options->get("solution", func, "0.0"); @@ -69,7 +74,7 @@ int main(int argc, char** argv) { for (const auto& index : deltax) { // Get some random displacements BoutReal dx = index.x() + dice(); - BoutReal dz = index.z() + dice(); + const BoutReal dz = index.z() + dice(); // For the last point, put the displacement inwards // Otherwise we try to interpolate in the guard cells, which doesn't work so well if (index.x() >= mesh->xend && mesh->getNXPE() - 1 == mesh->getXProcIndex()) { @@ -79,8 +84,7 @@ int main(int argc, char** argv) { deltaz[index] = dz; // Get the global indices bout::generator::Context pos{index, CELL_CENTRE, deltax.getMesh(), 0.0}; - pos.set("x", mesh->GlobalX(dx), "z", - TWOPI * static_cast(dz) / static_cast(mesh->LocalNz)); + pos.set("x", mesh->GlobalX(dx), "z", TWOPI * dz / mesh->LocalNz); // Generate the analytic solution at the displacements a_solution[index] = a_gen->generate(pos); b_solution[index] = b_gen->generate(pos); From b7f5b94cb44a822d58381c32b47ac7665b0776e9 Mon Sep 17 00:00:00 2001 From: Peter Hill Date: Fri, 31 May 2024 08:58:17 +0100 Subject: [PATCH 13/13] Remove recursive check Although `getGlobal` is only valid for local indices, unfortunately `isLocal` calls `getGlobal`, so we can't actually check! --- include/bout/globalindexer.hxx | 1 - 1 file changed, 1 deletion(-) diff --git a/include/bout/globalindexer.hxx b/include/bout/globalindexer.hxx index 3da3da8606..76c60e1cb6 100644 --- a/include/bout/globalindexer.hxx +++ b/include/bout/globalindexer.hxx @@ -128,7 +128,6 @@ public: /// Convert the local index object to a global index which can be /// used in PETSc vectors and matrices. virtual int getGlobal(const ind_type& ind) const { - ASSERT1(isLocal(ind)); return static_cast(std::round(indices[ind])); }